пре 2 година · eee6af2ece
--- a/README.md
+++ b/README.md
@@ -13,7 +13,6 @@
 
				 | [**Highlights**](#highlights)
			
 
				 | [**Installation**](#installation)
			
 
				 | [**Docs**](https://alibaba-damo-academy.github.io/FunASR/en/index.html)
			
 
				-| [**Tutorial_CN**](https://github.com/alibaba-damo-academy/FunASR/wiki#funasr%E7%94%A8%E6%88%B7%E6%89%8B%E5%86%8C)
			
 
				 | [**Papers**](https://github.com/alibaba-damo-academy/FunASR#citations)
			
 
				 | [**Runtime**](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime)
			
 
				 | [**Model Zoo**](https://github.com/alibaba-damo-academy/FunASR/blob/main/docs/model_zoo/modelscope_models.md)
			
--- a/egs_modelscope/asr/TEMPLATE/README.md
+++ b/egs_modelscope/asr/TEMPLATE/README.md
@@ -20,11 +20,13 @@ rec_result = inference_pipeline(audio_in='https://isv-data.oss-cn-hangzhou.aliyu
 
				 print(rec_result)
			
 
				 ```
			
 
				 #### [Paraformer-online Model](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary)
			
 
				+##### Streaming Decoding
			
 
				 ```python
			
 
				 inference_pipeline = pipeline(
			
 
				     task=Tasks.auto_speech_recognition,
			
 
				     model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
			
 
				-    model_revision='v1.0.6',
			
 
				+    model_revision='v1.0.4',
			
 
				+    update_model='v1.0.4',
			
 
				     mode='paraformer_streaming'
			
 
				     )
			
 
				 import soundfile
			
@@ -42,6 +44,23 @@ speech_chunk = speech[chunk_stride:chunk_stride+chunk_stride]
 
				 rec_result = inference_pipeline(audio_in=speech_chunk, param_dict=param_dict)
			
 
				 print(rec_result)
			
 
				 ```
			
 
				+
			
 
				+##### Fake Streaming Decoding
			
 
				+```python
			
 
				+from modelscope.pipelines import pipeline
			
 
				+from modelscope.utils.constant import Tasks
			
 
				+
			
 
				+inference_pipeline = pipeline(
			
 
				+    task=Tasks.auto_speech_recognition,
			
 
				+    model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
			
 
				+    model_revision='v1.0.6',
			
 
				+    update_model='v1.0.6',
			
 
				+    mode="paraformer_fake_streaming"
			
 
				+)
			
 
				+audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav'
			
 
				+rec_result = inference_pipeline(audio_in=audio_in)
			
 
				+print(rec_result)
			
 
				+```
			
 
				 Full code of demo, please ref to [demo](https://github.com/alibaba-damo-academy/FunASR/discussions/241)
			
 
				 
			
 
				 #### [UniASR Model](https://www.modelscope.cn/models/damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online/summary)
			
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py
@@ -5,6 +5,7 @@ inference_pipeline = pipeline(
 
				     task=Tasks.auto_speech_recognition,
			
 
				     model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
			
 
				     model_revision='v1.0.6',
			
 
				+    update_model='v1.0.6',
			
 
				     mode="paraformer_fake_streaming"
			
 
				 )
			
 
				 audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav'
			
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py
@@ -14,7 +14,8 @@ os.environ["MODELSCOPE_CACHE"] = "./"
 
				 inference_pipeline = pipeline(
			
 
				     task=Tasks.auto_speech_recognition,
			
 
				     model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
			
 
				-    model_revision='v1.0.6',
			
 
				+    model_revision='v1.0.4',
			
 
				+    update_model='v1.0.4',
			
 
				     mode="paraformer_streaming"
			
 
				 )
			
 
				 
			
--- a/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py
@@ -5,6 +5,7 @@ inference_pipeline = pipeline(
 
				     task=Tasks.auto_speech_recognition,
			
 
				     model='damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online',
			
 
				     model_revision='v1.0.6',
			
 
				+    update_model='v1.0.6',
			
 
				     mode="paraformer_fake_streaming"
			
 
				 )
			
 
				 audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav'
			
--- a/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py
@@ -14,7 +14,8 @@ os.environ["MODELSCOPE_CACHE"] = "./"
 
				 inference_pipeline = pipeline(
			
 
				     task=Tasks.auto_speech_recognition,
			
 
				     model='damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online',
			
 
				-    model_revision='v1.0.6',
			
 
				+    model_revision='v1.0.4',
			
 
				+    update_model='v1.0.4',
			
 
				     mode="paraformer_streaming"
			
 
				 )
			
 
				 
			
--- a/egs_modelscope/tp/TEMPLATE/README.md
+++ b/egs_modelscope/tp/TEMPLATE/README.md
@@ -11,7 +11,7 @@ from modelscope.utils.constant import Tasks
 
				 inference_pipeline = pipeline(
			
 
				     task=Tasks.speech_timestamp,
			
 
				     model='damo/speech_timestamp_prediction-v1-16k-offline',
			
 
				-    output_dir=None)
			
 
				+    model_revision='v1.1.0')
			
 
				 
			
 
				 rec_result = inference_pipeline(
			
 
				     audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_timestamps.wav',
			
--- a/fun_text_processing/inverse_text_normalization/id/id_unit_test.tsv
+++ b/fun_text_processing/inverse_text_normalization/id/id_unit_test.tsv
@@ -1,10 +1,10 @@
 
				 dua ribu dua puluh dua	2022
			
 
				-tiga ribu	300
			
 
				+tiga ribu	3000
			
 
				 sembilan ribu sembilan ratus sembilan puluh sembilan	9999
			
 
				-seribu satu	100001
			
 
				-ribu	100
			
 
				+seribu satu	1001
			
 
				+ribu	1000
			
 
				 seribu	1000
			
 
				-seribu dua ratus delapan puluh sembilan	10289
			
 
				+seribu dua ratus delapan puluh sembilan	1289
			
 
				 ribu dua ratus delapan puluh sembilan	1289
			
 
				 nol satu dua tiga empat lima enam tujuh delapan sembilan	01 2345-6789
			
 
				 empat belas	14
			
@@ -22,8 +22,8 @@ satu miliar	1 miliar
 
				 seratus dua puluh tiga	123
			
 
				 ratus dua puluh tiga	123
			
 
				 dua puluh empat maret 	24 maret
			
 
				-ribu tujuh puluh enam	10076
			
 
				-seribu tujuh puluh enam	100076
			
 
				-ribu tujuh puluh enam rupiah	10076 rupiah
			
 
				+ribu tujuh puluh enam	1076
			
 
				+seribu tujuh puluh enam	1076
			
 
				+ribu tujuh puluh enam rupiah	1076 rupiah
			
 
				 tujuh puluh enam	76
			
 
				-ditambah enam dua dua satu enam lima tiga sembilan nol enam nol lima	+62 21 6539-0605
			
 
				+ditambah enam dua dua satu enam lima tiga sembilan nol enam nol lima	+62 21 6539-0605
			
--- a/fun_text_processing/inverse_text_normalization/id/taggers/cardinal.py
+++ b/fun_text_processing/inverse_text_normalization/id/taggers/cardinal.py
@@ -26,11 +26,10 @@ class CardinalFst(GraphFst):
 
				         graph_teen = pynini.string_file(get_abs_path("data/numbers/teen.tsv"))
			
 
				         graph_hundreds = pynini.string_file(get_abs_path("data/numbers/hundreds.tsv"))
			
 
				         graph_thousand = pynini.string_file(get_abs_path("data/numbers/thousand.tsv"))
			
 
				-
			
 
				-        graph_cents = pynini.cross("seratus", "100") | pynini.cross("ratus", "100") | pynini.union(graph_hundreds, pynutil.insert("0"))
			
 
				+        
			
 
				         graph_hundred = pynini.cross("ratus", "") | pynini.cross("seratus", "")
			
 
				 
			
 
				-        graph_hundred_component = pynini.union(graph_digit + delete_space + graph_hundred, pynutil.insert("00"))
			
 
				+        graph_hundred_component = pynini.union(graph_digit + delete_space + graph_hundred, pynutil.insert("0"))
			
 
				         graph_hundred_component += delete_space
			
 
				         graph_hundred_component += pynini.union(
			
 
				             graph_teen | pynutil.insert("00"),
			
@@ -44,8 +43,8 @@ class CardinalFst(GraphFst):
 
				                 (graph_ties | pynutil.insert("0")) + delete_space + (
			
 
				                             graph_digit | pynutil.insert("0")),
			
 
				         )
			
 
				-        graph_hundred_component = graph_hundred_component | graph_cents | graph_one_hundred_component
			
 
				-
			
 
				+        graph_hundred_component = graph_hundred_component | graph_one_hundred_component
			
 
				+    
			
 
				         graph_hundred_component_at_least_one_none_zero_digit = graph_hundred_component @ (
			
 
				             pynini.closure(DAMO_DIGIT) + (DAMO_DIGIT - "0") + pynini.closure(DAMO_DIGIT)
			
 
				         )
			
@@ -54,14 +53,12 @@ class CardinalFst(GraphFst):
 
				         )
			
 
				         graph_thousand = pynini.cross("ribu", "") | pynini.cross("seribu", "")
			
 
				         graph_one_thousand_component = pynini.union(pynini.cross("ribu", "1") | pynini.cross("seribu", "1"))
			
 
				-        graph_thousand_cents = pynini.cross("seribu", "10") | pynini.cross("ribu","10") | pynini.union(graph_thousand, pynutil.insert(""))
			
 
				+       
			
 
				         graph_thousands = pynini.union(
			
 
				             graph_hundred_component_at_least_one_none_zero_digit + delete_space + (pynutil.delete("ribu") | pynutil.delete("seribu")),
			
 
				             pynutil.insert("000", weight=0.1),
			
 
				         )
			
 
				-        graph_thousand_component = pynini.union(graph_digit + delete_space + graph_thousand, pynutil.insert("000"))
			
 
				-        graph_thousand_component += delete_space
			
 
				-        graph_thousands = graph_thousands | graph_thousand_cents | graph_thousand_component | graph_one_thousand_component
			
 
				+        graph_thousands = graph_thousands | (pynutil.insert("00") + graph_one_thousand_component)
			
 
				 
			
 
				         graph_million = pynini.union(
			
 
				             graph_hundred_component_at_least_one_none_zero_digit + delete_space + (pynutil.delete("juta") | pynutil.delete("sejuta")),
			
--- a/funasr/runtime/html5/static/main.js
+++ b/funasr/runtime/html5/static/main.js
@@ -145,7 +145,9 @@ function stop() {
 
				 	isRec = false;

			
 
				     info_div.innerHTML="请等候...";

			
 
				 	btnStop.disabled = true;

			
 
				-	setTimeout(function(){btnStart.disabled = false;info_div.innerHTML="请点击开始";}, 3000 );

			
 
				+	setTimeout(function(){

			
 
				+		console.log("call stop ws!");

			
 
				+		wsconnecter.wsStop();btnStart.disabled = false;info_div.innerHTML="请点击开始";}, 3000 );

			
 
				 	rec.stop(function(blob,duration){

			
 
				   

			
 
				 		console.log(blob);

			
--- a/funasr/runtime/html5/static/wsconnecter.js
+++ b/funasr/runtime/html5/static/wsconnecter.js
@@ -28,7 +28,11 @@ function WebSocketConnectMethod( config ) { //定义socket连接方法类
 
				 		if ( 'WebSocket' in window ) {

			
 
				 			speechSokt = new WebSocket( Uri ); // 定义socket连接对象

			
 
				 			speechSokt.onopen = function(e){onOpen(e);}; // 定义响应函数

			
 
				-			speechSokt.onclose = function(e){onClose(e);};

			
 
				+			speechSokt.onclose = function(e){

			
 
				+			    console.log("onclose ws!");

			
 
				+			    speechSokt.close();

			
 
				+				onClose(e);

			
 
				+				};

			
 
				 			speechSokt.onmessage = function(e){onMessage(e);};

			
 
				 			speechSokt.onerror = function(e){onError(e);};

			
 
				 			return 1;

			
@@ -42,6 +46,7 @@ function WebSocketConnectMethod( config ) { //定义socket连接方法类
 
				 	// 定义停止与发送函数

			
 
				 	this.wsStop = function () {

			
 
				 		if(speechSokt != undefined) {

			
 
				+			console.log("stop ws!");

			
 
				 			speechSokt.close();

			
 
				 		}

			
 
				 	};

			
--- a/funasr/runtime/python/websocket/wss_srv_asr.py
+++ b/funasr/runtime/python/websocket/wss_srv_asr.py
@@ -58,16 +58,36 @@ inference_pipeline_asr_online = pipeline(
 
				     model=args.asr_model_online,
			
 
				     ngpu=args.ngpu,
			
 
				     ncpu=args.ncpu,
			
 
				-    model_revision='v1.0.6',
			
 
				+    model_revision='v1.0.4',
			
 
				+    update_model='v1.0.4',
			
 
				     mode='paraformer_streaming')
			
 
				 
			
 
				-print("model loaded")
			
 
				+print("model loaded! only support one client at the same time now!!!!")
			
 
				 
			
 
				+async def ws_reset(websocket):
			
 
				+    print("ws reset now, total num is ",len(websocket_users))
			
 
				+    websocket.param_dict_asr_online = {"cache": dict()}
			
 
				+    websocket.param_dict_vad = {'in_cache': dict(), "is_final": True}
			
 
				+    websocket.param_dict_asr_online["is_final"]=True
			
 
				+    audio_in=b''.join(np.zeros(int(16000),dtype=np.int16))
			
 
				+    inference_pipeline_vad(audio_in=audio_in, param_dict=websocket.param_dict_vad)
			
 
				+    inference_pipeline_asr_online(audio_in=audio_in, param_dict=websocket.param_dict_asr_online)
			
 
				+    await websocket.close()
			
 
				+    
			
 
				+    
			
 
				+async def clear_websocket():
			
 
				+   for websocket in websocket_users:
			
 
				+       await ws_reset(websocket)
			
 
				+   websocket_users.clear()
			
 
				+ 
			
 
				+ 
			
 
				+       
			
 
				 async def ws_serve(websocket, path):
			
 
				     frames = []
			
 
				     frames_asr = []
			
 
				     frames_asr_online = []
			
 
				     global websocket_users
			
 
				+    await clear_websocket()
			
 
				     websocket_users.add(websocket)
			
 
				     websocket.param_dict_asr = {}
			
 
				     websocket.param_dict_asr_online = {"cache": dict()}
			
@@ -139,7 +159,8 @@ async def ws_serve(websocket, path):
 
				 
			
 
				      
			
 
				     except websockets.ConnectionClosed:
			
 
				-        print("ConnectionClosed...", websocket_users)
			
 
				+        print("ConnectionClosed...", websocket_users,flush=True)
			
 
				+        await ws_reset(websocket)
			
 
				         websocket_users.remove(websocket)
			
 
				     except websockets.InvalidState:
			
 
				         print("InvalidState...")