Просмотр исходного кода

Merge branch 'main' of https://github.com/alibaba-damo-academy/FunASR into main

雾聪 2 года назад
Родитель
Коммит
eee6af2ece

+ 0 - 1
README.md

@@ -13,7 +13,6 @@
 | [**Highlights**](#highlights)
 | [**Highlights**](#highlights)
 | [**Installation**](#installation)
 | [**Installation**](#installation)
 | [**Docs**](https://alibaba-damo-academy.github.io/FunASR/en/index.html)
 | [**Docs**](https://alibaba-damo-academy.github.io/FunASR/en/index.html)
-| [**Tutorial_CN**](https://github.com/alibaba-damo-academy/FunASR/wiki#funasr%E7%94%A8%E6%88%B7%E6%89%8B%E5%86%8C)
 | [**Papers**](https://github.com/alibaba-damo-academy/FunASR#citations)
 | [**Papers**](https://github.com/alibaba-damo-academy/FunASR#citations)
 | [**Runtime**](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime)
 | [**Runtime**](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime)
 | [**Model Zoo**](https://github.com/alibaba-damo-academy/FunASR/blob/main/docs/model_zoo/modelscope_models.md)
 | [**Model Zoo**](https://github.com/alibaba-damo-academy/FunASR/blob/main/docs/model_zoo/modelscope_models.md)

+ 20 - 1
egs_modelscope/asr/TEMPLATE/README.md

@@ -20,11 +20,13 @@ rec_result = inference_pipeline(audio_in='https://isv-data.oss-cn-hangzhou.aliyu
 print(rec_result)
 print(rec_result)
 ```
 ```
 #### [Paraformer-online Model](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary)
 #### [Paraformer-online Model](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary)
+##### Streaming Decoding
 ```python
 ```python
 inference_pipeline = pipeline(
 inference_pipeline = pipeline(
     task=Tasks.auto_speech_recognition,
     task=Tasks.auto_speech_recognition,
     model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
     model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
-    model_revision='v1.0.6',
+    model_revision='v1.0.4',
+    update_model='v1.0.4',
     mode='paraformer_streaming'
     mode='paraformer_streaming'
     )
     )
 import soundfile
 import soundfile
@@ -42,6 +44,23 @@ speech_chunk = speech[chunk_stride:chunk_stride+chunk_stride]
 rec_result = inference_pipeline(audio_in=speech_chunk, param_dict=param_dict)
 rec_result = inference_pipeline(audio_in=speech_chunk, param_dict=param_dict)
 print(rec_result)
 print(rec_result)
 ```
 ```
+
+##### Fake Streaming Decoding
+```python
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+
+inference_pipeline = pipeline(
+    task=Tasks.auto_speech_recognition,
+    model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
+    model_revision='v1.0.6',
+    update_model='v1.0.6',
+    mode="paraformer_fake_streaming"
+)
+audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav'
+rec_result = inference_pipeline(audio_in=audio_in)
+print(rec_result)
+```
 For the full demo code, please refer to the [demo](https://github.com/alibaba-damo-academy/FunASR/discussions/241)
 For the full demo code, please refer to the [demo](https://github.com/alibaba-damo-academy/FunASR/discussions/241)
 
 
 #### [UniASR Model](https://www.modelscope.cn/models/damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online/summary)
 #### [UniASR Model](https://www.modelscope.cn/models/damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online/summary)

+ 1 - 0
egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py

@@ -5,6 +5,7 @@ inference_pipeline = pipeline(
     task=Tasks.auto_speech_recognition,
     task=Tasks.auto_speech_recognition,
     model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
     model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
     model_revision='v1.0.6',
     model_revision='v1.0.6',
+    update_model='v1.0.6',
     mode="paraformer_fake_streaming"
     mode="paraformer_fake_streaming"
 )
 )
 audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav'
 audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav'

+ 2 - 1
egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py

@@ -14,7 +14,8 @@ os.environ["MODELSCOPE_CACHE"] = "./"
 inference_pipeline = pipeline(
 inference_pipeline = pipeline(
     task=Tasks.auto_speech_recognition,
     task=Tasks.auto_speech_recognition,
     model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
     model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
-    model_revision='v1.0.6',
+    model_revision='v1.0.4',
+    update_model='v1.0.4',
     mode="paraformer_streaming"
     mode="paraformer_streaming"
 )
 )
 
 

+ 1 - 0
egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py

@@ -5,6 +5,7 @@ inference_pipeline = pipeline(
     task=Tasks.auto_speech_recognition,
     task=Tasks.auto_speech_recognition,
     model='damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online',
     model='damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online',
     model_revision='v1.0.6',
     model_revision='v1.0.6',
+    update_model='v1.0.6',
     mode="paraformer_fake_streaming"
     mode="paraformer_fake_streaming"
 )
 )
 audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav'
 audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav'

+ 2 - 1
egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py

@@ -14,7 +14,8 @@ os.environ["MODELSCOPE_CACHE"] = "./"
 inference_pipeline = pipeline(
 inference_pipeline = pipeline(
     task=Tasks.auto_speech_recognition,
     task=Tasks.auto_speech_recognition,
     model='damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online',
     model='damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online',
-    model_revision='v1.0.6',
+    model_revision='v1.0.4',
+    update_model='v1.0.4',
     mode="paraformer_streaming"
     mode="paraformer_streaming"
 )
 )
 
 

+ 1 - 1
egs_modelscope/tp/TEMPLATE/README.md

@@ -11,7 +11,7 @@ from modelscope.utils.constant import Tasks
 inference_pipeline = pipeline(
 inference_pipeline = pipeline(
     task=Tasks.speech_timestamp,
     task=Tasks.speech_timestamp,
     model='damo/speech_timestamp_prediction-v1-16k-offline',
     model='damo/speech_timestamp_prediction-v1-16k-offline',
-    output_dir=None)
+    model_revision='v1.1.0')
 
 
 rec_result = inference_pipeline(
 rec_result = inference_pipeline(
     audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_timestamps.wav',
     audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_timestamps.wav',

+ 8 - 8
fun_text_processing/inverse_text_normalization/id/id_unit_test.tsv

@@ -1,10 +1,10 @@
 dua ribu dua puluh dua	2022
 dua ribu dua puluh dua	2022
-tiga ribu	300
+tiga ribu	3000
 sembilan ribu sembilan ratus sembilan puluh sembilan	9999
 sembilan ribu sembilan ratus sembilan puluh sembilan	9999
-seribu satu	100001
-ribu	100
+seribu satu	1001
+ribu	1000
 seribu	1000
 seribu	1000
-seribu dua ratus delapan puluh sembilan	10289
+seribu dua ratus delapan puluh sembilan	1289
 ribu dua ratus delapan puluh sembilan	1289
 ribu dua ratus delapan puluh sembilan	1289
 nol satu dua tiga empat lima enam tujuh delapan sembilan	01 2345-6789
 nol satu dua tiga empat lima enam tujuh delapan sembilan	01 2345-6789
 empat belas	14
 empat belas	14
@@ -22,8 +22,8 @@ satu miliar	1 miliar
 seratus dua puluh tiga	123
 seratus dua puluh tiga	123
 ratus dua puluh tiga	123
 ratus dua puluh tiga	123
 dua puluh empat maret 	24 maret
 dua puluh empat maret 	24 maret
-ribu tujuh puluh enam	10076
-seribu tujuh puluh enam	100076
-ribu tujuh puluh enam rupiah	10076 rupiah
+ribu tujuh puluh enam	1076
+seribu tujuh puluh enam	1076
+ribu tujuh puluh enam rupiah	1076 rupiah
 tujuh puluh enam	76
 tujuh puluh enam	76
-ditambah enam dua dua satu enam lima tiga sembilan nol enam nol lima	+62 21 6539-0605
+ditambah enam dua dua satu enam lima tiga sembilan nol enam nol lima	+62 21 6539-0605

+ 6 - 9
fun_text_processing/inverse_text_normalization/id/taggers/cardinal.py

@@ -26,11 +26,10 @@ class CardinalFst(GraphFst):
         graph_teen = pynini.string_file(get_abs_path("data/numbers/teen.tsv"))
         graph_teen = pynini.string_file(get_abs_path("data/numbers/teen.tsv"))
         graph_hundreds = pynini.string_file(get_abs_path("data/numbers/hundreds.tsv"))
         graph_hundreds = pynini.string_file(get_abs_path("data/numbers/hundreds.tsv"))
         graph_thousand = pynini.string_file(get_abs_path("data/numbers/thousand.tsv"))
         graph_thousand = pynini.string_file(get_abs_path("data/numbers/thousand.tsv"))
-
-        graph_cents = pynini.cross("seratus", "100") | pynini.cross("ratus", "100") | pynini.union(graph_hundreds, pynutil.insert("0"))
+        
         graph_hundred = pynini.cross("ratus", "") | pynini.cross("seratus", "")
         graph_hundred = pynini.cross("ratus", "") | pynini.cross("seratus", "")
 
 
-        graph_hundred_component = pynini.union(graph_digit + delete_space + graph_hundred, pynutil.insert("00"))
+        graph_hundred_component = pynini.union(graph_digit + delete_space + graph_hundred, pynutil.insert("0"))
         graph_hundred_component += delete_space
         graph_hundred_component += delete_space
         graph_hundred_component += pynini.union(
         graph_hundred_component += pynini.union(
             graph_teen | pynutil.insert("00"),
             graph_teen | pynutil.insert("00"),
@@ -44,8 +43,8 @@ class CardinalFst(GraphFst):
                 (graph_ties | pynutil.insert("0")) + delete_space + (
                 (graph_ties | pynutil.insert("0")) + delete_space + (
                             graph_digit | pynutil.insert("0")),
                             graph_digit | pynutil.insert("0")),
         )
         )
-        graph_hundred_component = graph_hundred_component | graph_cents | graph_one_hundred_component
-
+        graph_hundred_component = graph_hundred_component | graph_one_hundred_component
+    
         graph_hundred_component_at_least_one_none_zero_digit = graph_hundred_component @ (
         graph_hundred_component_at_least_one_none_zero_digit = graph_hundred_component @ (
             pynini.closure(DAMO_DIGIT) + (DAMO_DIGIT - "0") + pynini.closure(DAMO_DIGIT)
             pynini.closure(DAMO_DIGIT) + (DAMO_DIGIT - "0") + pynini.closure(DAMO_DIGIT)
         )
         )
@@ -54,14 +53,12 @@ class CardinalFst(GraphFst):
         )
         )
         graph_thousand = pynini.cross("ribu", "") | pynini.cross("seribu", "")
         graph_thousand = pynini.cross("ribu", "") | pynini.cross("seribu", "")
         graph_one_thousand_component = pynini.union(pynini.cross("ribu", "1") | pynini.cross("seribu", "1"))
         graph_one_thousand_component = pynini.union(pynini.cross("ribu", "1") | pynini.cross("seribu", "1"))
-        graph_thousand_cents = pynini.cross("seribu", "10") | pynini.cross("ribu","10") | pynini.union(graph_thousand, pynutil.insert(""))
+       
         graph_thousands = pynini.union(
         graph_thousands = pynini.union(
             graph_hundred_component_at_least_one_none_zero_digit + delete_space + (pynutil.delete("ribu") | pynutil.delete("seribu")),
             graph_hundred_component_at_least_one_none_zero_digit + delete_space + (pynutil.delete("ribu") | pynutil.delete("seribu")),
             pynutil.insert("000", weight=0.1),
             pynutil.insert("000", weight=0.1),
         )
         )
-        graph_thousand_component = pynini.union(graph_digit + delete_space + graph_thousand, pynutil.insert("000"))
-        graph_thousand_component += delete_space
-        graph_thousands = graph_thousands | graph_thousand_cents | graph_thousand_component | graph_one_thousand_component
+        graph_thousands = graph_thousands | (pynutil.insert("00") + graph_one_thousand_component)
 
 
         graph_million = pynini.union(
         graph_million = pynini.union(
             graph_hundred_component_at_least_one_none_zero_digit + delete_space + (pynutil.delete("juta") | pynutil.delete("sejuta")),
             graph_hundred_component_at_least_one_none_zero_digit + delete_space + (pynutil.delete("juta") | pynutil.delete("sejuta")),

+ 3 - 1
funasr/runtime/html5/static/main.js

@@ -145,7 +145,9 @@ function stop() {
 	isRec = false;
 	isRec = false;
     info_div.innerHTML="请等候...";
     info_div.innerHTML="请等候...";
 	btnStop.disabled = true;
 	btnStop.disabled = true;
-	setTimeout(function(){btnStart.disabled = false;info_div.innerHTML="请点击开始";}, 3000 );
+	setTimeout(function(){
+		console.log("call stop ws!");
+		wsconnecter.wsStop();btnStart.disabled = false;info_div.innerHTML="请点击开始";}, 3000 );
 	rec.stop(function(blob,duration){
 	rec.stop(function(blob,duration){
   
   
 		console.log(blob);
 		console.log(blob);

+ 6 - 1
funasr/runtime/html5/static/wsconnecter.js

@@ -28,7 +28,11 @@ function WebSocketConnectMethod( config ) { //定义socket连接方法类
 		if ( 'WebSocket' in window ) {
 		if ( 'WebSocket' in window ) {
 			speechSokt = new WebSocket( Uri ); // 定义socket连接对象
 			speechSokt = new WebSocket( Uri ); // 定义socket连接对象
 			speechSokt.onopen = function(e){onOpen(e);}; // 定义响应函数
 			speechSokt.onopen = function(e){onOpen(e);}; // 定义响应函数
-			speechSokt.onclose = function(e){onClose(e);};
+			speechSokt.onclose = function(e){
+			    console.log("onclose ws!");
+			    speechSokt.close();
+				onClose(e);
+				};
 			speechSokt.onmessage = function(e){onMessage(e);};
 			speechSokt.onmessage = function(e){onMessage(e);};
 			speechSokt.onerror = function(e){onError(e);};
 			speechSokt.onerror = function(e){onError(e);};
 			return 1;
 			return 1;
@@ -42,6 +46,7 @@ function WebSocketConnectMethod( config ) { //定义socket连接方法类
 	// 定义停止与发送函数
 	// 定义停止与发送函数
 	this.wsStop = function () {
 	this.wsStop = function () {
 		if(speechSokt != undefined) {
 		if(speechSokt != undefined) {
+			console.log("stop ws!");
 			speechSokt.close();
 			speechSokt.close();
 		}
 		}
 	};
 	};

+ 24 - 3
funasr/runtime/python/websocket/wss_srv_asr.py

@@ -58,16 +58,36 @@ inference_pipeline_asr_online = pipeline(
     model=args.asr_model_online,
     model=args.asr_model_online,
     ngpu=args.ngpu,
     ngpu=args.ngpu,
     ncpu=args.ncpu,
     ncpu=args.ncpu,
-    model_revision='v1.0.6',
+    model_revision='v1.0.4',
+    update_model='v1.0.4',
     mode='paraformer_streaming')
     mode='paraformer_streaming')
 
 
-print("model loaded")
+print("model loaded! only support one client at the same time now!!!!")
 
 
+async def ws_reset(websocket):
+    print("ws reset now, total num is ",len(websocket_users))
+    websocket.param_dict_asr_online = {"cache": dict()}
+    websocket.param_dict_vad = {'in_cache': dict(), "is_final": True}
+    websocket.param_dict_asr_online["is_final"]=True
+    audio_in=b''.join(np.zeros(int(16000),dtype=np.int16))
+    inference_pipeline_vad(audio_in=audio_in, param_dict=websocket.param_dict_vad)
+    inference_pipeline_asr_online(audio_in=audio_in, param_dict=websocket.param_dict_asr_online)
+    await websocket.close()
+    
+    
+async def clear_websocket():
+   for websocket in websocket_users:
+       await ws_reset(websocket)
+   websocket_users.clear()
+ 
+ 
+       
 async def ws_serve(websocket, path):
 async def ws_serve(websocket, path):
     frames = []
     frames = []
     frames_asr = []
     frames_asr = []
     frames_asr_online = []
     frames_asr_online = []
     global websocket_users
     global websocket_users
+    await clear_websocket()
     websocket_users.add(websocket)
     websocket_users.add(websocket)
     websocket.param_dict_asr = {}
     websocket.param_dict_asr = {}
     websocket.param_dict_asr_online = {"cache": dict()}
     websocket.param_dict_asr_online = {"cache": dict()}
@@ -139,7 +159,8 @@ async def ws_serve(websocket, path):
 
 
      
      
     except websockets.ConnectionClosed:
     except websockets.ConnectionClosed:
-        print("ConnectionClosed...", websocket_users)
+        print("ConnectionClosed...", websocket_users,flush=True)
+        await ws_reset(websocket)
         websocket_users.remove(websocket)
         websocket_users.remove(websocket)
     except websockets.InvalidState:
     except websockets.InvalidState:
         print("InvalidState...")
         print("InvalidState...")