Jelajahi Sumber

Merge pull request #93 from alibaba-damo-academy/dev_lzr

update paraformer-large model RESULTS.md and support for turning off timestamps
zhifu gao 3 tahun lalu
induk
melakukan
2edb3a1bf0

+ 23 - 0
egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch/RESULTS.md

@@ -0,0 +1,23 @@
+# Paraformer-Large
+- Model link: <https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch/summary>
+- Model size: 220M
+
+# Environments
+- date: `Fri Feb 10 13:34:24 CST 2023`
+- python version: `3.7.12`
+- FunASR version: `0.1.6`
+- pytorch version: `pytorch 1.7.0`
+- Git hash: ``
+- Commit date: ``
+
+# Benchmark Results
+
+## AISHELL-1
+- Decode config:
+  - Decode without CTC
+  - Decode without LM
+
+| testset | base model CER(%) | finetune model CER(%) |
+|:--------------:|:---------:|:-------------:|
+| dev            | 1.75      |1.62           |
+| test           | 1.95      |1.78           |

+ 25 - 0
egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell2-vocab8404-pytorch/RESULTS.md

@@ -0,0 +1,25 @@
+# Paraformer-Large
+- Model link: <https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-aishell2-vocab8404-pytorch/summary>
+- Model size: 220M
+
+# Environments
+- date: `Fri Feb 10 13:34:24 CST 2023`
+- python version: `3.7.12`
+- FunASR version: `0.1.6`
+- pytorch version: `pytorch 1.7.0`
+- Git hash: ``
+- Commit date: ``
+
+# Benchmark Results
+
+## AISHELL-2
+- Decode config: 
+  - Decode without CTC
+  - Decode without LM
+
+| testset      | base model|finetune model|
+|:------------:|:---------:|:------------:|
+| dev_ios      | 2.80      |2.60          |
+| test_android | 3.13      |2.84          |
+| test_ios     | 2.85      |2.82          |
+| test_mic     | 3.06      |2.88          |

+ 75 - 0
egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/RESULTS.md

@@ -0,0 +1,75 @@
+# Paraformer-Large
+- Model link: <https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary>
+- Model size: 220M
+
+# Environments
+- date: `Tue Nov 22 18:48:39 CST 2022`
+- python version: `3.7.12`
+- FunASR version: `0.1.0`
+- pytorch version: `pytorch 1.7.0`
+- Git hash: ``
+- Commit date: ``
+
+# Benchmark Results
+
+## AISHELL-1
+- Decode config: 
+  - Decode without CTC
+  - Decode without LM
+
+| testset   | CER(%)|
+|:---------:|:-----:|
+| dev       | 1.75  |
+| test      | 1.95  |
+
+## AISHELL-2
+- Decode config: 
+  - Decode without CTC
+  - Decode without LM
+
+| testset      | CER(%)|
+|:------------:|:-----:|
+| dev_ios      | 2.80  |
+| test_android | 3.13  |
+| test_ios     | 2.85  |
+| test_mic     | 3.06  |
+
+## Wenetspeech
+- Decode config: 
+  - Decode without CTC
+  - Decode without LM
+
+| testset   | CER(%)|
+|:---------:|:-----:|
+| dev       | 3.57  |
+| test      | 6.97  |
+| test_net  | 6.74  |
+
+## SpeechIO TIOBE
+- Decode config 1:
+  - Decode without CTC
+  - Decode without LM
+  - With text norm
+- Decode config 2:
+  - Decode without CTC
+  - Decode with Transformer-LM
+  - LM weight: 0.15
+  - With text norm
+
+| testset | w/o LM | w/ LM |
+|:------------------:|:----:|:----:|
+|SPEECHIO_ASR_ZH00001| 0.49 | 0.35 |
+|SPEECHIO_ASR_ZH00002| 3.23 | 2.86 |
+|SPEECHIO_ASR_ZH00003| 1.13 | 0.80 |
+|SPEECHIO_ASR_ZH00004| 1.33 | 1.10 |
+|SPEECHIO_ASR_ZH00005| 1.41 | 1.18 |
+|SPEECHIO_ASR_ZH00006| 5.25 | 4.85 |
+|SPEECHIO_ASR_ZH00007| 5.51 | 4.97 |
+|SPEECHIO_ASR_ZH00008| 3.69 | 3.18 |
+|SPEECHIO_ASR_ZH00009| 3.02 | 2.78 |
+|SPEECHIO_ASR_ZH00010| 3.35 | 2.99 |
+|SPEECHIO_ASR_ZH00011| 1.54 | 1.25 |
+|SPEECHIO_ASR_ZH00012| 2.06 | 1.68 |
+|SPEECHIO_ASR_ZH00013| 2.57 | 2.25 |
+|SPEECHIO_ASR_ZH00014| 3.86 | 3.08 |
+|SPEECHIO_ASR_ZH00015| 3.34 | 2.67 |

+ 1 - 1
funasr/bin/asr_inference.py

@@ -453,7 +453,7 @@ def inference_modelscope(
                     ibest_writer["score"][key] = str(hyp.score)
                 
                 if text is not None:
-                    text_postprocessed = postprocess_utils.sentence_postprocess(token)
+                    text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
                     item = {'key': key, 'value': text_postprocessed}
                     asr_result_list.append(item)
                     finish_count += 1

+ 6 - 2
funasr/bin/asr_inference_paraformer.py

@@ -428,7 +428,11 @@ def inference_modelscope(
         format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
     )
 
-    hotword_list_or_file = param_dict['hotword']
+    if param_dict is not None:
+        hotword_list_or_file = param_dict.get('hotword')
+    else:
+        hotword_list_or_file = None
+
     if ngpu >= 1 and torch.cuda.is_available():
         device = "cuda"
     else:
@@ -539,7 +543,7 @@ def inference_modelscope(
                         ibest_writer["rtf"][key] = rtf_cur
 
                     if text is not None:
-                        text_postprocessed = postprocess_utils.sentence_postprocess(token)
+                        text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
                         item = {'key': key, 'value': text_postprocessed}
                         asr_result_list.append(item)
                         finish_count += 1

+ 1 - 1
funasr/bin/asr_inference_paraformer_timestamp.py

@@ -436,7 +436,7 @@ def inference(
                     ibest_writer["score"][key] = str(hyp.score)
     
                 if text is not None:
-                    text_postprocessed = postprocess_utils.sentence_postprocess(token)
+                    text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
                     item = {'key': key, 'value': text_postprocessed}
                     asr_result_list.append(item)
                     finish_count += 1

+ 14 - 5
funasr/bin/asr_inference_paraformer_vad.py

@@ -241,6 +241,11 @@ def inference_modelscope(
             allow_variable_data_keys=allow_variable_data_keys,
             inference=True,
         )
+
+        if param_dict is not None:
+            use_timestamp = param_dict.get('use_timestamp', True)
+        else:
+            use_timestamp = True
         
         finish_count = 0
         file_count = 1
@@ -284,8 +289,10 @@ def inference_modelscope(
                 text, token, token_int = result[0], result[1], result[2]
                 time_stamp = None if len(result) < 4 else result[3]
                
-                
-                postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
+                if use_timestamp and time_stamp is not None:
+                    postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
+                else:
+                    postprocessed_result = postprocess_utils.sentence_postprocess(token)
                 text_postprocessed = ""
                 time_stamp_postprocessed = ""
                 text_postprocessed_punc = postprocessed_result
@@ -293,9 +300,11 @@ def inference_modelscope(
                     text_postprocessed, time_stamp_postprocessed, word_lists = postprocessed_result[0], \
                                                                                postprocessed_result[1], \
                                                                                postprocessed_result[2]
-                    text_postprocessed_punc = text_postprocessed
-                    if len(word_lists) > 0 and text2punc is not None:
-                        text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
+                else:
+                    text_postprocessed, word_lists = postprocessed_result[0], postprocessed_result[1]
+                text_postprocessed_punc = text_postprocessed
+                if len(word_lists) > 0 and text2punc is not None:
+                    text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
 
                 
                 item = {'key': key, 'value': text_postprocessed_punc}

+ 16 - 5
funasr/bin/asr_inference_paraformer_vad_punc.py

@@ -570,6 +570,11 @@ def inference_modelscope(
             allow_variable_data_keys=allow_variable_data_keys,
             inference=True,
         )
+
+        if param_dict is not None:
+            use_timestamp = param_dict.get('use_timestamp', True)
+        else:
+            use_timestamp = True
     
         finish_count = 0
         file_count = 1
@@ -612,8 +617,11 @@ def inference_modelscope(
                 result = result_segments[0]
                 text, token, token_int = result[0], result[1], result[2]
                 time_stamp = None if len(result) < 4 else result[3]
-    
-                postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
+   
+                if use_timestamp and time_stamp is not None: 
+                    postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
+                else:
+                    postprocessed_result = postprocess_utils.sentence_postprocess(token)
                 text_postprocessed = ""
                 time_stamp_postprocessed = ""
                 text_postprocessed_punc = postprocessed_result
@@ -621,9 +629,12 @@ def inference_modelscope(
                     text_postprocessed, time_stamp_postprocessed, word_lists = postprocessed_result[0], \
                                                                                postprocessed_result[1], \
                                                                                postprocessed_result[2]
-                    text_postprocessed_punc = text_postprocessed
-                    if len(word_lists) > 0 and text2punc is not None:
-                        text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
+                else:
+                    text_postprocessed, word_lists = postprocessed_result[0], postprocessed_result[1]
+
+                text_postprocessed_punc = text_postprocessed
+                if len(word_lists) > 0 and text2punc is not None:
+                    text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
     
                 item = {'key': key, 'value': text_postprocessed_punc}
                 if text_postprocessed != "":

+ 1 - 1
funasr/bin/asr_inference_uniasr.py

@@ -492,7 +492,7 @@ def inference_modelscope(
                     ibest_writer["score"][key] = str(hyp.score)
     
                 if text is not None:
-                    text_postprocessed = postprocess_utils.sentence_postprocess(token)
+                    text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
                     item = {'key': key, 'value': text_postprocessed}
                     asr_result_list.append(item)
                     finish_count += 1

+ 1 - 1
funasr/bin/asr_inference_uniasr_vad.py

@@ -492,7 +492,7 @@ def inference_modelscope(
                     ibest_writer["score"][key] = str(hyp.score)
     
                 if text is not None:
-                    text_postprocessed = postprocess_utils.sentence_postprocess(token)
+                    text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
                     item = {'key': key, 'value': text_postprocessed}
                     asr_result_list.append(item)
                     finish_count += 1

+ 5 - 1
funasr/utils/postprocess_utils.py

@@ -232,5 +232,9 @@ def sentence_postprocess(words: List[Any], time_stamp: List[List] = None):
         return sentence, ts_lists, real_word_lists
     else:
         word_lists = abbr_dispose(word_lists)
+        real_word_lists = []
+        for ch in word_lists:
+            if ch != ' ':
+                real_word_lists.append(ch)
         sentence = ''.join(word_lists).strip()
-        return sentence
+        return sentence, real_word_lists