Browse Source

fix uniasr postprocess

lzr265946 3 years ago
parent
commit
da830cb825
2 changed files with 4 additions and 2 deletions
  1. +2 -1
      funasr/bin/asr_inference_uniasr.py
  2. +2 -1
      funasr/bin/asr_inference_uniasr_vad.py

+ 2 - 1
funasr/bin/asr_inference_uniasr.py

@@ -261,6 +261,7 @@ class Speech2Text:
 
             # Change integer-ids to tokens
             token = self.converter.ids2tokens(token_int)
+            token = list(filter(lambda x: x != "<gbg>", token))
 
             if self.tokenizer is not None:
                 text = self.tokenizer.tokens2text(token)
@@ -512,7 +513,7 @@ def inference_modelscope(
                     finish_count += 1
                     asr_utils.print_progress(finish_count / file_count)
                     if writer is not None:
-                        ibest_writer["text"][key] = text
+                        ibest_writer["text"][key] = text_postprocessed
         return asr_result_list
     
     return _forward

+ 2 - 1
funasr/bin/asr_inference_uniasr_vad.py

@@ -261,6 +261,7 @@ class Speech2Text:
 
             # Change integer-ids to tokens
             token = self.converter.ids2tokens(token_int)
+            token = list(filter(lambda x: x != "<gbg>", token))
 
             if self.tokenizer is not None:
                 text = self.tokenizer.tokens2text(token)
@@ -512,7 +513,7 @@ def inference_modelscope(
                     finish_count += 1
                     asr_utils.print_progress(finish_count / file_count)
                     if writer is not None:
-                        ibest_writer["text"][key] = text
+                        ibest_writer["text"][key] = text_postprocessed
         return asr_result_list
     
     return _forward