View source code

Merge branch 'main' of github.com:alibaba-damo-academy/FunASR
add

游雁 3 years ago
Parent
Commit
3c632e4f1a

+ 20 - 1
docs/build_task.md

@@ -103,4 +103,23 @@ def build_model(cls, args, train):
         )
     return model
 ```
-This function defines the detail of the model. For different speech recognition models, the same speech recognition `Task` can usually be shared and the remaining thing needed to be done is to define a specific model in this function. For example, a speech recognition model with a standard encoder-decoder structure has been shown above. Specifically, it first defines each module of the model, including encoder, decoder, etc. and then combine these modules together to generate a complete model. In FunASR, the model needs to inherit `AbsESPnetModel` and the corresponding code can be seen in `funasr/train/abs_espnet_model.py`. The main function needed to be implemented is the `forward` function.
+This function defines the details of the model. Different speech recognition models can usually share the same speech recognition `Task`; all that remains is to define the specific model in this function. For example, a speech recognition model with a standard encoder-decoder structure is shown above: it first defines each module of the model (encoder, decoder, etc.) and then combines these modules into a complete model. In FunASR, the model needs to inherit from `AbsESPnetModel` (see `funasr/train/abs_espnet_model.py`); the main function to implement is `forward`.
+
+Next, we take `SANMEncoder` as an example to show how to use a custom encoder as part of the model when defining it; the corresponding code is in `funasr/models/encoder/sanm_encoder.py`. A custom encoder must inherit the common encoder class `AbsEncoder` and define a `forward` function that implements the encoder's forward computation. After the encoder is defined, it must also be registered in the `Task`, as in the example below:
+```python
+encoder_choices = ClassChoices(
+    "encoder",
+    classes=dict(
+        conformer=ConformerEncoder,
+        transformer=TransformerEncoder,
+        rnn=RNNEncoder,
+        sanm=SANMEncoder,
+        sanm_chunk_opt=SANMEncoderChunkOpt,
+        data2vec_encoder=Data2VecEncoder,
+        mfcca_enc=MFCCAEncoder,
+    ),
+    type_check=AbsEncoder,
+    default="rnn",
+)
+```
+In this code, `sanm=SANMEncoder` registers the newly defined `SANMEncoder` as an optional choice for the `encoder`. Once the user specifies `encoder` as `sanm` in the configuration file, `SANMEncoder` will be used as the model's `encoder` module.
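The registry pattern shown in the hunk above can be sketched in isolation. The following is a minimal, self-contained stand-in for FunASR's `ClassChoices`: the stub encoder classes and the simplified `get_class` method are assumptions for illustration, not FunASR's actual implementation.

```python
# Minimal sketch of a ClassChoices-style registry (simplified stand-in
# for FunASR's ClassChoices; the stub encoder classes are hypothetical).
class AbsEncoder:
    pass

class RNNEncoder(AbsEncoder):
    pass

class SANMEncoder(AbsEncoder):
    pass

class ClassChoices:
    def __init__(self, name, classes, type_check, default):
        # Verify every registered class satisfies the expected interface.
        for cls in classes.values():
            assert issubclass(cls, type_check)
        self.name = name
        self.classes = classes
        self.default = default

    def get_class(self, choice):
        # Resolve a config string such as "sanm" to the registered class,
        # falling back to the declared default when no choice is given.
        return self.classes[choice or self.default]

encoder_choices = ClassChoices(
    "encoder",
    classes=dict(rnn=RNNEncoder, sanm=SANMEncoder),
    type_check=AbsEncoder,
    default="rnn",
)

print(encoder_choices.get_class("sanm").__name__)  # SANMEncoder
print(encoder_choices.get_class(None).__name__)    # RNNEncoder
```

Registering a new encoder is then just one more entry in `classes`, which is why the commit only adds `sanm=SANMEncoder` rather than new wiring code.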

+ 19 - 0
docs_cn/build_task.md

@@ -103,3 +103,22 @@ def build_model(cls, args, train):
     return model
 ```
 This function defines the specific model. Different speech recognition models can usually share the same speech recognition `Task`; what remains is to define the specific model in this function. For example, a standard encoder-decoder speech recognition model is given here: it first defines each module of the model (encoder, decoder, etc.) and then combines these modules into a complete model. In FunASR, the model must inherit `AbsESPnetModel` (see `funasr/train/abs_espnet_model.py`); the main function to implement is `forward`.
+
+Below we take `SANMEncoder` as an example to show how to use a custom encoder as part of the model; the corresponding code is in `funasr/models/encoder/sanm_encoder.py`. A custom encoder must inherit the common encoder class `AbsEncoder` and define a `forward` function implementing the encoder's forward computation. After the encoder is defined, it must also be registered in the `Task`; a code example is given below:
+```python
+encoder_choices = ClassChoices(
+    "encoder",
+    classes=dict(
+        conformer=ConformerEncoder,
+        transformer=TransformerEncoder,
+        rnn=RNNEncoder,
+        sanm=SANMEncoder,
+        sanm_chunk_opt=SANMEncoderChunkOpt,
+        data2vec_encoder=Data2VecEncoder,
+        mfcca_enc=MFCCAEncoder,
+    ),
+    type_check=AbsEncoder,
+    default="rnn",
+)
+```
+As shown, `sanm=SANMEncoder` registers the newly defined `SANMEncoder` as an optional choice for the `encoder`; when the user specifies `encoder` as `sanm` in the configuration file, `SANMEncoder` will be used as the model's `encoder` module.

+ 1 - 1
docs_cn/modelscope_usages.md

@@ -1,4 +1,4 @@
-# ModelScope Usage
+# ModelScope Usage Instructions
 ModelScope is Alibaba's open-source Model-as-a-Service sharing platform, providing flexible and convenient model application support for users in academia and industry. For usage details and available open-source models, see [ModelScope](https://www.modelscope.cn/models?page=1&tasks=auto-speech-recognition). In the speech domain, we provide autoregressive/non-autoregressive speech recognition, speech pre-training, punctuation prediction, and other models for convenient use.
 
 ## Overall Introduction

+ 1 - 3
funasr/bin/asr_inference_paraformer.py

@@ -660,11 +660,9 @@ def inference_modelscope(
         hotword_list_or_file = None
         if param_dict is not None:
             hotword_list_or_file = param_dict.get('hotword')
-
         if 'hotword' in kwargs:
             hotword_list_or_file = kwargs['hotword']
-
-        if speech2text.hotword_list is None:
+        if hotword_list_or_file is not None or 'hotword' in kwargs:
             speech2text.hotword_list = speech2text.generate_hotwords_list(hotword_list_or_file)
 
         # 3. Build data-iterator
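The hotword precedence that this hunk establishes can be sketched as a standalone helper (the function name `resolve_hotword` is hypothetical; it only mirrors the assignment order in the diff, where `kwargs['hotword']` overrides `param_dict['hotword']`, and the hotword list is regenerated only when a hotword source was actually supplied):

```python
# Sketch of the hotword precedence in the hunk above: kwargs wins over
# param_dict, and None means "no hotwords supplied at all".
def resolve_hotword(param_dict, kwargs):
    hotword_list_or_file = None
    if param_dict is not None:
        hotword_list_or_file = param_dict.get('hotword')
    if 'hotword' in kwargs:
        hotword_list_or_file = kwargs['hotword']
    return hotword_list_or_file

print(resolve_hotword({'hotword': 'a.txt'}, {}))                    # a.txt
print(resolve_hotword({'hotword': 'a.txt'}, {'hotword': 'b.txt'}))  # b.txt
print(resolve_hotword(None, {}))                                    # None
```

This also shows why the old guard `if speech2text.hotword_list is None` was replaced: it would skip regeneration when a hotword list already existed, even if the caller passed a new one.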

+ 13 - 12
funasr/bin/asr_inference_uniasr.py

@@ -398,6 +398,19 @@ def inference_modelscope(
     else:
         device = "cpu"
     
+    if param_dict is not None and "decoding_model" in param_dict:
+        if param_dict["decoding_model"] == "fast":
+            decoding_ind = 0
+            decoding_mode = "model1"
+        elif param_dict["decoding_model"] == "normal":
+            decoding_ind = 0
+            decoding_mode = "model2"
+        elif param_dict["decoding_model"] == "offline":
+            decoding_ind = 1
+            decoding_mode = "model2"
+        else:
+            raise NotImplementedError("unsupported decoding model {}".format(param_dict["decoding_model"]))
+
     # 1. Set random-seed
     set_all_random_seed(seed)
 
@@ -440,18 +453,6 @@ def inference_modelscope(
             if isinstance(raw_inputs, torch.Tensor):
                 raw_inputs = raw_inputs.numpy()
             data_path_and_name_and_type = [raw_inputs, "speech", "waveform"]
-        if param_dict is not None and "decoding_model" in param_dict:
-            if param_dict["decoding_model"] == "fast":
-                speech2text.decoding_ind = 0
-                speech2text.decoding_mode = "model1"
-            elif param_dict["decoding_model"] == "normal":
-                speech2text.decoding_ind = 0
-                speech2text.decoding_mode = "model2"
-            elif param_dict["decoding_model"] == "offline":
-                speech2text.decoding_ind = 1
-                speech2text.decoding_mode = "model2"
-            else:
-                raise NotImplementedError("unsupported decoding model {}".format(param_dict["decoding_model"]))
         loader = ASRTask.build_streaming_iterator(
             data_path_and_name_and_type,
             dtype=dtype,
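The `decoding_model` branching that this commit hoists out of the per-request loop maps each mode name to a `(decoding_ind, decoding_mode)` pair. A table-driven sketch of the same mapping is shown below; the dict form is an illustrative alternative, not the code in this commit:

```python
# Equivalent table-driven form of the if/elif chain in the hunk above:
# decoding_model -> (decoding_ind, decoding_mode).
DECODING_MODELS = {
    "fast":    (0, "model1"),
    "normal":  (0, "model2"),
    "offline": (1, "model2"),
}

def select_decoding(decoding_model):
    if decoding_model not in DECODING_MODELS:
        raise NotImplementedError(
            "unsupported decoding model {}".format(decoding_model))
    return DECODING_MODELS[decoding_model]

print(select_decoding("fast"))     # (0, 'model1')
print(select_decoding("offline"))  # (1, 'model2')
```

Moving this selection to configuration time (as the commit does, and as `asr_inference_uniasr_vad.py` repeats below) means the mode is resolved once instead of on every inference call.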

+ 13 - 12
funasr/bin/asr_inference_uniasr_vad.py

@@ -398,6 +398,19 @@ def inference_modelscope(
     else:
         device = "cpu"
 
+    if param_dict is not None and "decoding_model" in param_dict:
+        if param_dict["decoding_model"] == "fast":
+            decoding_ind = 0
+            decoding_mode = "model1"
+        elif param_dict["decoding_model"] == "normal":
+            decoding_ind = 0
+            decoding_mode = "model2"
+        elif param_dict["decoding_model"] == "offline":
+            decoding_ind = 1
+            decoding_mode = "model2"
+        else:
+            raise NotImplementedError("unsupported decoding model {}".format(param_dict["decoding_model"]))
+
     # 1. Set random-seed
     set_all_random_seed(seed)
 
@@ -440,18 +453,6 @@ def inference_modelscope(
             if isinstance(raw_inputs, torch.Tensor):
                 raw_inputs = raw_inputs.numpy()
             data_path_and_name_and_type = [raw_inputs, "speech", "waveform"]
-        if param_dict is not None and "decoding_model" in param_dict:
-            if param_dict["decoding_model"] == "fast":
-                speech2text.decoding_ind = 0
-                speech2text.decoding_mode = "model1"
-            elif param_dict["decoding_model"] == "normal":
-                speech2text.decoding_ind = 0
-                speech2text.decoding_mode = "model2"
-            elif param_dict["decoding_model"] == "offline":
-                speech2text.decoding_ind = 1
-                speech2text.decoding_mode = "model2"
-            else:
-                raise NotImplementedError("unsupported decoding model {}".format(param_dict["decoding_model"]))
         loader = ASRTask.build_streaming_iterator(
             data_path_and_name_and_type,
             dtype=dtype,