2 years ago · 9903ed6823
--- a/docs/m2met2/_build/doctrees/Baseline.doctree
+++ b/docs/m2met2/_build/doctrees/Baseline.doctree
--- a/docs/m2met2/_build/doctrees/Contact.doctree
+++ b/docs/m2met2/_build/doctrees/Contact.doctree
--- a/docs/m2met2/_build/doctrees/Dataset.doctree
+++ b/docs/m2met2/_build/doctrees/Dataset.doctree
--- a/docs/m2met2/_build/doctrees/Introduction.doctree
+++ b/docs/m2met2/_build/doctrees/Introduction.doctree
--- a/docs/m2met2/_build/doctrees/Organizers.doctree
+++ b/docs/m2met2/_build/doctrees/Organizers.doctree
--- a/docs/m2met2/_build/doctrees/Rules.doctree
+++ b/docs/m2met2/_build/doctrees/Rules.doctree
--- a/docs/m2met2/_build/doctrees/Track_setting_and_evaluation.doctree
+++ b/docs/m2met2/_build/doctrees/Track_setting_and_evaluation.doctree
--- a/docs/m2met2/_build/doctrees/environment.pickle
+++ b/docs/m2met2/_build/doctrees/environment.pickle
--- a/docs/m2met2/_build/doctrees/index.doctree
+++ b/docs/m2met2/_build/doctrees/index.doctree
--- a/docs/m2met2/_build/html/.buildinfo
+++ b/docs/m2met2/_build/html/.buildinfo
@@ -1,4 +1,4 @@
 
				 # Sphinx build info version 1
			
 
				 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
			
 
				-config: a62852d90c3e533904d811bbf85f977d
			
 
				+config: 160d25833895e2f6c62a4c315cacc3b9
			
 
				 tags: 645f666f9bcd5a90fca523b33c5a78b7
			
--- a/docs/m2met2/_build/html/_images/qrcode.png
+++ b/docs/m2met2/_build/html/_images/qrcode.png
--- a/docs/m2met2/images/qrcode.png
+++ b/docs/m2met2/images/qrcode.png
--- a/docs/m2met2_cn/_build/doctrees/environment.pickle
+++ b/docs/m2met2_cn/_build/doctrees/environment.pickle
--- a/docs/m2met2_cn/_build/doctrees/index.doctree
+++ b/docs/m2met2_cn/_build/doctrees/index.doctree
--- a/docs/m2met2_cn/_build/doctrees/基线.doctree
+++ b/docs/m2met2_cn/_build/doctrees/基线.doctree
--- a/docs/m2met2_cn/_build/doctrees/数据集.doctree
+++ b/docs/m2met2_cn/_build/doctrees/数据集.doctree
--- a/docs/m2met2_cn/_build/doctrees/简介.doctree
+++ b/docs/m2met2_cn/_build/doctrees/简介.doctree
--- a/docs/m2met2_cn/_build/doctrees/组委会.doctree
+++ b/docs/m2met2_cn/_build/doctrees/组委会.doctree
--- a/docs/m2met2_cn/_build/doctrees/联系方式.doctree
+++ b/docs/m2met2_cn/_build/doctrees/联系方式.doctree
--- a/docs/m2met2_cn/_build/doctrees/规则.doctree
+++ b/docs/m2met2_cn/_build/doctrees/规则.doctree
--- a/docs/m2met2_cn/_build/doctrees/赛道设置与评估.doctree
+++ b/docs/m2met2_cn/_build/doctrees/赛道设置与评估.doctree
--- a/docs/m2met2_cn/_build/html/.buildinfo
+++ b/docs/m2met2_cn/_build/html/.buildinfo
@@ -1,4 +1,4 @@
 
				 # Sphinx build info version 1
			
 
				 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
			
 
				-config: 06d9c1d4093817b45b9d4df7ab350eaf
			
 
				+config: a4d4595bd4f85adbedc556dc23e6150a
			
 
				 tags: 645f666f9bcd5a90fca523b33c5a78b7
			
--- a/docs/m2met2_cn/_build/html/_images/qrcode.png
+++ b/docs/m2met2_cn/_build/html/_images/qrcode.png
--- a/docs/m2met2_cn/images/qrcode.png
+++ b/docs/m2met2_cn/images/qrcode.png
--- a/egs/alimeeting/sa-asr/asr_local.sh
+++ b/egs/alimeeting/sa-asr/asr_local.sh
@@ -1153,10 +1153,10 @@ if ! "${skip_train}"; then
 
				         mkdir -p ${sa_asr_exp}/log
			
 
				         INIT_FILE=${sa_asr_exp}/ddp_init
			
 
				         
			
 
				-        if [ ! -f "exp/damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/sv.pb" ]; then
			
 
				+        if [ ! -f "exp/damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/sv.pth" ]; then
			
 
				             # download xvector extractor model file
			
 
				             python local/download_xvector_model.py exp
			
 
				-            log "Successfully download the pretrained xvector extractor to exp/damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/sv.pb"
			
 
				+            log "Successfully download the pretrained xvector extractor to exp/damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/sv.pth"
			
 
				         fi
			
 
				         
			
 
				         if [ -f $INIT_FILE ];then
			
@@ -1195,8 +1195,8 @@ if ! "${skip_train}"; then
 
				                     --init_param "${asr_exp}/valid.acc.ave.pb:decoder.decoders.3:decoder.decoder4.2" \
			
 
				                     --init_param "${asr_exp}/valid.acc.ave.pb:decoder.decoders.4:decoder.decoder4.3" \
			
 
				                     --init_param "${asr_exp}/valid.acc.ave.pb:decoder.decoders.5:decoder.decoder4.4" \
			
 
				-                    --init_param "exp/damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/sv.pb:encoder:spk_encoder"   \
			
 
				-                    --init_param "exp/damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/sv.pb:decoder:spk_encoder:decoder.output_dense"   \
			
 
				+                    --init_param "exp/damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/sv.pth:encoder:spk_encoder"   \
			
 
				+                    --init_param "exp/damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/sv.pth:decoder:spk_encoder:decoder.output_dense"   \
			
 
				                     --valid_data_path_and_name_and_type "${_asr_valid_dir}/${_scp},speech,${_type}" \
			
 
				                     --valid_data_path_and_name_and_type "${_asr_valid_dir}/text,text,text" \
			
 
				                     --valid_data_path_and_name_and_type "${_asr_valid_dir}/oracle_profile_nopadding.scp,profile,npy" \
			
--- a/egs/alimeeting/sa-asr/conf/train_asr_conformer.yaml
+++ b/egs/alimeeting/sa-asr/conf/train_asr_conformer.yaml
@@ -4,6 +4,7 @@ frontend_conf:
 
				     n_fft: 400
			
 
				     win_length: 400
			
 
				     hop_length: 160
			
 
				+    use_channel: 0
			
 
				     
			
 
				 # encoder related
			
 
				 encoder: conformer
			
--- a/egs/alimeeting/sa-asr/conf/train_sa_asr_conformer.yaml
+++ b/egs/alimeeting/sa-asr/conf/train_sa_asr_conformer.yaml
@@ -4,6 +4,7 @@ frontend_conf:
 
				     n_fft: 400
			
 
				     win_length: 400
			
 
				     hop_length: 160
			
 
				+    use_channel: 0
			
 
				 
			
 
				 # encoder related
			
 
				 asr_encoder: conformer
			
--- a/funasr/bin/asr_infer.py
+++ b/funasr/bin/asr_infer.py
@@ -1510,8 +1510,13 @@ class Speech2TextTransducer:
 
				         if isinstance(speech, np.ndarray):
			
 
				             speech = torch.tensor(speech)
			
 
				         
			
 
				-        feats = speech.unsqueeze(0).to(getattr(torch, self.dtype))
			
 
				-        feats_lengths = feats.new_full([1], dtype=torch.long, fill_value=feats.size(1))
			
 
				+        if self.frontend is not None:
			
 
				+            speech = torch.unsqueeze(speech, axis=0)
			
 
				+            speech_lengths = speech.new_full([1], dtype=torch.long, fill_value=speech.size(1))
			
 
				+            feats, feats_lengths = self.frontend(speech, speech_lengths)
			
 
				+        else:                
			
 
				+            feats = speech.unsqueeze(0).to(getattr(torch, self.dtype))
			
 
				+            feats_lengths = feats.new_full([1], dtype=torch.long, fill_value=feats.size(1))
			
 
				         
			
 
				         if self.asr_model.normalize is not None:
			
 
				             feats, feats_lengths = self.asr_model.normalize(feats, feats_lengths)
			
@@ -1536,14 +1541,19 @@ class Speech2TextTransducer:
 
				         
			
 
				         if isinstance(speech, np.ndarray):
			
 
				             speech = torch.tensor(speech)
			
 
				-        
			
 
				-        feats = speech.unsqueeze(0).to(getattr(torch, self.dtype))
			
 
				-        feats_lengths = feats.new_full([1], dtype=torch.long, fill_value=feats.size(1))
			
 
				+
			
 
				+        if self.frontend is not None:
			
 
				+            speech = torch.unsqueeze(speech, axis=0)
			
 
				+            speech_lengths = speech.new_full([1], dtype=torch.long, fill_value=speech.size(1))
			
 
				+            feats, feats_lengths = self.frontend(speech, speech_lengths)
			
 
				+        else:                
			
 
				+            feats = speech.unsqueeze(0).to(getattr(torch, self.dtype))
			
 
				+            feats_lengths = feats.new_full([1], dtype=torch.long, fill_value=feats.size(1))
			
 
				         
			
 
				         feats = to_device(feats, device=self.device)
			
 
				         feats_lengths = to_device(feats_lengths, device=self.device)
			
 
				         
			
 
				-        enc_out, _ = self.asr_model.encoder(feats, feats_lengths)
			
 
				+        enc_out, _, _ = self.asr_model.encoder(feats, feats_lengths)
			
 
				         
			
 
				         nbest_hyps = self.beam_search(enc_out[0])
			
 
				         
			
--- a/funasr/bin/asr_train.py
+++ b/funasr/bin/asr_train.py
@@ -46,7 +46,8 @@ if __name__ == '__main__':
 
				     args = parse_args()
			
 
				 
			
 
				     # setup local gpu_id
			
 
				-    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_id)
			
 
				+    if args.ngpu > 0:
			
 
				+        os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_id)
			
 
				 
			
 
				     # DDP settings
			
 
				     if args.ngpu > 1:
			
@@ -57,9 +58,9 @@ if __name__ == '__main__':
 
				 
			
 
				     # re-compute batch size: when dataset type is small
			
 
				     if args.dataset_type == "small":
			
 
				-        if args.batch_size is not None:
			
 
				+        if args.batch_size is not None and args.ngpu > 0:
			
 
				             args.batch_size = args.batch_size * args.ngpu
			
 
				-        if args.batch_bins is not None:
			
 
				+        if args.batch_bins is not None and args.ngpu > 0:
			
 
				             args.batch_bins = args.batch_bins * args.ngpu
			
 
				 
			
 
				     main(args=args)
			
--- a/funasr/tasks/abs_task.py
+++ b/funasr/tasks/abs_task.py
@@ -1376,25 +1376,10 @@ class AbsTask(ABC):
 
				 
			
 
				             # 7. Build iterator factories
			
 
				             if args.dataset_type == "large":
			
 
				-                from funasr.datasets.large_datasets.build_dataloader import ArkDataLoader
			
 
				-                train_iter_factory = ArkDataLoader(args.train_data_file, args.token_list, args.dataset_conf,
			
 
				-                                                   frontend_conf=args.frontend_conf if hasattr(args,
			
 
				-                                                                                               "frontend_conf") else None,
			
 
				-                                                   seg_dict_file=args.seg_dict_file if hasattr(args,
			
 
				-                                                                                               "seg_dict_file") else None,
			
 
				-                                                   punc_dict_file=args.punc_list if hasattr(args,
			
 
				-                                                                                            "punc_list") else None,
			
 
				-                                                   bpemodel_file=args.bpemodel if hasattr(args, "bpemodel") else None,
			
 
				-                                                   mode="train")
			
 
				-                valid_iter_factory = ArkDataLoader(args.valid_data_file, args.token_list, args.dataset_conf,
			
 
				-                                                   frontend_conf=args.frontend_conf if hasattr(args,
			
 
				-                                                                                               "frontend_conf") else None,
			
 
				-                                                   seg_dict_file=args.seg_dict_file if hasattr(args,
			
 
				-                                                                                               "seg_dict_file") else None,
			
 
				-                                                   punc_dict_file=args.punc_list if hasattr(args,
			
 
				-                                                                                            "punc_list") else None,
			
 
				-                                                   bpemodel_file=args.bpemodel if hasattr(args, "bpemodel") else None,
			
 
				-                                                   mode="eval")
			
 
				+                from funasr.datasets.large_datasets.build_dataloader import LargeDataLoader
			
 
				+                train_iter_factory = LargeDataLoader(args, mode="train")
			
 
				+                valid_iter_factory = LargeDataLoader(args, mode="eval")
			
 
				+
			
 
				             elif args.dataset_type == "small":
			
 
				                 train_iter_factory = cls.build_iter_factory(
			
 
				                     args=args,
			
--- a/funasr/tasks/asr.py
+++ b/funasr/tasks/asr.py
@@ -363,12 +363,6 @@ class ASRTask(AbsTask):
 
				             default=get_default_kwargs(CTC),
			
 
				             help="The keyword arguments for CTC class.",
			
 
				         )
			
 
				-        group.add_argument(
			
 
				-            "--joint_network_conf",
			
 
				-            action=NestedDictAction,
			
 
				-            default=None,
			
 
				-            help="The keyword arguments for joint network class.",
			
 
				-        )
			
 
				 
			
 
				         group = parser.add_argument_group(description="Preprocess related")
			
 
				         group.add_argument(
			
@@ -1379,6 +1373,7 @@ class ASRTransducerTask(ASRTask):
 
				     num_optimizers: int = 1
			
 
				 
			
 
				     class_choices_list = [
			
 
				+        model_choices,
			
 
				         frontend_choices,
			
 
				         specaug_choices,
			
 
				         normalize_choices,
			
@@ -1476,7 +1471,7 @@ class ASRTransducerTask(ASRTask):
 
				         try:
			
 
				             model_class = model_choices.get_class(args.model)
			
 
				         except AttributeError:
			
 
				-            model_class = model_choices.get_class("asr")
			
 
				+            model_class = model_choices.get_class("rnnt_unified")
			
 
				 
			
 
				         model = model_class(
			
 
				             vocab_size=vocab_size,