2 года назад · 167bab54bb
--- a/egs/wenetspeech/conformer/conf/train_asr_conformer.yaml
+++ b/egs/wenetspeech/conformer/conf/train_asr_conformer.yaml
@@ -90,7 +90,7 @@ specaug_conf:
 
				 
			
 
				 dataset_conf:
			
 
				     data_names: speech,text
			
 
				-    data_types: sound,text
			
 
				+    data_types: sound,text_nospace
			
 
				     shuffle: True
			
 
				     shuffle_conf:
			
 
				         shuffle_size: 2048
			
--- a/funasr/datasets/large_datasets/dataset.py
+++ b/funasr/datasets/large_datasets/dataset.py
@@ -148,6 +148,12 @@ class AudioDataset(IterableDataset):
 
				                         if "key" not in sample_dict:
			
 
				                             sample_dict["key"] = segs[0]
			
 
				                         sample_dict['hw_tag'] = 1
			
 
				+                    elif data_type == "text_nospace":
			
 
				+                        text = item
			
 
				+                        segs = text.strip().split(maxsplit=1)
			
 
				+                        sample_dict[data_name] = [x for x in segs[1]]
			
 
				+                        if "key" not in sample_dict:
			
 
				+                            sample_dict["key"] = segs[0]
			
 
				                     else:
			
 
				                         text = item
			
 
				                         segs = text.strip().split()