2 лет назад · ccac6ceea9
--- a/funasr/datasets/preprocessor.py
+++ b/funasr/datasets/preprocessor.py
@@ -664,26 +664,6 @@ class CodeMixTokenizerCommonPreprocessor(CommonPreprocessor):
 
				         if self.seg_jieba:
			
 
				             jieba.load_userdict(seg_dict_file)
			
 
				 
			
 
				-    @classmethod
			
 
				-    def split_words(cls, text: str):
			
 
				-        words = []
			
 
				-        segs = text.split()
			
 
				-        for seg in segs:
			
 
				-            # There is no space in seg.
			
 
				-            current_word = ""
			
 
				-            for c in seg:
			
 
				-                if len(c.encode()) == 1:
			
 
				-                    # This is an ASCII char.
			
 
				-                    current_word += c
			
 
				-                else:
			
 
				-                    # This is a Chinese char.
			
 
				-                    if len(current_word) > 0:
			
 
				-                        words.append(current_word)
			
 
				-                        current_word = ""
			
 
				-                    words.append(c)
			
 
				-            if len(current_word) > 0:
			
 
				-                words.append(current_word)
			
 
				-        return words
			
 
				 
			
 
				     @classmethod
			
 
				     def isEnglish(cls, text:str):