2 years ago · 7a7ead00bc
--- a/funasr/bin/vad_inference.py
+++ b/funasr/bin/vad_inference.py
@@ -109,7 +109,7 @@ class Speech2VadSegment:
 
				             fbanks, fbanks_len = self.frontend.forward_fbank(speech, speech_lengths)
			
 
				             feats, feats_len = self.frontend.forward_lfr_cmvn(fbanks, fbanks_len)
			
 
				             fbanks = to_device(fbanks, device=self.device)
			
 
				-            # feats = to_device(feats, device=self.device)
			
 
				+            feats = to_device(feats, device=self.device)
			
 
				             feats_len = feats_len.int()
			
 
				         else:
			
 
				             raise Exception("Need to extract feats first, please configure frontend configuration")
			
@@ -131,7 +131,7 @@ class Speech2VadSegment:
 
				                 "in_cache": in_cache
			
 
				             }
			
 
				             # a. To device
			
 
				-            batch = to_device(batch, device=self.device)
			
 
				+            #batch = to_device(batch, device=self.device)
			
 
				             segments_part, in_cache = self.vad_model(**batch)
			
 
				             if segments_part:
			
 
				                 for batch_num in range(0, self.batch_size):
			
--- a/funasr/models/frontend/wav_frontend.py
+++ b/funasr/models/frontend/wav_frontend.py
@@ -34,7 +34,7 @@ def load_cmvn(cmvn_file):
 
				     means = np.array(means_list).astype(np.float)
			
 
				     vars = np.array(vars_list).astype(np.float)
			
 
				     cmvn = np.array([means, vars])
			
 
				-    cmvn = torch.as_tensor(cmvn, dype=torch.float32)
			
 
				+    cmvn = torch.as_tensor(cmvn, dtype=torch.float32)
			
 
				     return cmvn