Przeglądaj źródła

Merge branch 'main' of https://github.com/alibaba-damo-academy/FunASR into main

雾聪 2 lata temu
rodzic
commit
720c048410

+ 2 - 2
funasr/bin/asr_infer.py

@@ -493,9 +493,9 @@ class Speech2TextParaformer:
             else:
                 if pre_token_length[i] == 0:
                     yseq = torch.tensor(
-                        [self.asr_model.sos] + [self.asr_model.eos], device=yseq.device
+                        [self.asr_model.sos] + [self.asr_model.eos], device=pre_acoustic_embeds.device
                     )
-                    score = torch.tensor(0.0, device=yseq.device)
+                    score = torch.tensor(0.0, device=pre_acoustic_embeds.device)
                 else:
                     yseq = am_scores.argmax(dim=-1)
                     score = am_scores.max(dim=-1)[0]

+ 8 - 7
funasr/models/e2e_vad.py

@@ -296,13 +296,14 @@ class E2EVadModel(nn.Module):
         self.sil_frame = 0
         self.frame_probs = []
 
-        assert self.output_data_buf[-1].contain_seg_end_point == True
-        drop_frames = int(self.output_data_buf[-1].end_ms / self.vad_opts.frame_in_ms)
-        real_drop_frames = drop_frames - self.last_drop_frames
-        self.last_drop_frames = drop_frames
-        self.data_buf_all = self.data_buf_all[real_drop_frames * int(self.vad_opts.frame_in_ms * self.vad_opts.sample_rate / 1000):]
-        self.decibel = self.decibel[real_drop_frames:]
-        self.scores = self.scores[:, real_drop_frames:, :]
+        if self.output_data_buf:
+            assert self.output_data_buf[-1].contain_seg_end_point == True
+            drop_frames = int(self.output_data_buf[-1].end_ms / self.vad_opts.frame_in_ms)
+            real_drop_frames = drop_frames - self.last_drop_frames
+            self.last_drop_frames = drop_frames
+            self.data_buf_all = self.data_buf_all[real_drop_frames * int(self.vad_opts.frame_in_ms * self.vad_opts.sample_rate / 1000):]
+            self.decibel = self.decibel[real_drop_frames:]
+            self.scores = self.scores[:, real_drop_frames:, :]
 
     def ComputeDecibel(self) -> None:
         frame_sample_length = int(self.vad_opts.frame_length_ms * self.vad_opts.sample_rate / 1000)

+ 1 - 1
funasr/runtime/html5/readme.md

@@ -41,7 +41,7 @@ python h5Server.py --host 0.0.0.0 --port 1337
 `Tips:` asr service and html5 service should be deployed on the same device.
 ```shell
 cd ../python/websocket
-python wss_srv_asr.py --port 1095
+python wss_srv_asr.py --port 10095
 ```
 
 

+ 1 - 1
funasr/runtime/html5/readme_cn.md

@@ -49,7 +49,7 @@ python h5Server.py --host 0.0.0.0 --port 1337
 #### wss方式
 ```shell
 cd ../python/websocket
-python wss_srv_asr.py --port 1095
+python wss_srv_asr.py --port 10095
 ```
 
 ### 浏览器打开地址

+ 2 - 1
funasr/runtime/html5/static/index.html

@@ -19,7 +19,7 @@
 			<div class="div_class_recordControl">
 				asr服务器地址(必填):
 				<br>
-				<input id="wssip" type="text" style=" width: 100%;height:100%" value="wss://127.0.0.1:1095/"/>
+				<input id="wssip" type="text" style=" width: 100%;height:100%" value="wss://127.0.0.1:10095/"/>
 				<br>
 				<br>
 				<div style="border:2px solid #ccc;">
@@ -36,6 +36,7 @@
 				<br>
                 <div id="info_div">请点击开始</div>
 				<div class="div_class_buttons">
+					<button id="btnConnect">连接</button>
 					<button id="btnStart">开始</button>
 					<button id="btnStop">停止</button>
  

+ 33 - 14
funasr/runtime/html5/static/main.js

@@ -23,12 +23,14 @@ var rec = Recorder({
 var sampleBuf=new Int16Array();
 // 定义按钮响应事件
 var btnStart = document.getElementById('btnStart');
-btnStart.onclick = start;
+btnStart.onclick = record;
 var btnStop = document.getElementById('btnStop');
 btnStop.onclick = stop;
 btnStop.disabled = true;
+btnStart.disabled = true;
  
-
+btnConnect= document.getElementById('btnConnect');
+btnConnect.onclick = start;
  
 var rec_text="";
 var offline_text="";
@@ -38,7 +40,7 @@ var info_div = document.getElementById('info_div');
 //now_ipaddress=now_ipaddress.replace("https://","wss://");
 //now_ipaddress=now_ipaddress.replace("static/index.html","");
 //document.getElementById('wssip').value=now_ipaddress;
-
+ 
 function getAsrMode(){
 
             var item = null;
@@ -64,16 +66,18 @@ function getJsonMessage( jsonMsg ) {
 	var asrmodel=JSON.parse(jsonMsg.data)['mode'];
 	if(asrmodel=="2pass-offline")
 	{
-		offline_text=offline_text+rectxt.replace(/ +/g,"");
+		offline_text=offline_text+rectxt; //.replace(/ +/g,"");
 		rec_text=offline_text;
 	}
 	else
 	{
-		rec_text=rec_text+rectxt.replace(/ +/g,"");
+		rec_text=rec_text+rectxt; //.replace(/ +/g,"");
 	}
 	var varArea=document.getElementById('varArea');
 	
 	varArea.value=rec_text;
+	console.log( "offline_text: " + asrmodel+","+offline_text);
+	console.log( "rec_text: " + rec_text);
 	 
  
 }
@@ -82,11 +86,14 @@ function getJsonMessage( jsonMsg ) {
 function getConnState( connState ) {
 	if ( connState === 0 ) {
  
-		rec.open( function(){
-			rec.start();
-			console.log("开始录音");
+		//rec.open( function(){
+		//	rec.start();
+		//	console.log("开始录音");
  
-		});
+		//});
+		btnStart.disabled = false;
+		btnConnect.disabled = true;
+		info_div.innerHTML='连接成功!请点击开始';
 	} else if ( connState === 1 ) {
 		//stop();
 	} else if ( connState === 2 ) {
@@ -95,11 +102,19 @@ function getConnState( connState ) {
 		 
 		alert("连接地址"+document.getElementById('wssip').value+"失败,请检查asr地址和端口,并确保h5服务和asr服务在同一个域内。或换个浏览器试试。");
 		btnStart.disabled = true;
-		info_div.innerHTML='请点击开始';
+
+		info_div.innerHTML='请点击连接';
 	}
 }
 
-
+function record()
+{
+		 rec.open( function(){
+		 rec.start();
+		 console.log("开始");
+		 btnStart.disabled = true;
+		 });
+}
 // 识别启动、停止、清空操作
 function start() {
 	
@@ -107,14 +122,15 @@ function start() {
 	clear();
 	//控件状态更新
  	    
-
+    info_div.innerHTML="正在连接asr服务器,请等待...";
 	//启动连接
 	var ret=wsconnecter.wsStart();
 	if(ret==1){
 		isRec = true;
 		btnStart.disabled = true;
 		btnStop.disabled = false;
-	    info_div.innerHTML="正在连接asr服务器,请等待...";
+		btnConnect.disabled=true;
+
 	}
 }
 
@@ -147,7 +163,10 @@ function stop() {
 	btnStop.disabled = true;
 	setTimeout(function(){
 		console.log("call stop ws!");
-		wsconnecter.wsStop();btnStart.disabled = false;info_div.innerHTML="请点击开始";}, 3000 );
+		wsconnecter.wsStop();
+		btnStart.disabled = true;
+		btnConnect.disabled=false;
+		info_div.innerHTML="请点击连接";}, 3000 );
 	rec.stop(function(blob,duration){
   
 		console.log(blob);

+ 1 - 1
funasr/runtime/python/websocket/parse_args.py

@@ -33,7 +33,7 @@ parser.add_argument("--ngpu",
                     help="0 for cpu, 1 for gpu")
 parser.add_argument("--ncpu",
                     type=int,
-                    default=1,
+                    default=4,
                     help="cpu cores")
 parser.add_argument("--certfile",
                     type=str,

+ 3 - 3
funasr/runtime/python/websocket/wss_srv_asr.py

@@ -67,9 +67,9 @@ async def ws_reset(websocket):
     websocket.param_dict_asr_online = {"cache": dict()}
     websocket.param_dict_vad = {'in_cache': dict(), "is_final": True}
     websocket.param_dict_asr_online["is_final"]=True
-    audio_in=b''.join(np.zeros(int(16000),dtype=np.int16))
-    inference_pipeline_vad(audio_in=audio_in, param_dict=websocket.param_dict_vad)
-    inference_pipeline_asr_online(audio_in=audio_in, param_dict=websocket.param_dict_asr_online)
+    # audio_in=b''.join(np.zeros(int(16000),dtype=np.int16))
+    # inference_pipeline_vad(audio_in=audio_in, param_dict=websocket.param_dict_vad)
+    # inference_pipeline_asr_online(audio_in=audio_in, param_dict=websocket.param_dict_asr_online)
     await websocket.close()
     
     

+ 1 - 1
funasr/version.txt

@@ -1 +1 @@
-0.5.8
+0.6.1