# Streaming microphone ASR client: reads mic audio, gates it with WebRTC VAD,
# and streams it to a gRPC speech-recognition server.
import argparse
import asyncio
import json
import time

import grpc
import pyaudio
import webrtcvad

from grpc_client import transcribe_audio_bytes
from paraformer_pb2_grpc import ASRStub
- async def deal_chunk(sig_mic):
- global stub,SPEAKING,asr_user,language,sample_rate
- if vad.is_speech(sig_mic, sample_rate): #speaking
- SPEAKING = True
- response = transcribe_audio_bytes(stub, sig_mic, user=asr_user, language=language, speaking = True, isEnd = False) #speaking, send audio to server.
- else: #silence
- begin_time = 0
- if SPEAKING: #means we have some audio recorded, send recognize order to server.
- SPEAKING = False
- begin_time = int(round(time.time() * 1000))
- response = transcribe_audio_bytes(stub, None, user=asr_user, language=language, speaking = False, isEnd = False) #speak end, call server for recognize one sentence
- resp = response.next()
- if "decoding" == resp.action:
- resp = response.next() #TODO, blocking operation may leads to miss some audio clips. C++ multi-threading is preferred.
- if "finish" == resp.action:
- end_time = int(round(time.time() * 1000))
- print (json.loads(resp.sentence))
- print ("delay in ms: %d " % (end_time - begin_time))
- else:
- pass
-
- async def record(host,port,sample_rate,mic_chunk,record_seconds,asr_user,language):
- with grpc.insecure_channel('{}:{}'.format(host, port)) as channel:
- global stub
- stub = ASRStub(channel)
- for i in range(0, int(sample_rate / mic_chunk * record_seconds)):
-
- sig_mic = stream.read(mic_chunk,exception_on_overflow = False)
- await asyncio.create_task(deal_chunk(sig_mic))
- #end grpc
- response = transcribe_audio_bytes(stub, None, user=asr_user, language=language, speaking = False, isEnd = True)
- print (response.next().action)
- if __name__ == '__main__':
- parser = argparse.ArgumentParser()
- parser.add_argument("--host",
- type=str,
- default="127.0.0.1",
- required=True,
- help="grpc server host ip")
-
- parser.add_argument("--port",
- type=int,
- default=10095,
- required=True,
- help="grpc server port")
-
- parser.add_argument("--user_allowed",
- type=str,
- default="project1_user1",
- help="allowed user for grpc client")
-
- parser.add_argument("--sample_rate",
- type=int,
- default=16000,
- help="audio sample_rate from client")
- parser.add_argument("--mic_chunk",
- type=int,
- default=160,
- help="chunk size for mic")
- parser.add_argument("--record_seconds",
- type=int,
- default=120,
- help="run specified seconds then exit ")
- args = parser.parse_args()
-
- SPEAKING = False
- asr_user = args.user_allowed
- sample_rate = args.sample_rate
- language = 'zh-CN'
-
- vad = webrtcvad.Vad()
- vad.set_mode(1)
- FORMAT = pyaudio.paInt16
- CHANNELS = 1
- p = pyaudio.PyAudio()
-
- stream = p.open(format=FORMAT,
- channels=CHANNELS,
- rate=args.sample_rate,
- input=True,
- frames_per_buffer=args.mic_chunk)
-
- print("* recording")
- asyncio.run(record(args.host,args.port,args.sample_rate,args.mic_chunk,args.record_seconds,args.user_allowed,language))
- stream.stop_stream()
- stream.close()
- p.terminate()
- print("recording stop")
-