# grpc_main_client_mic.py
# Streaming microphone ASR client: reads audio via PyAudio, gates it with
# WebRTC VAD, and streams speech segments to a gRPC ASR server.
  1. import pyaudio
  2. import grpc
  3. import json
  4. import webrtcvad
  5. import time
  6. import asyncio
  7. import argparse
  8. from grpc_client import transcribe_audio_bytes
  9. from paraformer_pb2_grpc import ASRStub
async def deal_chunk(sig_mic):
    """Process one microphone chunk.

    While the VAD classifies the chunk as speech, forward it to the ASR
    server. On the first silent chunk after speech (speech -> silence
    transition), ask the server to decode the buffered sentence, then print
    the recognized sentence and the round-trip delay in milliseconds.

    Args:
        sig_mic: raw PCM bytes of one mic chunk (16-bit mono; frame length
            must be one the VAD accepts — TODO confirm against --mic_chunk).

    Relies on module-level state set in __main__: `stub` (ASRStub),
    `SPEAKING` (bool, mutated here), `asr_user`, `language`, `sample_rate`,
    and `vad`.
    """
    global stub,SPEAKING,asr_user,language,sample_rate
    if vad.is_speech(sig_mic, sample_rate): #speaking
        SPEAKING = True
        # Stream this chunk to the server; the response iterator is not
        # consumed while speech continues (only on the silence transition).
        response = transcribe_audio_bytes(stub, sig_mic, user=asr_user, language=language, speaking = True, isEnd = False) #speaking, send audio to server.
    else: #silence
        begin_time = 0
        if SPEAKING: #means we have some audio recorded, send recognize order to server.
            SPEAKING = False
            begin_time = int(round(time.time() * 1000))
            # No audio payload (None): this call tells the server the
            # utterance ended and requests recognition of the sentence.
            response = transcribe_audio_bytes(stub, None, user=asr_user, language=language, speaking = False, isEnd = False) #speak end, call server for recognize one sentence
            resp = response.next()
            if "decoding" == resp.action:
                # Blocking wait for the final result; audio chunks arriving
                # during this wait are missed. TODO: a multi-threaded (e.g.
                # C++) client would avoid dropping audio here.
                resp = response.next()
                if "finish" == resp.action:
                    end_time = int(round(time.time() * 1000))
                    print (json.loads(resp.sentence))
                    print ("delay in ms: %d " % (end_time - begin_time))
        else:
            # Silence and we were not speaking: nothing buffered, nothing to do.
            pass
  30. async def record(host,port,sample_rate,mic_chunk,record_seconds,asr_user,language):
  31. with grpc.insecure_channel('{}:{}'.format(host, port)) as channel:
  32. global stub
  33. stub = ASRStub(channel)
  34. for i in range(0, int(sample_rate / mic_chunk * record_seconds)):
  35. sig_mic = stream.read(mic_chunk,exception_on_overflow = False)
  36. await asyncio.create_task(deal_chunk(sig_mic))
  37. #end grpc
  38. response = transcribe_audio_bytes(stub, None, user=asr_user, language=language, speaking = False, isEnd = True)
  39. print (response.next().action)
  40. if __name__ == '__main__':
  41. parser = argparse.ArgumentParser()
  42. parser.add_argument("--host",
  43. type=str,
  44. default="127.0.0.1",
  45. required=True,
  46. help="grpc server host ip")
  47. parser.add_argument("--port",
  48. type=int,
  49. default=10095,
  50. required=True,
  51. help="grpc server port")
  52. parser.add_argument("--user_allowed",
  53. type=str,
  54. default="project1_user1",
  55. help="allowed user for grpc client")
  56. parser.add_argument("--sample_rate",
  57. type=int,
  58. default=16000,
  59. help="audio sample_rate from client")
  60. parser.add_argument("--mic_chunk",
  61. type=int,
  62. default=160,
  63. help="chunk size for mic")
  64. parser.add_argument("--record_seconds",
  65. type=int,
  66. default=120,
  67. help="run specified seconds then exit ")
  68. args = parser.parse_args()
  69. SPEAKING = False
  70. asr_user = args.user_allowed
  71. sample_rate = args.sample_rate
  72. language = 'zh-CN'
  73. vad = webrtcvad.Vad()
  74. vad.set_mode(1)
  75. FORMAT = pyaudio.paInt16
  76. CHANNELS = 1
  77. p = pyaudio.PyAudio()
  78. stream = p.open(format=FORMAT,
  79. channels=CHANNELS,
  80. rate=args.sample_rate,
  81. input=True,
  82. frames_per_buffer=args.mic_chunk)
  83. print("* recording")
  84. asyncio.run(record(args.host,args.port,args.sample_rate,args.mic_chunk,args.record_seconds,args.user_allowed,language))
  85. stream.stop_stream()
  86. stream.close()
  87. p.terminate()
  88. print("recording stop")