grpc_server.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. from concurrent import futures
  2. import grpc
  3. import json
  4. import time
  5. from modelscope.pipelines import pipeline
  6. from modelscope.utils.constant import Tasks
  7. import paraformer_pb2_grpc
  8. from paraformer_pb2 import Response
  9. class ASRServicer(paraformer_pb2_grpc.ASRServicer):
  10. def __init__(self, user_allowed, model, sample_rate):
  11. print("ASRServicer init")
  12. self.init_flag = 0
  13. self.client_buffers = {}
  14. self.client_transcription = {}
  15. self.auth_user = user_allowed.split("|")
  16. self.inference_16k_pipeline = pipeline(task=Tasks.auto_speech_recognition, model=model)
  17. self.sample_rate = sample_rate
  18. def clear_states(self, user):
  19. self.clear_buffers(user)
  20. self.clear_transcriptions(user)
  21. def clear_buffers(self, user):
  22. if user in self.client_buffers:
  23. del self.client_buffers[user]
  24. def clear_transcriptions(self, user):
  25. if user in self.client_transcription:
  26. del self.client_transcription[user]
  27. def disconnect(self, user):
  28. self.clear_states(user)
  29. print("Disconnecting user: %s" % str(user))
  30. def Recognize(self, request_iterator, context):
  31. for req in request_iterator:
  32. if req.user not in self.auth_user:
  33. result = {}
  34. result["success"] = False
  35. result["detail"] = "Not Authorized user: %s " % req.user
  36. result["text"] = ""
  37. yield Response(sentence=json.dumps(result), user=req.user, action="terminate", language=req.language)
  38. elif req.isEnd: #end grpc
  39. print("asr end")
  40. self.disconnect(req.user)
  41. result = {}
  42. result["success"] = True
  43. result["detail"] = "asr end"
  44. result["text"] = ""
  45. yield Response(sentence=json.dumps(result), user=req.user, action="terminate",language=req.language)
  46. elif req.speaking: #continue speaking
  47. if req.audio_data is not None and len(req.audio_data) > 0:
  48. if req.user in self.client_buffers:
  49. self.client_buffers[req.user] += req.audio_data #append audio
  50. else:
  51. self.client_buffers[req.user] = req.audio_data
  52. result = {}
  53. result["success"] = True
  54. result["detail"] = "speaking"
  55. result["text"] = ""
  56. yield Response(sentence=json.dumps(result), user=req.user, action="speaking", language=req.language)
  57. elif not req.speaking: #silence
  58. if req.user not in self.client_buffers:
  59. result = {}
  60. result["success"] = True
  61. result["detail"] = "waiting_for_more_voice"
  62. result["text"] = ""
  63. yield Response(sentence=json.dumps(result), user=req.user, action="waiting", language=req.language)
  64. else:
  65. begin_time = int(round(time.time() * 1000))
  66. tmp_data = self.client_buffers[req.user]
  67. self.clear_states(req.user)
  68. result = {}
  69. result["success"] = True
  70. result["detail"] = "decoding data: %d bytes" % len(tmp_data)
  71. result["text"] = ""
  72. yield Response(sentence=json.dumps(result), user=req.user, action="decoding", language=req.language)
  73. if len(tmp_data) < 9600: #min input_len for asr model , 300ms
  74. end_time = int(round(time.time() * 1000))
  75. delay_str = str(end_time - begin_time)
  76. result = {}
  77. result["success"] = True
  78. result["detail"] = "waiting_for_more_voice"
  79. result["server_delay_ms"] = delay_str
  80. result["text"] = ""
  81. print ("user: %s , delay(ms): %s, info: %s " % (req.user, delay_str, "waiting_for_more_voice"))
  82. yield Response(sentence=json.dumps(result), user=req.user, action="waiting", language=req.language)
  83. else:
  84. asr_result = self.inference_16k_pipeline(audio_in=tmp_data, audio_fs = self.sample_rate)
  85. if "text" in asr_result:
  86. asr_result = asr_result['text']
  87. else:
  88. asr_result = ""
  89. end_time = int(round(time.time() * 1000))
  90. delay_str = str(end_time - begin_time)
  91. print ("user: %s , delay(ms): %s, text: %s " % (req.user, delay_str, asr_result))
  92. result = {}
  93. result["success"] = True
  94. result["detail"] = "finish_sentence"
  95. result["server_delay_ms"] = delay_str
  96. result["text"] = asr_result
  97. yield Response(sentence=json.dumps(result), user=req.user, action="finish", language=req.language)
  98. else:
  99. result = {}
  100. result["success"] = False
  101. result["detail"] = "error, no condition matched! Unknown reason."
  102. result["text"] = ""
  103. self.disconnect(req.user)
  104. yield Response(sentence=json.dumps(result), user=req.user, action="terminate", language=req.language)