# demo_vad_online.py
  1. import soundfile
  2. from funasr_onnx import Fsmn_vad_online
  3. model_dir = "/mnt/ailsa.zly/tfbase/espnet_work/FunASR_dev_zly/export/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch"
  4. wav_path = "/mnt/ailsa.zly/tfbase/espnet_work/FunASR_dev_zly/egs_modelscope/vad/speech_fsmn_vad_zh-cn-16k-common/vad_example_16k.wav"
  5. model = Fsmn_vad_online(model_dir)
  6. ##online vad
  7. speech, sample_rate = soundfile.read(wav_path)
  8. speech_length = speech.shape[0]
  9. #
  10. sample_offset = 0
  11. step = 1600
  12. param_dict = {'in_cache': []}
  13. for sample_offset in range(0, speech_length, min(step, speech_length - sample_offset)):
  14. if sample_offset + step >= speech_length - 1:
  15. step = speech_length - sample_offset
  16. is_final = True
  17. else:
  18. is_final = False
  19. param_dict['is_final'] = is_final
  20. segments_result = model(audio_in=speech[sample_offset: sample_offset + step],
  21. param_dict=param_dict)
  22. if segments_result:
  23. print(segments_result)