"""Run SOND speaker diarization on a demo recording through ModelScope.

Author: Speech Lab, Alibaba Group, China
SOND: Speaker Overlap-aware Neural Diarization for Multi-party Meeting Analysis
https://arxiv.org/abs/2211.10243

The script builds a speaker-diarization pipeline, passes it one recording
followed by enrollment files for the different speakers, and prints the value
the pipeline returns.
"""
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Initialize the pipeline for inference.
# When running inference on raw waveform files, use the config file
# `sond.yaml` and set mode to `sond_demo`.
inference_diar_pipline = pipeline(
    mode="sond_demo",
    num_workers=0,
    task=Tasks.speaker_diarization,
    diar_model_config="sond.yaml",
    model='damo/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch',
    sv_model="damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch",
    sv_model_revision="v1.2.2",
)

# Use audio_list as the input: the first entry is the recording to be
# diarized, and the remaining entries are enrollment files for the
# different speakers.
audio_list = [
    "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/record.wav",
    "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/spk1.wav",
    "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/spk2.wav",
    "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/spk3.wav",
    "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/spk4.wav",
]

# Run diarization over the recording and its enrollments, then show the result.
results = inference_diar_pipline(audio_in=audio_list)
print(results)