test_sv_inference_pipeline.py 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. import unittest
  2. import numpy as np
  3. from modelscope.pipelines import pipeline
  4. from modelscope.utils.constant import Tasks
  5. from modelscope.utils.logger import get_logger
# Module-level logger shared by the tests below, created via modelscope's get_logger helper.
logger = get_logger()
  7. class TestXVectorInferencePipelines(unittest.TestCase):
  8. def test_funasr_path(self):
  9. import funasr
  10. import os
  11. logger.info("run_dir:{0} ; funasr_path: {1}".format(os.getcwd(), funasr.__file__))
  12. def test_inference_pipeline(self):
  13. inference_sv_pipline = pipeline(
  14. task=Tasks.speaker_verification,
  15. model='damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch'
  16. )
  17. # 提取不同句子的说话人嵌入码
  18. rec_result = inference_sv_pipline(
  19. audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_enroll.wav')
  20. enroll = rec_result["spk_embedding"]
  21. rec_result = inference_sv_pipline(
  22. audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_same.wav')
  23. same = rec_result["spk_embedding"]
  24. rec_result = inference_sv_pipline(
  25. audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_different.wav')
  26. different = rec_result["spk_embedding"]
  27. # 对相同的说话人计算余弦相似度
  28. sv_threshold = 0.9465
  29. same_cos = np.sum(enroll * same) / (np.linalg.norm(enroll) * np.linalg.norm(same))
  30. same_cos = max(same_cos - sv_threshold, 0.0) / (1.0 - sv_threshold) * 100.0
  31. logger.info("Similarity: {}".format(same_cos))
  32. # 对不同的说话人计算余弦相似度
  33. diff_cos = np.sum(enroll * different) / (np.linalg.norm(enroll) * np.linalg.norm(different))
  34. diff_cos = max(diff_cos - sv_threshold, 0.0) / (1.0 - sv_threshold) * 100.0
  35. logger.info("Similarity: {}".format(diff_cos))
# Run the tests in this module when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()