speech_asr 3 years ago
parent
commit
85c1848286

+ 3 - 4
egs_modelscope/speaker_diarization/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch/unit_test.py

@@ -14,13 +14,12 @@ inference_diar_pipline = pipeline(
 )
 
 # 以 audio_list 作为输入,其中第一个音频为待检测语音,后面的音频为不同说话人的声纹注册语音
-audio_list = [[
+audio_list = [
     "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/record.wav",
     "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/spk_A.wav",
     "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/spk_B.wav",
     "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/spk_B1.wav"
-]]
+]
 
 results = inference_diar_pipline(audio_in=audio_list)
-for rst in results:
-    print(rst["value"])
+print(results)

+ 1 - 1
funasr/bin/eend_ola_inference.py

@@ -210,7 +210,7 @@ def inference_modelscope(
         if data_path_and_name_and_type is None and raw_inputs is not None:
             if isinstance(raw_inputs, torch.Tensor):
                 raw_inputs = raw_inputs.numpy()
-            data_path_and_name_and_type = [raw_inputs, "speech", "waveform"]
+            data_path_and_name_and_type = [raw_inputs[0], "speech", "bytes"]
         logger.info(data_path_and_name_and_type)
         loader = EENDOLADiarTask.build_streaming_iterator(
             data_path_and_name_and_type,