游雁 2 года назад
Родитель
Commit
61c24582f9

+ 1 - 1
funasr/runtime/docs/SDK_advanced_guide_offline_zh.md

@@ -36,7 +36,7 @@ wget https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/sample/funasr_sa
 ```
 ```
 我们以Python语言客户端为例,进行说明,支持多种音频格式输入(.wav, .pcm, .mp3等),也支持视频输入(.mp4等),以及多文件列表wav.scp输入,其他版本客户端请参考文档([点击此处](#客户端用法详解)),定制服务部署请参考[如何定制服务部署](#如何定制服务部署)
 我们以Python语言客户端为例,进行说明,支持多种音频格式输入(.wav, .pcm, .mp3等),也支持视频输入(.mp4等),以及多文件列表wav.scp输入,其他版本客户端请参考文档([点击此处](#客户端用法详解)),定制服务部署请参考[如何定制服务部署](#如何定制服务部署)
 ```shell
 ```shell
-python3 wss_client_asr.py --host "127.0.0.1" --port 10095 --mode offline --audio_in "../audio/asr_example.wav" --output_dir "./results"
+python3 wss_client_asr.py --host "127.0.0.1" --port 10095 --mode offline --audio_in "../audio/asr_example.wav"
 ```
 ```
 
 
 ------------------
 ------------------

+ 8 - 0
funasr/runtime/python/websocket/README.md

@@ -47,6 +47,14 @@ git clone https://github.com/alibaba/FunASR.git && cd FunASR
 cd funasr/runtime/python/websocket
 cd funasr/runtime/python/websocket
 pip install -r requirements_client.txt
 pip install -r requirements_client.txt
 ```
 ```
+If you want to run inference on video files, you need to install `ffmpeg`:
+```shell
+apt-get install -y ffmpeg #ubuntu
+# yum install -y ffmpeg # centos
+# brew install ffmpeg # mac
+# winget install ffmpeg # windows
+pip3 install websockets ffmpeg-python
+```
 
 
 ### Start client
 ### Start client
 #### API-reference
 #### API-reference

+ 11 - 2
funasr/runtime/python/websocket/funasr_wss_client.py

@@ -138,8 +138,17 @@ async def record_from_scp(chunk_begin, chunk_size):
                 frames = wav_file.readframes(wav_file.getnframes())
                 frames = wav_file.readframes(wav_file.getnframes())
                 audio_bytes = bytes(frames)
                 audio_bytes = bytes(frames)
         else:
         else:
-            raise NotImplementedError(
-                f'Not supported audio type')
+            import ffmpeg
+            try:
+                # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
+                # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
+                audio_bytes, _ = (
+                    ffmpeg.input(wav_path, threads=0)
+                    .output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=16000)
+                    .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
+                )
+            except ffmpeg.Error as e:
+                raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
 
 
         # stride = int(args.chunk_size/1000*16000*2)
         # stride = int(args.chunk_size/1000*16000*2)
         stride = int(60 * args.chunk_size[1] / args.chunk_interval / 1000 * 16000 * 2)
         stride = int(60 * args.chunk_size[1] / args.chunk_interval / 1000 * 16000 * 2)