2 éve · 7520b250fd
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -0,0 +1,23 @@
 
				+version: '3'
				+services:
				+  # FunASR offline speech-recognition server (CPU runtime image).
				+  funasr:
				+    image: registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.4.4
				+    ports:
				+      # Quoted so the host:container mapping is always parsed as a string.
				+      - "10095:10095"
				+    volumes:
				+      # Host model directory mounted at /workspace/models, the path given to
				+      # --download-model-dir and --hotword in the command below.
				+      - /home/user/program/modelscope-whisper/funasr-runtime-resources/models:/workspace/models
				+    # NOTE(review): runs with extended privileges; confirm this is really required.
				+    privileged: true
				+    working_dir: /workspace/FunASR/runtime
				+    # The folded scalar (>) already joins the lines below with single spaces,
				+    # so no shell "\" continuations are used: inside the quoted bash -c string
				+    # they could reach bash as escaped spaces and turn into stray arguments.
				+    # Output is piped through tee instead of being redirected and tailed
				+    # afterwards, so the log is written to /workspace/log.txt and is also
				+    # visible via `docker-compose logs -f` while the server is running.
				+    command: >
				+      /bin/bash -c "
				+      chmod +x /workspace/FunASR/runtime/run_server.sh &&
				+      touch /workspace/log.txt &&
				+      /workspace/FunASR/runtime/run_server.sh
				+      --download-model-dir /workspace/models
				+      --vad-dir damo/speech_fsmn_vad_zh-cn-16k-common-onnx
				+      --model-dir damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-onnx
				+      --punc-dir damo/punc_ct-transformer_cn-en-common-vocab471067-large-onnx
				+      --lm-dir damo/speech_ngram_lm_zh-cn-ai-wesp-fst
				+      --itn-dir thuduj12/fst_itn_zh
				+      --certfile 0
				+      --hotword /workspace/models/hotwords.txt 2>&1 | tee /workspace/log.txt"
			
--- a/funasr_README_en.md
+++ b/funasr_README_en.md
@@ -1,6 +1,6 @@
 
				 [//]: # (<div align="left"><img src="docs/images/funasr_logo.jpg" width="400"/></div>)
			
 
				 
			
 
				-([简体中文](./README_zh.md)|English)
			
 
				+([简体中文](./funasr_README_zh.md)|English)
			
 
				 
			
 
				 # FunASR: A Fundamental End-to-End Speech Recognition Toolkit
			
 
				 
			
--- a/funasr_README_zh.md
+++ b/funasr_README_zh.md
@@ -1,6 +1,6 @@
 
				 [//]: # (<div align="left"><img src="docs/images/funasr_logo.jpg" width="400"/></div>)
			
 
				 
			
 
				-(简体中文|[English](./README.md))
			
 
				+(简体中文|[English](./funasr_README_en.md))
			
 
				 
			
 
				 # FunASR: A Fundamental End-to-End Speech Recognition Toolkit
			
 
				 
			
--- a/readme.md
+++ b/readme.md
@@ -0,0 +1,131 @@
 
				+# 简介
			
 
				+这是阿里云魔搭社区的语音识别模型 [FunASR](https://www.modelscope.cn/models/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary)，运行速度极快：1 分钟语音，识别仅需 3~5 秒；模型大小仅 950M，错误率约 1%。
			
 
				+
			
 
				+官方部署和使用说明在 [github](https://github.com/alibaba-damo-academy/FunASR)
			
 
				+
			
 
				+# 使用
			
 
				+按下文 [docker-compose（推荐）](#docker-compose推荐) 一节完成 docker 一键部署后，服务会监听 10095 端口。使用 `docker ps` 可以查看 funasr 容器是否在运行。
			
 
				+
			
 
				+## 语音识别
			
 
				+**Linux**
			
 
				+```sh
			
 
				+# 下载客户端程序
			
 
				+curl -o funasr_wss_client https://gogs.sv-v.magong.site/attachments/536199ae-a529-4c95-bea9-4eefc6a4e540
			
 
				+# 赋予文件执行权限
			
 
				+chmod +x funasr_wss_client
			
 
				+# audio_in 是你的音频文件，可以是 mp3 或者 mp4 ，❗ 路径不能包含空格
			
 
				+funasr_wss_client --host "10.0.0.32" --port 10095 --mode offline --output_dir "./results" --ssl 0 --audio_in "/home/user/program/modelscope-whisper/funasr-runtime-resources/models/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/example/asr_example.wav"
			
 
				+```
			
 
				+
			
 
				+**Windows**
			
 
				+```shell
			
 
				+# 下载客户端程序
			
 
				+curl -o funasr_wss_client.exe https://gogs.sv-v.magong.site/attachments/6a635686-c203-4df0-b1dd-d83d2e7a1ffe
			
 
				+# Windows 上，电脑上发起语音转文请求， audio_in 填写你电脑上的媒体文件路径，可以是 mp3 或者 mp4
			
 
				+# ❗ 路径不能包含空格
			
 
				+funasr_wss_client.exe --host "10.0.0.32" --port 10095 --mode offline   --output_dir "./results" --ssl 0  --audio_in "C:\Users\mrh\Downloads\Screenrecorder-2023-11-10-01-25-02-39.mp4" 
			
 
				+```
			
 
				+
			
 
				+说明：
			
 
				+- 部署了语音识别的主机加入了 WireGuard 域网络，地址为 10.0.0.32 
			
 
				+- 在该主机上使用 `ifconfig` 可以看到 WireGuard 虚拟网卡 ip ，`sudo wg` 查看 WireGuard 网络状态
			
 
				+- 最新版本的 funasr_wss_client 发布在 [release](https://gogs.sv-v.magong.site/siweilian/FunASR/releases)
			
 
				+
			
 
				+## 客户端请求工具说明
			
 
				+
			
 
				+funasr_wss_client 是在 Windows、Linux 上用 pyinstaller 将 funasr_wss_client.py 打包得到的可执行文件。不过在 Windows 上打包前需要先修改该文件，因为它使用子进程执行，否则无法正常传参（[查看具体修改](https://gogs.sv-v.magong.site/siweilian/FunASR/commit/9b1543fa444cba5bad7f3429c609421192cd6c09)）。
			
 
				+
			
 
				+打包指令：
			
 
				+```shell
			
 
				+pip install pyinstaller
			
 
				+# 在 Windows 平台上打包，会生成 exe 文件，在 Linux 上打包会生成 Linux 二进制文件，生成到 dist 目录
			
 
				+pyinstaller -F /home/user/program/modelscope-whisper/FunASR/runtime/python/websocket/funasr_wss_client.py
			
 
				+```
			
 
				+
			
 
				+# 部署
			
 
				+## docker-compose（推荐）
			
 
				+```shell
			
 
				+# 在仓库 FunASR 目录中，使用 docker-compose.yaml 来启动容器
			
 
				+docker-compose up -d 
			
 
				+# 查看日志，是否运行成功
			
 
				+docker-compose logs -f
			
 
				+```
			
 
				+
			
 
				+## 自己部署
			
 
				+官方的部署方法，修改版，比较复杂
			
 
				+```sh
			
 
				+docker pull registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.4.4
			
 
				+sudo docker run -p 10095:10095 --rm -it --name funasr --workdir /workspace/FunASR/runtime/ --privileged=true \
			
 
				+  -v /home/user/program/modelscope-whisper/funasr-runtime-resources/models:/workspace/models \
			
 
				+  registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.4.4 \
			
 
				+   bash -c "nohup /workspace/FunASR/runtime/run_server.sh \
			
 
				+  --download-model-dir /workspace/models \
			
 
				+  --vad-dir damo/speech_fsmn_vad_zh-cn-16k-common-onnx \
			
 
				+  --model-dir damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-onnx  \
			
 
				+  --punc-dir damo/punc_ct-transformer_cn-en-common-vocab471067-large-onnx \
			
 
				+  --lm-dir damo/speech_ngram_lm_zh-cn-ai-wesp-fst \
			
 
				+  --itn-dir thuduj12/fst_itn_zh \
			
 
				+  --certfile 0 \
			
 
				+  --hotword /workspace/models/hotwords.txt > log.txt 2>&1 "
			
 
				+
			
 
				+docker exec -it funasr nohup bash run_server.sh \
			
 
				+  --download-model-dir /workspace/models \
			
 
				+  --vad-dir damo/speech_fsmn_vad_zh-cn-16k-common-onnx \
			
 
				+  --model-dir damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-onnx  \
			
 
				+  --punc-dir damo/punc_ct-transformer_cn-en-common-vocab471067-large-onnx \
			
 
				+  --lm-dir damo/speech_ngram_lm_zh-cn-ai-wesp-fst \
			
 
				+  --itn-dir thuduj12/fst_itn_zh \
			
 
				+  --certfile 0 \
			
 
				+  --hotword /workspace/models/hotwords.txt > log.txt 2>&1 &
			
 
				+
			
 
				+```
			
 
				+
			
 
				+
			
 
				+## 官方版
			
 
				+参考： https://github.com/alibaba-damo-academy/FunASR/blob/main/runtime/docs/SDK_advanced_guide_offline_zh.md
			
 
				+
			
 
				+环境安装
			
 
				+```shell
			
 
				+sudo docker pull \
			
 
				+  registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.4.4
			
 
				+mkdir -p ./funasr-runtime-resources/models
			
 
				+sudo docker run -p 10095:10095 -it --privileged=true \
			
 
				+  -v $PWD/funasr-runtime-resources/models:/workspace/models \
			
			
 
				+  registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.4.4
			
 
				+```
			
 
				+
			
 
				+启动服务
			
 
				+```shell
			
 
				+cd FunASR/runtime;
			
 
				+nohup bash /workspace/FunASR/runtime/run_server.sh \
			
 
				+  --download-model-dir /workspace/models \
			
 
				+  --vad-dir damo/speech_fsmn_vad_zh-cn-16k-common-onnx \
			
 
				+  --model-dir damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-onnx  \
			
 
				+  --punc-dir damo/punc_ct-transformer_cn-en-common-vocab471067-large-onnx \
			
 
				+  --lm-dir damo/speech_ngram_lm_zh-cn-ai-wesp-fst \
			
 
				+  --itn-dir thuduj12/fst_itn_zh \
			
 
				+  --certfile 0 \
			
 
				+  --hotword /workspace/models/hotwords.txt > log.txt 2>&1 &
			
 
				+
			
 
				+```
			
 
				+--download-model-dir 模型下载地址，通过设置model ID从Modelscope下载模型
			
 
				+--vad-dir  modelscope model ID 或者 本地模型路径。语音端点检测
			
 
				+--model-dir  modelscope model ID 或者 本地模型路径。语音识别模型
			
 
				+--punc-dir  modelscope model ID 或者 本地模型路径。标点符号恢复
			
 
				+--lm-dir modelscope model ID 或者 本地模型路径。Ngram语言模型，据说是用于预测下一个单词
			
 
				+--itn-dir modelscope model ID 或者 本地模型路径
			
 
				+--port  服务端监听的端口号，默认为 10095
			
 
				+--decoder-thread-num  服务端线程池个数(支持的最大并发路数)，
			
 
				+                      脚本会根据服务器线程数自动配置decoder-thread-num、io-thread-num
			
 
				+--io-thread-num  服务端启动的IO线程数
			
 
				+--model-thread-num  每路识别的内部线程数(控制ONNX模型的并行)，默认为 1，
			
 
				+                    其中建议 decoder-thread-num*model-thread-num 等于总线程数
			
 
				+--certfile  ssl的证书文件，默认为：../../../ssl_key/server.crt，如果需要关闭ssl，参数设置为0
			
 
				+--keyfile   ssl的密钥文件，默认为：../../../ssl_key/server.key
			
 
				+--hotword   热词文件路径，每行一个热词，格式：热词 权重(例如:阿里巴巴 20)，
			
 
				+            如果客户端提供热词，则与客户端提供的热词合并一起使用，服务端热词全局生效，客户端热词只针对对应客户端生效。
			
 
				+
			
 
				+
			
 
				+
			
 
				+