# modelscope_common_infer.sh — ModelScope Paraformer ASR batch inference.
# (Page-scrape header and line-number gutter removed.)
  1. #!/usr/bin/env bash
  2. set -e
  3. set -u
  4. set -o pipefail
  5. model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch # pre-trained model, download from modelscope
  6. model_revision="v1.0.4" # please do not modify the model revision
  7. data_dir= # wav list, ${data_dir}/wav.scp
  8. exp_dir="exp"
  9. gpuid_list="0"
  10. ngpu=$(echo $gpuid_list | awk -F "," '{print NF}')
  11. njob=1
  12. gpu_inference=true
  13. decode_cmd=utils/run.pl
  14. . utils/parse_options.sh
  15. if ${gpu_inference}; then
  16. inference_nj=$[${ngpu}*${njob}]
  17. _ngpu=1
  18. else
  19. inference_nj=${njob}
  20. _ngpu=0
  21. fi
  22. # LM configs
  23. use_lm=false
  24. beam_size=1
  25. lm_weight=0.0
  26. python modelscope_utils/download_model.py \
  27. --model_name ${model_name} --model_revision ${model_revision}
  28. if [ -d ${exp_dir} ]; then
  29. echo "${exp_dir} is already exists. if you want to decode again, please delete ${exp_dir} first."
  30. exit 1
  31. else
  32. mkdir -p ${exp_dir}/${model_name}
  33. cp ${HOME}/.cache/modelscope/hub/damo/${model_name}/* ${exp_dir}/${model_name}/. -r
  34. _dir=${exp_dir}/decode_asr
  35. _logdir=${_dir}/logdir
  36. mkdir -p "${_dir}"
  37. mkdir -p "${_logdir}"
  38. fi
  39. for n in $(seq "${inference_nj}"); do
  40. split_scps+=" ${_logdir}/keys.${n}.scp"
  41. done
  42. # shellcheck disable=SC2086
  43. utils/split_scp.pl "${data_dir}/wav.scp" ${split_scps}
  44. if "${use_lm}"; then
  45. cp ${exp_dir}/${model_name}/decoding.yaml ${exp_dir}/${model_name}/decoding.yaml.back
  46. sed -i "s#beam_size: [0-9]*#beam_size: `echo $beam_size`#g" ${exp_dir}/${model_name}/decoding.yaml
  47. sed -i "s#lm_weight: 0.[0-9]*#lm_weight: `echo $lm_weight`#g" ${exp_dir}/${model_name}/decoding.yaml
  48. fi
  49. echo "Decoding started... log: '${_logdir}/asr_inference.*.log'"
  50. # shellcheck disable=SC2086
  51. ${decode_cmd} --max-jobs-run "${inference_nj}" JOB=1:"${inference_nj}" "${_logdir}"/asr_inference.JOB.log \
  52. python -m funasr.bin.modelscope_infer \
  53. --local_model_path ${exp_dir}/${model_name} \
  54. --wav_list ${_logdir}/keys.JOB.scp \
  55. --output_file ${_logdir}/text.JOB \
  56. --gpuid_list ${gpuid_list} \
  57. --njob ${njob} \
  58. --ngpu ${_ngpu} \
  59. for i in $(seq ${inference_nj}); do
  60. cat ${_logdir}/text.${i}
  61. done | sort -k1 >${_dir}/text
  62. if "${use_lm}"; then
  63. mv ${exp_dir}/${model_name}/decoding.yaml.back ${exp_dir}/${model_name}/decoding.yaml
  64. fi