# modelscope_infer.sh
  1. #!/usr/bin/env bash
  2. set -e
  3. set -u
  4. set -o pipefail
  5. data_dir=
  6. exp_dir=
  7. model_name=
  8. inference_nj=32
  9. gpuid_list="0,1,2,3"
  10. njob=32
  11. gpu_inference=true
  12. test_sets="dev test"
  13. decode_cmd=utils/run.pl
  14. # LM configs
  15. use_lm=false
  16. beam_size=1
  17. lm_weight=0.0
  18. . utils/parse_options.sh
  19. if ${gpu_inference}; then
  20. _ngpu=1
  21. else
  22. _ngpu=0
  23. fi
  24. # download model from modelscope
  25. python modelscope_utils/download_model.py \
  26. --model_name ${model_name}
  27. modelscope_dir=${HOME}/.cache/modelscope/hub/damo/${model_name}
  28. for dset in ${test_sets}; do
  29. _dir=${exp_dir}/${model_name}/decode_asr/${dset}
  30. _logdir=${_dir}/logdir
  31. _data=${data_dir}/${dset}
  32. if [ -d ${_dir} ]; then
  33. echo "${_dir} is already exists. if you want to decode again, please delete ${_dir} first."
  34. exit 1
  35. else
  36. mkdir -p "${_dir}"
  37. mkdir -p "${_logdir}"
  38. fi
  39. if "${use_lm}"; then
  40. cp ${modelscope_dir}/decode_asr_transformer.yaml ${modelscope_dir}/decode_asr_transformer.yaml.back
  41. cp ${modelscope_dir}/decode_asr_transformer_wav.yaml ${modelscope_dir}/decode_asr_transformer_wav.yaml.back
  42. sed -i "s#beam_size: [0-9]*#beam_size: `echo $beam_size`#g" ${modelscope_dir}/decode_asr_transformer.yaml
  43. sed -i "s#beam_size: [0-9]*#beam_size: `echo $beam_size`#g" ${modelscope_dir}/decode_asr_transformer_wav.yaml
  44. sed -i "s#lm_weight: 0.[0-9]*#lm_weight: `echo $lm_weight`#g" ${modelscope_dir}/decode_asr_transformer.yaml
  45. sed -i "s#lm_weight: 0.[0-9]*#lm_weight: `echo $lm_weight`#g" ${modelscope_dir}/decode_asr_transformer_wav.yaml
  46. fi
  47. for n in $(seq "${inference_nj}"); do
  48. split_scps+=" ${_logdir}/keys.${n}.scp"
  49. done
  50. # shellcheck disable=SC2086
  51. utils/split_scp.pl "${data_dir}/${dset}/wav.scp" ${split_scps}
  52. echo "Decoding started... log: '${_logdir}/asr_inference.*.log'"
  53. # shellcheck disable=SC2086
  54. ${decode_cmd} --max-jobs-run "${inference_nj}" JOB=1:"${inference_nj}" "${_logdir}"/asr_inference.JOB.log \
  55. python -m funasr.bin.modelscope_infer \
  56. --model_name ${model_name} \
  57. --wav_list ${_logdir}/keys.JOB.scp \
  58. --output_file ${_logdir}/text.JOB \
  59. --gpuid_list ${gpuid_list} \
  60. --njob ${njob} \
  61. --ngpu ${_ngpu} \
  62. for i in $(seq ${inference_nj}); do
  63. cat ${_logdir}/text.${i}
  64. done | sort -k1 >${_dir}/text
  65. python utils/proce_text.py ${_dir}/text ${_dir}/text.proc
  66. python utils/proce_text.py ${_data}/text ${_data}/text.proc
  67. python utils/compute_wer.py ${_data}/text.proc ${_dir}/text.proc ${_dir}/text.cer
  68. tail -n 3 ${_dir}/text.cer > ${_dir}/text.cer.txt
  69. cat ${_dir}/text.cer.txt
  70. done
  71. if "${use_lm}"; then
  72. mv ${modelscope_dir}/decode_asr_transformer.yaml.back ${modelscope_dir}/decode_asr_transformer.yaml
  73. mv ${modelscope_dir}/decode_asr_transformer_wav.yaml.back ${modelscope_dir}/decode_asr_transformer_wav.yaml
  74. fi