modelscope_infer.sh

#!/usr/bin/env bash

set -e
set -u
set -o pipefail
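
# Default options. Any of the variables below can be overridden from the
# command line via the Kaldi-style option parser (utils/parse_options.sh)
# sourced after the defaults.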
data_dir=
exp_dir=
model_name=
model_revision=
inference_nj=32
gpuid_list="0,1,2,3"
njob=32
gpu_inference=true
test_sets="dev test"
decode_cmd=utils/run.pl

# LM configs
use_lm=false
beam_size=1
lm_weight=0.0

. utils/parse_options.sh
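
# Each decoding job runs on a GPU (--ngpu 1) when gpu_inference=true,
# otherwise on CPU (--ngpu 0).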
if ${gpu_inference}; then
    _ngpu=1
else
    _ngpu=0
fi

# download model from modelscope
python modelscope_utils/download_model.py \
    --model_name ${model_name} --model_revision ${model_revision}
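
# Location of the downloaded model files (including decoding.yaml) in the
# default ModelScope cache.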
modelscope_dir=${HOME}/.cache/modelscope/hub/damo/${model_name}

for dset in ${test_sets}; do
    _dir=${exp_dir}/${model_name}/decode_asr/${dset}
    _logdir=${_dir}/logdir
    _data=${data_dir}/${dset}

    if [ -d ${_dir} ]; then
        echo "${_dir} already exists. If you want to decode again, please delete ${_dir} first."
        exit 1
    else
        mkdir -p "${_dir}"
        mkdir -p "${_logdir}"
    fi
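
    # When an external LM is used, patch beam_size and lm_weight into the
    # model's decoding.yaml; the original file is backed up and restored
    # after all test sets have been decoded.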
    if "${use_lm}"; then
        cp ${modelscope_dir}/decoding.yaml ${modelscope_dir}/decoding.yaml.back
        sed -i "s#beam_size: [0-9]*#beam_size: ${beam_size}#g" ${modelscope_dir}/decoding.yaml
        sed -i "s#lm_weight: 0\.[0-9]*#lm_weight: ${lm_weight}#g" ${modelscope_dir}/decoding.yaml
    fi
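
    # Split the test-set wav.scp into ${inference_nj} key lists so the
    # utterances can be decoded in parallel.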
    split_scps=""
    for n in $(seq "${inference_nj}"); do
        split_scps+=" ${_logdir}/keys.${n}.scp"
    done
    # shellcheck disable=SC2086
    utils/split_scp.pl "${data_dir}/${dset}/wav.scp" ${split_scps}
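
    # Launch ${inference_nj} parallel jobs: job JOB decodes keys.JOB.scp and
    # writes its hypotheses to ${_logdir}/text.JOB; GPU placement is presumably
    # derived from --gpuid_list and --njob inside funasr.bin.modelscope_infer.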
    echo "Decoding started... log: '${_logdir}/asr_inference.*.log'"
    # shellcheck disable=SC2086
    ${decode_cmd} --max-jobs-run "${inference_nj}" JOB=1:"${inference_nj}" "${_logdir}"/asr_inference.JOB.log \
        python -m funasr.bin.modelscope_infer \
            --model_name ${model_name} \
            --model_revision ${model_revision} \
            --wav_list ${_logdir}/keys.JOB.scp \
            --output_file ${_logdir}/text.JOB \
            --gpuid_list ${gpuid_list} \
            --njob ${njob} \
            --ngpu ${_ngpu}
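
    # Merge the per-job hypotheses into a single, key-sorted transcript.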
    for i in $(seq ${inference_nj}); do
        cat ${_logdir}/text.${i}
    done | sort -k1 > ${_dir}/text
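
    # Normalize hypotheses and references, score them, and keep the summary
    # lines of the error-rate report.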
    python utils/proce_text.py ${_dir}/text ${_dir}/text.proc
    python utils/proce_text.py ${_data}/text ${_data}/text.proc
    python utils/compute_wer.py ${_data}/text.proc ${_dir}/text.proc ${_dir}/text.cer
    tail -n 3 ${_dir}/text.cer > ${_dir}/text.cer.txt
    cat ${_dir}/text.cer.txt
done
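
# Restore the original decoding.yaml if it was modified for LM decoding.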
if "${use_lm}"; then
    mv ${modelscope_dir}/decoding.yaml.back ${modelscope_dir}/decoding.yaml
fi