# modelscope_common_infer_after_finetune.sh
# Decode a wav list with a fine-tuned ModelScope (FunASR) Paraformer model.
  1. #!/usr/bin/env bash
  2. set -e
  3. set -u
  4. set -o pipefail
  5. pretrained_model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch # pre-trained model, download from modelscope
  6. data_dir= # wav list, ${data_dir}/wav.scp
  7. finetune_model_name= # fine-tuning model name
  8. finetune_exp_dir= # fine-tuning model experiment result path
  9. gpuid_list="0"
  10. ngpu=$(echo $gpuid_list | awk -F "," '{print NF}')
  11. njob=1
  12. gpu_inference=true
  13. decode_cmd=utils/run.pl
  14. . utils/parse_options.sh
  15. if ${gpu_inference}; then
  16. inference_nj=$[${ngpu}*${njob}]
  17. _ngpu=1
  18. else
  19. inference_nj=${njob}
  20. _ngpu=0
  21. fi
  22. if [ ! -d ${HOME}/.cache/modelscope/hub/damo/${pretrained_model_name} ]; then
  23. echo "${HOME}/.cache/modelscope/hub/damo/${pretrained_model_name} must exist."
  24. exit 1
  25. else
  26. exp_dir=${finetune_exp_dir}/${finetune_model_name}.modelscope
  27. mkdir -p $exp_dir
  28. cp ${finetune_exp_dir}/${finetune_model_name} ${exp_dir}/${finetune_model_name}.modelscope
  29. cp ${HOME}/.cache/modelscope/hub/damo/${pretrained_model_name}/* ${exp_dir}/. -r
  30. fi
  31. _dir=${exp_dir}/decode_asr
  32. _logdir=${_dir}/logdir
  33. if [ -d ${_dir} ]; then
  34. echo "${_dir} is already exists. if you want to decode again, please delete ${_dir} first."
  35. else
  36. mkdir -p "${_dir}"
  37. mkdir -p "${_logdir}"
  38. fi
  39. for n in $(seq "${inference_nj}"); do
  40. split_scps+=" ${_logdir}/keys.${n}.scp"
  41. done
  42. # shellcheck disable=SC2086
  43. utils/split_scp.pl "${data_dir}/wav.scp" ${split_scps}
  44. echo "Decoding started... log: '${_logdir}/asr_inference.*.log'"
  45. # shellcheck disable=SC2086
  46. ${decode_cmd} --max-jobs-run "${inference_nj}" JOB=1:"${inference_nj}" "${_logdir}"/asr_inference.JOB.log \
  47. python -m funasr.bin.modelscope_infer \
  48. --local_model_path ${exp_dir} \
  49. --wav_list ${_logdir}/keys.JOB.scp \
  50. --output_file ${_logdir}/text.JOB \
  51. --gpuid_list ${gpuid_list} \
  52. --njob ${njob} \
  53. --ngpu ${_ngpu} \
  54. for i in $(seq ${inference_nj}); do
  55. cat ${_logdir}/text.${i}
  56. done | sort -k1 >${_dir}/text