# paraformer_large_infer.sh
  1. #!/usr/bin/env bash
  2. set -e
  3. set -u
  4. set -o pipefail
  5. ori_data=
  6. data_dir=
  7. exp_dir=
  8. model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
  9. model_revision="v1.0.4" # please do not modify the model revision
  10. inference_nj=32
  11. gpuid_list="0" # set gpus, e.g., gpuid_list="0,1"
  12. ngpu=$(echo $gpuid_list | awk -F "," '{print NF}')
  13. njob=1 # the number of jobs for each gpu
  14. gpu_inference=true # Whether to perform gpu decoding, set false for cpu decoding
  15. if ${gpu_inference}; then
  16. inference_nj=$[${ngpu}*${njob}]
  17. else
  18. inference_nj=$njob
  19. fi
  20. # LM configs
  21. use_lm=false
  22. beam_size=1
  23. lm_weight=0.0
  24. test_sets="dev test"
  25. . utils/parse_options.sh
  26. aishell_audio_dir=$ori_data/data_aishell/wav
  27. aishell_text=$ori_data/data_aishell/transcript/aishell_transcript_v0.8.txt
  28. dev_dir=${data_dir}/aishell/dev
  29. test_dir=${data_dir}/aishell/test
  30. tmp_dir=${data_dir}/aishell/tmp
  31. mkdir -p ${dev_dir}
  32. mkdir -p ${test_dir}
  33. mkdir -p ${tmp_dir}
  34. find $aishell_audio_dir -iname "*.wav" > $tmp_dir/wav.flist
  35. grep -i "wav/dev" $tmp_dir/wav.flist > $dev_dir/wav.flist || exit 1;
  36. grep -i "wav/test" $tmp_dir/wav.flist > $test_dir/wav.flist || exit 1;
  37. rm -r $tmp_dir
  38. for dir in $dev_dir $test_dir; do
  39. sed -e 's/\.wav//' $dir/wav.flist | awk -F '/' '{print $NF}' > $dir/utt.list
  40. paste -d' ' $dir/utt.list $dir/wav.flist > $dir/wav.scp_all
  41. utils/filter_scp.pl -f 1 $dir/utt.list $aishell_text > $dir/transcripts.txt
  42. awk '{print $1}' $dir/transcripts.txt > $dir/utt.list
  43. utils/filter_scp.pl -f 1 $dir/utt.list $dir/wav.scp_all | sort -u > $dir/wav.scp
  44. sort -u $dir/transcripts.txt > $dir/text
  45. done
  46. mkdir -p ${exp_dir}/aishell
  47. modelscope_utils/modelscope_infer.sh \
  48. --data_dir ${data_dir}/aishell \
  49. --exp_dir ${exp_dir}/aishell \
  50. --test_sets "${test_sets}" \
  51. --model_name ${model_name} \
  52. --model_revision ${model_revision} \
  53. --inference_nj ${inference_nj} \
  54. --gpuid_list ${gpuid_list} \
  55. --njob ${njob} \
  56. --gpu_inference ${gpu_inference} \
  57. --use_lm ${use_lm} \
  58. --beam_size ${beam_size} \
  59. --lm_weight ${lm_weight}