| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990 |
#!/usr/bin/env bash
#
# Decode one or more test sets with a ModelScope ASR model and score the
# result.
#
# Outline of the whole script:
#   1. download the model named by model_name;
#   2. for every entry of test_sets, split <data_dir>/<set>/wav.scp into
#      inference_nj parts and run funasr.bin.modelscope_infer on each part
#      through decode_cmd;
#   3. merge the per-job hypotheses and score them against
#      <data_dir>/<set>/text with utils/compute_wer.py.
#
# The variables below are defaults. utils/parse_options.sh is sourced right
# after them; presumably (Kaldi convention, not visible from this file) it
# lets the caller override each one as "--name value" -- TODO confirm.

set -euo pipefail

# Required: root directory holding one sub-directory per test set.
data_dir=
# Required: root directory that receives the decoding output.
exp_dir=
# Required: model identifier handed to the downloader and to the decoder.
model_name=

# Number of wav.scp splits and of decoding jobs started through decode_cmd.
inference_nj=32
# Forwarded unchanged as --gpuid_list and --njob to funasr.bin.modelscope_infer.
gpuid_list="0,1,2,3"
njob=32
# "true" selects --ngpu 1, anything else --ngpu 0.
gpu_inference=true
# Space-separated names of the sets to decode.
test_sets="dev test"
# Launcher used to run the decoding jobs.
decode_cmd=utils/run.pl

# LM configs
# beam_size and lm_weight are written into the model's decode_asr_transformer
# yaml files only when use_lm is "true".
use_lm=false
beam_size=1
lm_weight=0.0

. utils/parse_options.sh

# The three path/model options have no usable default. Stop here instead of
# building paths such as "/<model_name>/decode_asr/..." from empty values.
for _opt in data_dir exp_dir model_name; do
  if [[ -z "${!_opt}" ]]; then
    echo "$0: --${_opt} must be set" >&2
    exit 1
  fi
done
unset _opt

# Compare as a string rather than executing the option value as a command.
if [[ "${gpu_inference}" == true ]]; then
  _ngpu=1
else
  _ngpu=0
fi
# Download the model from ModelScope. With "set -e" active a failing download
# aborts the script here.
python modelscope_utils/download_model.py \
  --model_name "${model_name}"

# Directory the later steps read the model's decode configs from.
# NOTE(review): assumes the downloader stores models under the default
# ModelScope cache layout (~/.cache/modelscope/hub/damo/<model_name>) -- confirm.
modelscope_dir="${HOME}/.cache/modelscope/hub/damo/${model_name}"
# Decode configs shipped with the model that carry beam_size / lm_weight.
decode_configs=(
  "${modelscope_dir}/decode_asr_transformer.yaml"
  "${modelscope_dir}/decode_asr_transformer_wav.yaml"
)

# Move every "<config>.back" backup over its config again.
# Safe to call more than once: configs without a backup are skipped.
restore_decode_configs() {
  local cfg
  for cfg in "${decode_configs[@]}"; do
    if [[ -f "${cfg}.back" ]]; then
      mv -- "${cfg}.back" "${cfg}"
    fi
  done
}

if [[ "${use_lm}" == true ]]; then
  # Restore the configs on every exit path, including "exit 1" below and
  # failures caught by "set -e".
  trap restore_decode_configs EXIT
  # Patch once, before the loop: backing up inside the loop would overwrite
  # the backup with an already patched file on the second test set.
  for cfg in "${decode_configs[@]}"; do
    # An existing backup is kept; it can only be older than the current file.
    if [[ ! -f "${cfg}.back" ]]; then
      cp -- "${cfg}" "${cfg}.back"
    fi
    sed -i \
      -e "s#beam_size: [0-9]*#beam_size: ${beam_size}#g" \
      -e "s#lm_weight: [0-9][0-9.]*#lm_weight: ${lm_weight}#g" \
      "${cfg}"
  done
fi

# test_sets is a space-separated list, so it is expanded unquoted on purpose.
for dset in ${test_sets}; do
  _dir="${exp_dir}/${model_name}/decode_asr/${dset}"
  _logdir="${_dir}/logdir"
  _data="${data_dir}/${dset}"

  # Never overwrite an earlier decoding result.
  if [[ -d "${_dir}" ]]; then
    echo "${_dir} is already exists. if you want to decode again, please delete ${_dir} first."
    exit 1
  fi
  mkdir -p "${_logdir}"

  # One key file per job. The list is rebuilt for every set so that the key
  # files of a previous set are not handed to split_scp.pl again.
  split_scps=()
  for ((n = 1; n <= inference_nj; n++)); do
    split_scps+=("${_logdir}/keys.${n}.scp")
  done
  utils/split_scp.pl "${_data}/wav.scp" "${split_scps[@]}"

  echo "Decoding started... log: '${_logdir}/asr_inference.*.log'"
  # decode_cmd may carry launcher options of its own, so it stays unquoted.
  # Presumably the launcher replaces JOB with the job index in every argument.
  # shellcheck disable=SC2086
  ${decode_cmd} --max-jobs-run "${inference_nj}" JOB=1:"${inference_nj}" "${_logdir}/asr_inference.JOB.log" \
    python -m funasr.bin.modelscope_infer \
      --model_name "${model_name}" \
      --wav_list "${_logdir}/keys.JOB.scp" \
      --output_file "${_logdir}/text.JOB" \
      --gpuid_list "${gpuid_list}" \
      --njob "${njob}" \
      --ngpu "${_ngpu}"

  # Concatenate the per-job outputs into one file sorted on the first field.
  for ((i = 1; i <= inference_nj; i++)); do
    cat "${_logdir}/text.${i}"
  done | sort -k1 > "${_dir}/text"

  # Post-process hypothesis and reference the same way, then score them.
  python utils/proce_text.py "${_dir}/text" "${_dir}/text.proc"
  python utils/proce_text.py "${_data}/text" "${_data}/text.proc"
  python utils/compute_wer.py "${_data}/text.proc" "${_dir}/text.proc" "${_dir}/text.cer"

  # Keep and print the last three lines of the scoring report.
  tail -n 3 "${_dir}/text.cer" > "${_dir}/text.cer.txt"
  cat "${_dir}/text.cer.txt"
done

if [[ "${use_lm}" == true ]]; then
  # Normal completion: restore right away and drop the trap again.
  restore_decode_configs
  trap - EXIT
fi
|