|
@@ -21,8 +21,8 @@ type=sound
|
|
|
scp=wav.scp
|
|
scp=wav.scp
|
|
|
speed_perturb="0.9 1.0 1.1"
|
|
speed_perturb="0.9 1.0 1.1"
|
|
|
dataset_type=large
|
|
dataset_type=large
|
|
|
-stage=3
|
|
|
|
|
-stop_stage=4
|
|
|
|
|
|
|
+stage=0
|
|
|
|
|
+stop_stage=5
|
|
|
|
|
|
|
|
skip_extract_embed=false
|
|
skip_extract_embed=false
|
|
|
bert_model_name="bert-base-chinese"
|
|
bert_model_name="bert-base-chinese"
|
|
@@ -32,8 +32,8 @@ feats_dim=80
|
|
|
nj=64
|
|
nj=64
|
|
|
|
|
|
|
|
# data
|
|
# data
|
|
|
-tr_dir=/nfs/wangjiaming.wjm/asr_data/aishell2/AISHELL-2/iOS/data
|
|
|
|
|
-dev_tst_dir=/nfs/wangjiaming.wjm/asr_data/aishell2/AISHELL-DEV-TEST-SET
|
|
|
|
|
|
|
+tr_dir=
|
|
|
|
|
+dev_tst_dir=
|
|
|
|
|
|
|
|
# exp tag
|
|
# exp tag
|
|
|
tag="exp1"
|
|
tag="exp1"
|
|
@@ -108,10 +108,16 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
|
|
|
echo "<unk>" >> ${token_list}
|
|
echo "<unk>" >> ${token_list}
|
|
|
fi
|
|
fi
|
|
|
|
|
|
|
|
-# Training Stage
|
|
|
|
|
|
|
+# LM Training Stage
|
|
|
world_size=$gpu_num # run on one machine
|
|
world_size=$gpu_num # run on one machine
|
|
|
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
|
|
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
|
|
|
- echo "stage 3: Training"
|
|
|
|
|
|
|
+ echo "stage 3: LM Training"
|
|
|
|
|
+fi
|
|
|
|
|
+
|
|
|
|
|
+# ASR Training Stage
|
|
|
|
|
+world_size=$gpu_num # run on one machine
|
|
|
|
|
+if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
|
|
|
|
|
+ echo "stage 4: ASR Training"
|
|
|
if ! "${skip_extract_embed}"; then
|
|
if ! "${skip_extract_embed}"; then
|
|
|
echo "extract embeddings..."
|
|
echo "extract embeddings..."
|
|
|
local/extract_embeds.sh \
|
|
local/extract_embeds.sh \
|
|
@@ -160,8 +166,8 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
|
|
|
fi
|
|
fi
|
|
|
|
|
|
|
|
# Testing Stage
|
|
# Testing Stage
|
|
|
-if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
|
|
|
|
|
- echo "stage 4: Inference"
|
|
|
|
|
|
|
+if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
|
|
|
|
|
+ echo "stage 5: Inference"
|
|
|
for dset in ${test_sets}; do
|
|
for dset in ${test_sets}; do
|
|
|
asr_exp=${exp_dir}/exp/${model_dir}
|
|
asr_exp=${exp_dir}/exp/${model_dir}
|
|
|
inference_tag="$(basename "${inference_config}" .yaml)"
|
|
inference_tag="$(basename "${inference_config}" .yaml)"
|