Browse Source

update repo

嘉渊 2 years ago
parent
commit
eb43576ed0

+ 1 - 1
egs/aishell/paraformer/conf/train_asr_paraformer_conformer_12e_6d_2048_256.yaml

@@ -84,7 +84,7 @@ specaug_conf:
     - 40
     num_time_mask: 2
 
-predictor: cif_predictor_v2
+predictor: cif_predictor
 predictor_conf:
     idim: 256
     threshold: 1.0

+ 24 - 13
egs/aishell/paraformerbert/conf/train_asr_paraformerbert_conformer_12e_6d_2048_256.yaml

@@ -29,6 +29,17 @@ decoder_conf:
     self_attention_dropout_rate: 0.0
     src_attention_dropout_rate: 0.0
 
+# frontend related
+frontend: wav_frontend
+frontend_conf:
+    fs: 16000
+    window: hamming
+    n_mels: 80
+    frame_length: 25
+    frame_shift: 10
+    lfr_m: 1
+    lfr_n: 1
+
 # hybrid CTC/attention
 model: paraformer_bert
 model_conf:
@@ -41,19 +52,10 @@ model_conf:
     embed_dims: 768
     embeds_loss_weight: 2.0
 
-
-
-# minibatch related
-#batch_type: length
-#batch_bins: 40000
-batch_type: numel
-batch_bins: 2000000
-num_workers: 16
-
 # optimization related
-accum_grad: 4
+accum_grad: 1
 grad_clip: 5
-max_epoch: 50
+max_epoch: 150
 val_scheduler_criterion:
     - valid
     - acc
@@ -92,8 +94,17 @@ predictor_conf:
     threshold: 1.0
     l_order: 1
     r_order: 1
+    tail_threshold: 0.45
 
+dataset_conf:
+    shuffle: True
+    shuffle_conf:
+        shuffle_size: 2048
+        sort_size: 500
+    batch_conf:
+        batch_type: token
+        batch_size: 25000
+    num_workers: 8
 
 log_interval: 50
-normalize: None
-allow_variable_data_keys: true
+normalize: None

+ 6 - 6
egs/aishell/paraformerbert/run.sh

@@ -111,12 +111,12 @@ fi
 world_size=$gpu_num  # run on one machine
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     echo "stage 3: Training"
-    if ! "${skip_extract_embed}"; then
-        echo "extract embeddings..."
-        local/extract_embeds.sh \
-            --bert_model_name ${bert_model_name} \
-            --raw_dataset_path ${feats_dir}
-    fi
+#    if ! "${skip_extract_embed}"; then
+#        echo "extract embeddings..."
+#        local/extract_embeds.sh \
+#            --bert_model_name ${bert_model_name} \
+#            --raw_dataset_path ${feats_dir}
+#    fi
     mkdir -p ${exp_dir}/exp/${model_dir}
     mkdir -p ${exp_dir}/exp/${model_dir}/log
     INIT_FILE=${exp_dir}/exp/${model_dir}/ddp_init