|
|
@@ -29,6 +29,18 @@ decoder_conf:
|
|
|
self_attention_dropout_rate: 0.0
|
|
|
src_attention_dropout_rate: 0.0
|
|
|
|
|
|
+# frontend related
|
|
|
+frontend: wav_frontend
|
|
|
+frontend_conf:
|
|
|
+ fs: 16000
|
|
|
+ window: hamming
|
|
|
+ n_mels: 80
|
|
|
+ frame_length: 25
|
|
|
+ frame_shift: 10
|
|
|
+ lfr_m: 1
|
|
|
+ lfr_n: 1
|
|
|
+
|
|
|
+
|
|
|
# hybrid CTC/attention
|
|
|
model: paraformer
|
|
|
model_conf:
|
|
|
@@ -42,7 +54,7 @@ model_conf:
|
|
|
accum_grad: 2
|
|
|
grad_clip: 5
|
|
|
patience: none
|
|
|
-max_epoch: 50
|
|
|
+max_epoch: 150
|
|
|
val_scheduler_criterion:
|
|
|
- valid
|
|
|
- acc
|
|
|
@@ -82,10 +94,8 @@ predictor_conf:
|
|
|
l_order: 1
|
|
|
r_order: 1
|
|
|
|
|
|
-log_interval: 50
|
|
|
-normalize: None
|
|
|
-
|
|
|
dataset_conf:
|
|
|
+ data_types: sound,text
|
|
|
shuffle: True
|
|
|
shuffle_conf:
|
|
|
shuffle_size: 2048
|
|
|
@@ -93,4 +103,7 @@ dataset_conf:
|
|
|
batch_conf:
|
|
|
batch_type: token
|
|
|
batch_size: 25000
|
|
|
- num_workers: 8
|
|
|
+ num_workers: 8
|
|
|
+
|
|
|
+log_interval: 50
|
|
|
+normalize: None
|