|
|
@@ -94,13 +94,16 @@ scheduler_conf:
|
|
|
dataset: AudioDataset
|
|
|
dataset_conf:
|
|
|
index_ds: IndexDSJsonl
|
|
|
- batch_sampler: DynamicBatchLocalShuffleSampler
|
|
|
+ batch_sampler: RankFullLocalShuffleBatchSampler
|
|
|
batch_type: example # example or length
|
|
|
- batch_size: 1 # if batch_type is example, batch_size is the numbers of samples; if length, batch_size is source_token_len+target_token_len;
|
|
|
+ batch_size: 32 # if batch_type is example, batch_size is the number of samples; if length, batch_size is source_token_len+target_token_len;
|
|
|
max_token_length: 2048 # filter samples if source_token_len+target_token_len > max_token_length,
|
|
|
- buffer_size: 500
|
|
|
+ buffer_size: 1024
|
|
|
shuffle: True
|
|
|
num_workers: 4
|
|
|
+ preprocessor_speech: SpeechPreprocessSpeedPerturb
|
|
|
+ preprocessor_speech_conf:
|
|
|
+ speed_perturb: [0.9, 1.0, 1.1]
|
|
|
|
|
|
tokenizer: CharTokenizer
|
|
|
tokenizer_conf:
|