---
# train_asr_transformer.yaml
# network architecture
# encoder related
encoder: transformer
encoder_conf:
  output_size: 256  # dimension of attention
  attention_heads: 4
  linear_units: 2048  # the number of units of position-wise feed forward
  num_blocks: 12  # the number of encoder blocks
  dropout_rate: 0.1
  positional_dropout_rate: 0.1
  attention_dropout_rate: 0.0
  input_layer: conv2d  # encoder architecture type
  normalize_before: true

# decoder related
decoder: transformer
decoder_conf:
  attention_heads: 4
  linear_units: 2048
  num_blocks: 6
  dropout_rate: 0.1
  positional_dropout_rate: 0.1
  self_attention_dropout_rate: 0.0
  src_attention_dropout_rate: 0.0

# frontend related
frontend: wav_frontend
frontend_conf:
  fs: 16000
  window: hamming
  n_mels: 80
  frame_length: 25
  frame_shift: 10
  lfr_m: 1
  lfr_n: 1

# hybrid CTC/attention
model_conf:
  ctc_weight: 0.3
  lsm_weight: 0.1  # label smoothing option
  length_normalized_loss: false

# optimization related
accum_grad: 1
grad_clip: 5
# NOTE(review): lowercase "none" parses as the *string* "none", not YAML null;
# the trainer's option loader is assumed to map it to no-patience — confirm,
# or switch to an explicit `null` if the consumer accepts it.
patience: none
max_epoch: 60
val_scheduler_criterion:
  - valid
  - acc
best_model_criterion:
  - - valid
    - acc
    - max
keep_nbest_models: 10

# NoamLR is deprecated. Use WarmupLR.
# The following is equivalent setting for NoamLR:
#
#   optim: adam
#   optim_conf:
#     lr: 10.
#   scheduler: noamlr
#   scheduler_conf:
#     model_size: 256
#     warmup_steps: 25000
#
optim: adam
optim_conf:
  lr: 0.002
scheduler: warmuplr  # pytorch v1.1.0+ required
scheduler_conf:
  warmup_steps: 25000

dataset_conf:
  data_names: speech,text  # comma-separated field names; order matters to the loader
  data_types: sound,text
  shuffle: true
  shuffle_conf:
    shuffle_size: 2048
    sort_size: 500
  batch_conf:
    batch_type: token  # presumably batch_size is a token count, not utterances — verify
    batch_size: 25000
  num_workers: 8

log_interval: 50
# NOTE(review): "None" parses as the *string* "None", not YAML null; presumably
# means "no feature normalization" to the consumer — confirm, or use `null`.
normalize: None