---
# template.yaml
#
# This is an example that demonstrates how to configure a model file.
# You can modify the configuration according to your own requirements.
#
# To print the register table:
#   from funasr.register import tables
#   tables.print()
#
# Layout: most sections pair a top-level `<name>` key with a sibling
# `<name>_conf` mapping that holds that section's settings.
# NOTE(review): the nesting below was rebuilt from that pattern because the
# text this file was recovered from had lost its indentation -- confirm the
# structure against the consumer's schema.

# network architecture
model: Paraformer
model_conf:
  ctc_weight: 0.0
  lsm_weight: 0.1
  length_normalized_loss: true
  predictor_weight: 1.0
  predictor_bias: 1
  sampling_ratio: 0.75

# encoder
encoder: SANMEncoder
encoder_conf:
  output_size: 512
  attention_heads: 4
  linear_units: 2048
  num_blocks: 50
  dropout_rate: 0.1
  positional_dropout_rate: 0.1
  attention_dropout_rate: 0.1
  input_layer: pe
  pos_enc_class: SinusoidalPositionEncoder
  normalize_before: true
  kernel_size: 11
  # NOTE(review): the key is spelled `sanm_shfit` in the source; it is kept
  # verbatim because renaming it would change the key the consumer reads.
  sanm_shfit: 0
  selfattention_layer_type: sanm

# decoder
decoder: ParaformerSANMDecoder
decoder_conf:
  attention_heads: 4
  linear_units: 2048
  num_blocks: 16
  dropout_rate: 0.1
  positional_dropout_rate: 0.1
  self_attention_dropout_rate: 0.1
  src_attention_dropout_rate: 0.1
  att_layer_num: 16
  kernel_size: 11
  # Same spelling as under encoder_conf; kept verbatim.
  sanm_shfit: 0

# predictor
predictor: CifPredictorV2
predictor_conf:
  idim: 512
  threshold: 1.0
  l_order: 1
  r_order: 1
  tail_threshold: 0.45

# frontend related
frontend: WavFrontend
frontend_conf:
  fs: 16000
  window: hamming
  n_mels: 80
  frame_length: 25
  frame_shift: 10
  lfr_m: 7
  lfr_n: 6

# specaug
specaug: SpecAugLFR
specaug_conf:
  apply_time_warp: false
  time_warp_window: 5
  time_warp_mode: bicubic
  apply_freq_mask: true
  freq_mask_width_range:
    - 0
    - 30
  lfr_rate: 6
  num_freq_mask: 1
  apply_time_mask: true
  time_mask_width_range:
    - 0
    - 12
  num_time_mask: 1

# training
train_conf:
  accum_grad: 1
  grad_clip: 5
  max_epoch: 150
  keep_nbest_models: 10
  avg_nbest_model: 5
  log_interval: 50

# optimizer and scheduler
optim: adam
optim_conf:
  lr: 0.0005
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 30000

# dataset
dataset: AudioDataset
dataset_conf:
  index_ds: IndexDSJsonl
  batch_sampler: DynamicBatchLocalShuffleSampler
  # batch_type: example or length
  batch_type: example
  # If batch_type is example, batch_size is the number of samples;
  # if length, batch_size is source_token_len + target_token_len.
  batch_size: 1
  # Samples with source_token_len + target_token_len > max_token_length
  # are filtered out.
  max_token_length: 2048
  buffer_size: 500
  shuffle: true
  num_workers: 0

# tokenizer
tokenizer: CharTokenizer
tokenizer_conf:
  unk_symbol: <unk>
  split_with_space: true

# ctc
ctc_conf:
  dropout_rate: 0.0
  ctc_type: builtin
  reduce: true
  ignore_nan_grad: true

# NOTE(review): placed at top level (not under ctc_conf); the original
# nesting could not be recovered from the source -- confirm.
normalize: null