---
# template.yaml
#
# Example model configuration file. Adjust the values below to suit your own
# requirements.
#
# To print the register table of available components:
#   from funasr.register import tables
#   tables.print()
#
# NOTE(review): the nesting under each *_conf key was reconstructed from a
# flattened copy of this file; confirm it against the upstream template.

# Network architecture
model: FsmnVADStreaming
model_conf:
  # NOTE(review): same value as frontend_conf.fs; presumably they must agree.
  sample_rate: 16000
  detect_mode: 1
  snr_mode: 0
  # Keys ending in _time / _ms are presumably in milliseconds; TODO confirm.
  max_end_silence_time: 800
  max_start_silence_time: 3000
  do_start_point_detection: true
  do_end_point_detection: true
  window_size_ms: 200
  sil_to_speech_time_thres: 150
  speech_to_sil_time_thres: 150
  speech_2_noise_ratio: 1.0
  do_extend: 1
  lookback_time_start_point: 200
  lookahead_time_end_point: 100
  max_single_segment_time: 60000
  snr_thres: -100.0
  noise_frame_num_used_for_snr: 100
  decibel_thres: -100.0
  speech_noise_thres: 0.6
  fe_prior_thres: 0.0001
  silence_pdf_num: 1
  sil_pdf_ids: [0]
  speech_noise_thresh_low: -0.1
  speech_noise_thresh_high: 0.3
  output_frame_probs: false
  # NOTE(review): these two match frontend_conf.frame_shift and
  # frontend_conf.frame_length; presumably they must be kept in sync.
  frame_in_ms: 10
  frame_length_ms: 25

encoder: FSMN
encoder_conf:
  # NOTE(review): 400 equals frontend_conf.n_mels (80) x frontend_conf.lfr_m
  # (5); presumably it has to change with them. Confirm before editing.
  input_dim: 400
  input_affine_dim: 140
  fsmn_layers: 4
  linear_dim: 250
  proj_dim: 128
  lorder: 20
  rorder: 0
  lstride: 1
  rstride: 0
  output_affine_dim: 140
  output_dim: 248

frontend: WavFrontend
frontend_conf:
  fs: 16000
  window: hamming
  n_mels: 80
  frame_length: 25
  frame_shift: 10
  dither: 0.0
  lfr_m: 5
  lfr_n: 1