WindowDetector.cs 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5. using System.Threading.Tasks;
  6. namespace AliFsmnVadSharp
  7. {
  /// <summary>
  /// Per-frame classification handed to <c>WindowDetector.DetectOneFrame</c>.
  /// </summary>
  public enum FrameState
  {
      /// <summary>
      /// No usable decision for the frame; <c>DetectOneFrame</c> answers
      /// <c>kChangeStateInvalid</c> for any value other than speech or silence.
      /// </summary>
      kFrameStateInvalid = -1,

      /// <summary>Silence frame; contributes 0 to the detector's window sum.</summary>
      kFrameStateSil = 0,

      /// <summary>Speech frame; contributes 1 to the detector's window sum.</summary>
      kFrameStateSpeech = 1
  }
  /// <summary>
  /// Final voiced/unvoiced verdict reported for a frame, expressed as the transition
  /// from the detector's previous state to its state after the frame.
  /// </summary>
  public enum AudioChangeState
  {
      /// <summary>The detector was in speech and stays in speech.</summary>
      kChangeStateSpeech2Speech = 0,

      /// <summary>The detector was in speech and switches to silence.</summary>
      kChangeStateSpeech2Sil = 1,

      /// <summary>The detector was in silence and stays in silence.</summary>
      kChangeStateSil2Sil = 2,

      /// <summary>The detector was in silence and switches to speech.</summary>
      kChangeStateSil2Speech = 3,

      /// <summary>
      /// Not produced by <c>WindowDetector</c> in this file.
      /// NOTE(review): presumably "speech has not begun yet" - confirm against the callers.
      /// </summary>
      kChangeStateNoBegin = 4,

      /// <summary>The frame could not be classified (e.g. an invalid frame state was supplied).</summary>
      kChangeStateInvalid = 5
  }
  /// <summary>
  /// Sliding-window smoother that converts per-frame speech/silence decisions into
  /// speech/silence transition events.
  /// </summary>
  /// <remarks>
  /// The last <c>window_size_ms / frame_size_ms</c> frame decisions are kept in a circular
  /// buffer (1 = speech, 0 = silence) together with their running sum. While in silence, the
  /// detector switches to speech once the sum reaches the silence-to-speech threshold; while
  /// in speech, it switches back to silence once the sum drops to the speech-to-silence
  /// threshold or below. Both thresholds are frame counts derived from the millisecond
  /// arguments by integer division with the frame size.
  /// </remarks>
  internal class WindowDetector
  {
      // Configuration exactly as passed to the constructor (all zero for the default constructor).
      private readonly int _window_size_ms = 0;
      private readonly int _sil_to_speech_time = 0;
      private readonly int _speech_to_sil_time = 0;
      private readonly int _frame_size_ms = 0;

      // Window length in frames: window_size_ms / frame_size_ms (integer division).
      private readonly int _win_size_frame = 0;

      // Circular buffer of the most recent frame decisions and the sum of its entries.
      private readonly int[] _win_state = new int[0];
      private int _win_sum = 0;
      private int _cur_win_pos = 0;

      // State the detector was left in by the previous frame; starts in silence.
      private FrameState _pre_frame_state = FrameState.kFrameStateSil;

      // Transition thresholds in frames: *_time / frame_size_ms (integer division).
      private readonly int _sil_to_speech_frmcnt_thres = 0;
      private readonly int _speech_to_sil_frmcnt_thres = 0;

      // Counters that are only zeroed by the constructor and Reset; nothing in this class
      // reads or advances them.
      private int _voice_last_frame_count = 0;
      private int _noise_last_frame_count = 0;
      private int _hydre_frame_count = 0;

      /// <summary>
      /// Creates a detector with an empty window. Such an instance cannot process frames:
      /// <see cref="DetectOneFrame"/> throws <see cref="InvalidOperationException"/> for any
      /// valid frame until a properly configured instance is used instead.
      /// </summary>
      public WindowDetector()
      {
      }

      /// <summary>
      /// Creates a detector for the given window and transition timings.
      /// </summary>
      /// <param name="window_size_ms">Length of the smoothing window in milliseconds; must not be negative.</param>
      /// <param name="sil_to_speech_time">Milliseconds used to derive the silence-to-speech frame threshold.</param>
      /// <param name="speech_to_sil_time">Milliseconds used to derive the speech-to-silence frame threshold.</param>
      /// <param name="frame_size_ms">Duration of one frame in milliseconds; must be positive.</param>
      /// <exception cref="ArgumentOutOfRangeException">
      /// <paramref name="frame_size_ms"/> is zero or negative, or
      /// <paramref name="window_size_ms"/> is negative.
      /// </exception>
      public WindowDetector(int window_size_ms, int sil_to_speech_time, int speech_to_sil_time, int frame_size_ms)
      {
          // Every derived quantity divides by the frame size, so reject values that would
          // otherwise surface as DivideByZeroException or a negative array length.
          if (frame_size_ms <= 0)
          {
              throw new ArgumentOutOfRangeException("frame_size_ms", frame_size_ms, "Frame size must be a positive number of milliseconds.");
          }
          if (window_size_ms < 0)
          {
              throw new ArgumentOutOfRangeException("window_size_ms", window_size_ms, "Window size must not be negative.");
          }

          _window_size_ms = window_size_ms;
          _sil_to_speech_time = sil_to_speech_time;
          _speech_to_sil_time = speech_to_sil_time;
          _frame_size_ms = frame_size_ms;

          _win_size_frame = window_size_ms / frame_size_ms;
          _win_state = new int[_win_size_frame];
          _sil_to_speech_frmcnt_thres = sil_to_speech_time / frame_size_ms;
          _speech_to_sil_frmcnt_thres = speech_to_sil_time / frame_size_ms;
      }

      /// <summary>
      /// Returns the detector to its initial state: empty window, silence, all counters zero.
      /// The configuration (window size and thresholds) is kept.
      /// </summary>
      public void Reset()
      {
          // The buffer length never changes, so zero it in place instead of reallocating.
          Array.Clear(_win_state, 0, _win_state.Length);
          _win_sum = 0;
          _cur_win_pos = 0;
          _pre_frame_state = FrameState.kFrameStateSil;
          _voice_last_frame_count = 0;
          _noise_last_frame_count = 0;
          _hydre_frame_count = 0;
      }

      /// <summary>Gets the window length in frames.</summary>
      /// <returns><c>window_size_ms / frame_size_ms</c>, or 0 for a default-constructed detector.</returns>
      public int GetWinSize()
      {
          return _win_size_frame;
      }

      /// <summary>
      /// Pushes one frame decision into the window and reports the resulting transition.
      /// </summary>
      /// <param name="frameState">Decision for the current frame.</param>
      /// <param name="frame_count">Not used by this method; kept for interface compatibility.</param>
      /// <returns>
      /// <see cref="AudioChangeState.kChangeStateInvalid"/> when <paramref name="frameState"/> is
      /// neither speech nor silence (the window is left untouched in that case); otherwise the
      /// transition between the previous detector state and the state after this frame.
      /// </returns>
      /// <exception cref="InvalidOperationException">
      /// The window holds no frames (default constructor, or a window shorter than one frame).
      /// </exception>
      public AudioChangeState DetectOneFrame(FrameState frameState, int frame_count)
      {
          // Reject anything that is not an explicit speech/silence decision before touching state.
          if (frameState != FrameState.kFrameStateSpeech && frameState != FrameState.kFrameStateSil)
          {
              return AudioChangeState.kChangeStateInvalid;
          }

          // An empty window would otherwise fail below with an index or divide-by-zero error.
          if (_win_size_frame <= 0)
          {
              throw new InvalidOperationException("WindowDetector has an empty window; construct it with window_size_ms >= frame_size_ms.");
          }

          int speechFlag = frameState == FrameState.kFrameStateSpeech ? 1 : 0;

          // Replace the oldest entry of the circular buffer and keep the running sum in sync.
          _win_sum += speechFlag - _win_state[_cur_win_pos];
          _win_state[_cur_win_pos] = speechFlag;
          _cur_win_pos = (_cur_win_pos + 1) % _win_size_frame;

          if (_pre_frame_state == FrameState.kFrameStateSil)
          {
              if (_win_sum >= _sil_to_speech_frmcnt_thres)
              {
                  _pre_frame_state = FrameState.kFrameStateSpeech;
                  return AudioChangeState.kChangeStateSil2Speech;
              }
              return AudioChangeState.kChangeStateSil2Sil;
          }

          if (_pre_frame_state == FrameState.kFrameStateSpeech)
          {
              if (_win_sum <= _speech_to_sil_frmcnt_thres)
              {
                  _pre_frame_state = FrameState.kFrameStateSil;
                  return AudioChangeState.kChangeStateSpeech2Sil;
              }
              return AudioChangeState.kChangeStateSpeech2Speech;
          }

          return AudioChangeState.kChangeStateInvalid;
      }

      /// <summary>Gets the configured frame duration in milliseconds.</summary>
      private int FrameSizeMs()
      {
          return _frame_size_ms;
      }
  }
  123. }