# vad_utils.py
  1. import torch
  2. from torch.nn.utils.rnn import pad_sequence
  3. def slice_padding_fbank(speech, speech_lengths, vad_segments):
  4. speech_list = []
  5. speech_lengths_list = []
  6. for i, segment in enumerate(vad_segments):
  7. bed_idx = int(segment[0][0]*16)
  8. end_idx = min(int(segment[0][1]*16), speech_lengths[0])
  9. speech_i = speech[0, bed_idx: end_idx]
  10. speech_lengths_i = end_idx-bed_idx
  11. speech_list.append(speech_i)
  12. speech_lengths_list.append(speech_lengths_i)
  13. feats_pad = pad_sequence(speech_list, batch_first=True, padding_value=0.0)
  14. speech_lengths_pad = torch.Tensor(speech_lengths_list).int()
  15. return feats_pad, speech_lengths_pad
  16. def slice_padding_audio_samples(speech, speech_lengths, vad_segments):
  17. speech_list = []
  18. speech_lengths_list = []
  19. for i, segment in enumerate(vad_segments):
  20. bed_idx = int(segment[0][0] * 16)
  21. end_idx = min(int(segment[0][1] * 16), speech_lengths)
  22. speech_i = speech[bed_idx: end_idx]
  23. speech_lengths_i = end_idx - bed_idx
  24. speech_list.append(speech_i)
  25. speech_lengths_list.append(speech_lengths_i)
  26. return speech_list, speech_lengths_list