vad_utils.py 638 B

123456789101112131415161718
  1. import torch
  2. from torch.nn.utils.rnn import pad_sequence
  3. def slice_padding_fbank(speech, speech_lengths, vad_segments):
  4. speech_list = []
  5. speech_lengths_list = []
  6. for i, segment in enumerate(vad_segments):
  7. bed_idx = int(segment[0][0]*16)
  8. end_idx = min(int(segment[0][1]*16), speech_lengths[0])
  9. speech_i = speech[0, bed_idx: end_idx]
  10. speech_lengths_i = end_idx-bed_idx
  11. speech_list.append(speech_i)
  12. speech_lengths_list.append(speech_lengths_i)
  13. feats_pad = pad_sequence(speech_list, batch_first=True, padding_value=0.0)
  14. speech_lengths_pad = torch.Tensor(speech_lengths_list).int()
  15. return feats_pad, speech_lengths_pad