Просмотр исходного кода

Fix an issue to make batch inference easier for the predictor and decoder.

TeaPoly 3 лет назад
Родитель
Commit
1b9ac4f7a2
2 измененных файлов с 2 добавлено и 2 удалено
  1. 1 1
      funasr/bin/asr_inference_paraformer.py
  2. 1 1
      funasr/models/predictor/cif.py

+ 1 - 1
funasr/bin/asr_inference_paraformer.py

@@ -189,7 +189,7 @@ class Speech2Text:
 
 
         predictor_outs = self.asr_model.calc_predictor(enc, enc_len)
         predictor_outs = self.asr_model.calc_predictor(enc, enc_len)
         pre_acoustic_embeds, pre_token_length = predictor_outs[0], predictor_outs[1]
         pre_acoustic_embeds, pre_token_length = predictor_outs[0], predictor_outs[1]
-        pre_token_length = torch.tensor([pre_acoustic_embeds.size(1)], device=pre_acoustic_embeds.device)
+        pre_token_length = pre_token_length.long()
         decoder_outs = self.asr_model.cal_decoder_with_predictor(enc, enc_len, pre_acoustic_embeds, pre_token_length)
         decoder_outs = self.asr_model.cal_decoder_with_predictor(enc, enc_len, pre_acoustic_embeds, pre_token_length)
         decoder_out, ys_pad_lens = decoder_outs[0], decoder_outs[1]
         decoder_out, ys_pad_lens = decoder_outs[0], decoder_outs[1]
 
 

+ 1 - 1
funasr/models/predictor/cif.py

@@ -147,7 +147,7 @@ class CifPredictorV2(nn.Module):
         b, t, d = hidden.size()
         b, t, d = hidden.size()
         tail_threshold = self.tail_threshold
         tail_threshold = self.tail_threshold
         tail_threshold = torch.tensor([tail_threshold], dtype=alphas.dtype).to(alphas.device)
         tail_threshold = torch.tensor([tail_threshold], dtype=alphas.dtype).to(alphas.device)
-        tail_threshold = torch.reshape(tail_threshold, (1, 1))
+        tail_threshold = tail_threshold.unsqueeze(0).repeat(b, 1)
         alphas = torch.cat([alphas, tail_threshold], dim=1)
         alphas = torch.cat([alphas, tail_threshold], dim=1)
         zeros = torch.zeros((b, 1, d), dtype=hidden.dtype).to(hidden.device)
         zeros = torch.zeros((b, 1, d), dtype=hidden.dtype).to(hidden.device)
         hidden = torch.cat([hidden, zeros], dim=1)
         hidden = torch.cat([hidden, zeros], dim=1)