@@ -72,6 +72,7 @@ class SANMEncoder(nn.Module):
else:
xs_pad = self.embed(speech)
+ # NOTE(review): scaling step is commented out (inactive): xs_pad = xs_pad / scale
encoder_outs = self.model.encoders0(xs_pad, mask)
xs_pad, masks = encoder_outs[0], encoder_outs[1]
@@ -24,6 +24,7 @@ class EncoderLayerSANM(nn.Module):
residual = x
x = self.norm1(x)
x = self.self_attn(x, mask)
+ # NOTE(review): scaling step is commented out (inactive): x = x / scale
if self.in_size == self.size:
x = x + residual