diff --git a/src/transformers/models/encodec/modeling_encodec.py b/src/transformers/models/encodec/modeling_encodec.py index bd56661b1980..5a299b601b47 100644 --- a/src/transformers/models/encodec/modeling_encodec.py +++ b/src/transformers/models/encodec/modeling_encodec.py @@ -111,14 +111,27 @@ def __init__( elif self.norm_type == "time_group_norm": self.norm = nn.GroupNorm(1, out_channels) - @staticmethod + kernel_size = self.conv.kernel_size[0] + stride = torch.tensor(self.conv.stride[0], dtype=torch.int64) + dilation = self.conv.dilation[0] + + # Effective kernel size with dilations. + kernel_size = torch.tensor((kernel_size - 1) * dilation + 1, dtype=torch.int64) + + self.register_buffer("stride", stride, persistent=False) + self.register_buffer("kernel_size", kernel_size, persistent=False) + self.register_buffer("padding_total", torch.tensor(kernel_size - stride, dtype=torch.int64), persistent=False) + def _get_extra_padding_for_conv1d( - hidden_states: torch.Tensor, kernel_size: int, stride: int, padding_total: int = 0 - ) -> int: + self, + hidden_states: torch.Tensor, + ) -> torch.Tensor: """See `pad_for_conv1d`.""" length = hidden_states.shape[-1] - n_frames = (length - kernel_size + padding_total) / stride + 1 - ideal_length = (math.ceil(n_frames) - 1) * stride + (kernel_size - padding_total) + n_frames = (length - self.kernel_size + self.padding_total) / self.stride + 1 + n_frames = torch.ceil(n_frames).to(torch.int64) - 1 + ideal_length = n_frames * self.stride + self.kernel_size - self.padding_total + return ideal_length - length @staticmethod @@ -141,20 +154,15 @@ def _pad1d(hidden_states: torch.Tensor, paddings: Tuple[int, int], mode: str = " return padded[..., :end] def forward(self, hidden_states): - kernel_size = self.conv.kernel_size[0] - stride = self.conv.stride[0] - dilation = self.conv.dilation[0] - kernel_size = (kernel_size - 1) * dilation + 1 # effective kernel size with dilations - padding_total = kernel_size - stride - extra_padding = self._get_extra_padding_for_conv1d(hidden_states, kernel_size, stride, padding_total) + extra_padding = self._get_extra_padding_for_conv1d(hidden_states) if self.causal: # Left padding for causal - hidden_states = self._pad1d(hidden_states, (padding_total, extra_padding), mode=self.pad_mode) + hidden_states = self._pad1d(hidden_states, (self.padding_total, extra_padding), mode=self.pad_mode) else: # Asymmetric padding required for odd strides - padding_right = padding_total // 2 - padding_left = padding_total - padding_right + padding_right = self.padding_total // 2 + padding_left = self.padding_total - padding_right hidden_states = self._pad1d( hidden_states, (padding_left, padding_right + extra_padding), mode=self.pad_mode )