From 90297a406681358b823b152dd77406e3340dcd28 Mon Sep 17 00:00:00 2001 From: fxmarty <9808326+fxmarty@users.noreply.github.com> Date: Wed, 27 Mar 2024 17:25:54 +0100 Subject: [PATCH 1/4] fix encodec onnx export for musicgen --- .../models/encodec/modeling_encodec.py | 34 +++++++++++-------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/src/transformers/models/encodec/modeling_encodec.py b/src/transformers/models/encodec/modeling_encodec.py index bd56661b1980..f4300e540f73 100644 --- a/src/transformers/models/encodec/modeling_encodec.py +++ b/src/transformers/models/encodec/modeling_encodec.py @@ -111,15 +111,24 @@ def __init__( elif self.norm_type == "time_group_norm": self.norm = nn.GroupNorm(1, out_channels) - @staticmethod + kernel_size = self.conv.kernel_size[0] + stride = torch.tensor(self.conv.stride[0]) + dilation = self.conv.dilation[0] + kernel_size = torch.tensor((kernel_size - 1) * dilation + 1) # effective kernel size with dilations + + self.register_buffer("stride", stride, persistent=False) + self.register_buffer("kernel_size", kernel_size, persistent=False) + self.register_buffer("padding_total", torch.tensor(kernel_size - stride), persistent=False) + def _get_extra_padding_for_conv1d( - hidden_states: torch.Tensor, kernel_size: int, stride: int, padding_total: int = 0 - ) -> int: + self, hidden_states: torch.Tensor, + ) -> torch.Tensor: """See `pad_for_conv1d`.""" length = hidden_states.shape[-1] - n_frames = (length - kernel_size + padding_total) / stride + 1 - ideal_length = (math.ceil(n_frames) - 1) * stride + (kernel_size - padding_total) - return ideal_length - length + n_frames = (length - self.kernel_size + self.padding_total) / self.stride + 1 + ideal_length = ((torch.ceil(n_frames) - 1) * self.stride + (self.kernel_size - self.padding_total)) / 1. # Fixes Cannot input a tensor of type Float as an integral argument + return (ideal_length - length).to(torch.int64) + @staticmethod def _pad1d(hidden_states: torch.Tensor, paddings: Tuple[int, int], mode: str = "zero", value: float = 0.0): @@ -141,20 +150,15 @@ def _pad1d(hidden_states: torch.Tensor, paddings: Tuple[int, int], mode: str = " return padded[..., :end] def forward(self, hidden_states): - kernel_size = self.conv.kernel_size[0] - stride = self.conv.stride[0] - dilation = self.conv.dilation[0] - kernel_size = (kernel_size - 1) * dilation + 1 # effective kernel size with dilations - padding_total = kernel_size - stride - extra_padding = self._get_extra_padding_for_conv1d(hidden_states, kernel_size, stride, padding_total) + extra_padding = self._get_extra_padding_for_conv1d(hidden_states) if self.causal: # Left padding for causal - hidden_states = self._pad1d(hidden_states, (padding_total, extra_padding), mode=self.pad_mode) + hidden_states = self._pad1d(hidden_states, (self.padding_total, extra_padding), mode=self.pad_mode) else: # Asymmetric padding required for odd strides - padding_right = padding_total // 2 - padding_left = padding_total - padding_right + padding_right = self.padding_total // 2 + padding_left = self.padding_total - padding_right hidden_states = self._pad1d( hidden_states, (padding_left, padding_right + extra_padding), mode=self.pad_mode ) From 1ec46d796c88a801895cb3d5ba46c66d3c4ba2f1 Mon Sep 17 00:00:00 2001 From: fxmarty <9808326+fxmarty@users.noreply.github.com> Date: Wed, 27 Mar 2024 17:33:58 +0100 Subject: [PATCH 2/4] simplification --- src/transformers/models/encodec/modeling_encodec.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/transformers/models/encodec/modeling_encodec.py b/src/transformers/models/encodec/modeling_encodec.py index f4300e540f73..aab2b2c968f4 100644 --- a/src/transformers/models/encodec/modeling_encodec.py +++ b/src/transformers/models/encodec/modeling_encodec.py @@ -112,13 +112,13 @@ def __init__( self.norm = nn.GroupNorm(1, out_channels) kernel_size = self.conv.kernel_size[0] - stride = torch.tensor(self.conv.stride[0]) + stride = torch.tensor(self.conv.stride[0], dtype=torch.int64) dilation = self.conv.dilation[0] - kernel_size = torch.tensor((kernel_size - 1) * dilation + 1) # effective kernel size with dilations + kernel_size = torch.tensor((kernel_size - 1) * dilation + 1, dtype=torch.int64) # effective kernel size with dilations self.register_buffer("stride", stride, persistent=False) self.register_buffer("kernel_size", kernel_size, persistent=False) - self.register_buffer("padding_total", torch.tensor(kernel_size - stride), persistent=False) + self.register_buffer("padding_total", torch.tensor(kernel_size - stride, dtype=torch.int64), persistent=False) def _get_extra_padding_for_conv1d( self, hidden_states: torch.Tensor, @@ -126,8 +126,8 @@ def _get_extra_padding_for_conv1d( """See `pad_for_conv1d`.""" length = hidden_states.shape[-1] n_frames = (length - self.kernel_size + self.padding_total) / self.stride + 1 - ideal_length = ((torch.ceil(n_frames) - 1) * self.stride + (self.kernel_size - self.padding_total)) / 1. # Fixes Cannot input a tensor of type Float as an integral argument - return (ideal_length - length).to(torch.int64) + ideal_length = ((torch.ceil(n_frames).to(torch.int64) - 1) * self.stride + (self.kernel_size - self.padding_total)) + return ideal_length - length @staticmethod From 63a26419bf569ce0d0dd812f57ff0654cef9aa2f Mon Sep 17 00:00:00 2001 From: fxmarty <9808326+fxmarty@users.noreply.github.com> Date: Wed, 27 Mar 2024 18:14:16 +0100 Subject: [PATCH 3/4] fix quality --- src/transformers/models/encodec/modeling_encodec.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/transformers/models/encodec/modeling_encodec.py b/src/transformers/models/encodec/modeling_encodec.py index aab2b2c968f4..aca7cf1ae568 100644 --- a/src/transformers/models/encodec/modeling_encodec.py +++ b/src/transformers/models/encodec/modeling_encodec.py @@ -114,22 +114,26 @@ def __init__( kernel_size = self.conv.kernel_size[0] stride = torch.tensor(self.conv.stride[0], dtype=torch.int64) dilation = self.conv.dilation[0] - kernel_size = torch.tensor((kernel_size - 1) * dilation + 1, dtype=torch.int64) # effective kernel size with dilations + kernel_size = torch.tensor( + (kernel_size - 1) * dilation + 1, dtype=torch.int64 + ) # effective kernel size with dilations self.register_buffer("stride", stride, persistent=False) self.register_buffer("kernel_size", kernel_size, persistent=False) self.register_buffer("padding_total", torch.tensor(kernel_size - stride, dtype=torch.int64), persistent=False) def _get_extra_padding_for_conv1d( - self, hidden_states: torch.Tensor, + self, + hidden_states: torch.Tensor, ) -> torch.Tensor: """See `pad_for_conv1d`.""" length = hidden_states.shape[-1] n_frames = (length - self.kernel_size + self.padding_total) / self.stride + 1 - ideal_length = ((torch.ceil(n_frames).to(torch.int64) - 1) * self.stride + (self.kernel_size - self.padding_total)) + ideal_length = (torch.ceil(n_frames).to(torch.int64) - 1) * self.stride + ( + self.kernel_size - self.padding_total + ) return ideal_length - length - @staticmethod def _pad1d(hidden_states: torch.Tensor, paddings: Tuple[int, int], mode: str = "zero", value: float = 0.0): """Tiny wrapper around torch.nn.functional.pad, just to allow for reflect padding on small input. From b81c2544b6927e3f16e12479e52b818f3e4df5bf Mon Sep 17 00:00:00 2001 From: fxmarty <9808326+fxmarty@users.noreply.github.com> Date: Wed, 27 Mar 2024 18:17:11 +0100 Subject: [PATCH 4/4] better style --- src/transformers/models/encodec/modeling_encodec.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/transformers/models/encodec/modeling_encodec.py b/src/transformers/models/encodec/modeling_encodec.py index aca7cf1ae568..5a299b601b47 100644 --- a/src/transformers/models/encodec/modeling_encodec.py +++ b/src/transformers/models/encodec/modeling_encodec.py @@ -114,9 +114,9 @@ def __init__( kernel_size = self.conv.kernel_size[0] stride = torch.tensor(self.conv.stride[0], dtype=torch.int64) dilation = self.conv.dilation[0] - kernel_size = torch.tensor( - (kernel_size - 1) * dilation + 1, dtype=torch.int64 - ) # effective kernel size with dilations + + # Effective kernel size with dilations. + kernel_size = torch.tensor((kernel_size - 1) * dilation + 1, dtype=torch.int64) self.register_buffer("stride", stride, persistent=False) self.register_buffer("kernel_size", kernel_size, persistent=False) @@ -129,9 +129,9 @@ def _get_extra_padding_for_conv1d( """See `pad_for_conv1d`.""" length = hidden_states.shape[-1] n_frames = (length - self.kernel_size + self.padding_total) / self.stride + 1 - ideal_length = (torch.ceil(n_frames).to(torch.int64) - 1) * self.stride + ( - self.kernel_size - self.padding_total - ) + n_frames = torch.ceil(n_frames).to(torch.int64) - 1 + ideal_length = n_frames * self.stride + self.kernel_size - self.padding_total + return ideal_length - length @staticmethod