From 4f09440fe5444e834ae8a89d70a61729bd7b5b07 Mon Sep 17 00:00:00 2001 From: chi Date: Mon, 30 Oct 2023 07:05:58 +0530 Subject: [PATCH 1/2] Removed the redundant SiLUActivation class and now use nn.functional.silu directly. --- src/transformers/activations.py | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/src/transformers/activations.py b/src/transformers/activations.py index 587dc2e59964..e09af668f1b7 100644 --- a/src/transformers/activations.py +++ b/src/transformers/activations.py @@ -137,19 +137,6 @@ def forward(self, input: Tensor) -> Tensor: return 0.5 * input * (1 + torch.tanh(self.precomputed_constant * (input + 0.044715 * torch.pow(input, 3)))) -class SiLUActivation(nn.Module): - """ - See Gaussian Error Linear Units (Hendrycks et al., https://arxiv.org/abs/1606.08415) where the SiLU (Sigmoid Linear - Unit) was originally introduced and coined, and see Sigmoid-Weighted Linear Units for Neural Network Function - Approximation in Reinforcement Learning (Elfwing et al., https://arxiv.org/abs/1702.03118) and Swish: a Self-Gated - Activation Function (Ramachandran et al., https://arxiv.org/abs/1710.05941v1) where the SiLU was experimented with - later. - """ - - def forward(self, input: Tensor) -> Tensor: - return nn.functional.silu(input) - - class MishActivation(nn.Module): """ See Mish: A Self-Regularized Non-Monotonic Activation Function (Misra., https://arxiv.org/abs/1908.08681). Also @@ -226,8 +213,8 @@ def __getitem__(self, key): "relu2": ReLUSquaredActivation, "relu6": nn.ReLU6, "sigmoid": nn.Sigmoid, - "silu": SiLUActivation, - "swish": SiLUActivation, + "silu": nn.functional.silu, + "swish": nn.functional.silu, "tanh": nn.Tanh, } ACT2FN = ClassInstantier(ACT2CLS) From 1a5fa4f3cefb7bacf403fd2a8acb6353351ccc32 Mon Sep 17 00:00:00 2001 From: chi Date: Mon, 30 Oct 2023 07:51:27 +0530 Subject: [PATCH 2/2] I apologize for adding nn.functional.silu. I have replaced it with nn.SiLU. 
--- src/transformers/activations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/activations.py b/src/transformers/activations.py index e09af668f1b7..be26825f4bad 100644 --- a/src/transformers/activations.py +++ b/src/transformers/activations.py @@ -213,8 +213,8 @@ def __getitem__(self, key): "relu2": ReLUSquaredActivation, "relu6": nn.ReLU6, "sigmoid": nn.Sigmoid, - "silu": nn.functional.silu, - "swish": nn.functional.silu, + "silu": nn.SiLU, + "swish": nn.SiLU, "tanh": nn.Tanh, } ACT2FN = ClassInstantier(ACT2CLS)