From 4991216841968e11b78e16379c42e9fc1011db88 Mon Sep 17 00:00:00 2001
From: Chi
Date: Thu, 2 Nov 2023 23:43:57 +0530
Subject: [PATCH] Removed the redundant SiLUActivation class. (#27136)

* Removed the redundant SiLUActivation class and now use nn.functional.silu directly.

* I apologize for adding torch.functional.silu. I have replaced it with nn.SiLU.
---
 src/transformers/activations.py | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/src/transformers/activations.py b/src/transformers/activations.py
index 587dc2e59964..be26825f4bad 100644
--- a/src/transformers/activations.py
+++ b/src/transformers/activations.py
@@ -137,19 +137,6 @@ def forward(self, input: Tensor) -> Tensor:
         return 0.5 * input * (1 + torch.tanh(self.precomputed_constant * (input + 0.044715 * torch.pow(input, 3))))
 
 
-class SiLUActivation(nn.Module):
-    """
-    See Gaussian Error Linear Units (Hendrycks et al., https://arxiv.org/abs/1606.08415) where the SiLU (Sigmoid Linear
-    Unit) was originally introduced and coined, and see Sigmoid-Weighted Linear Units for Neural Network Function
-    Approximation in Reinforcement Learning (Elfwing et al., https://arxiv.org/abs/1702.03118) and Swish: a Self-Gated
-    Activation Function (Ramachandran et al., https://arxiv.org/abs/1710.05941v1) where the SiLU was experimented with
-    later.
-    """
-
-    def forward(self, input: Tensor) -> Tensor:
-        return nn.functional.silu(input)
-
-
 class MishActivation(nn.Module):
     """
     See Mish: A Self-Regularized Non-Monotonic Activation Function (Misra., https://arxiv.org/abs/1908.08681). Also
@@ -226,8 +213,8 @@ def __getitem__(self, key):
     "relu2": ReLUSquaredActivation,
     "relu6": nn.ReLU6,
     "sigmoid": nn.Sigmoid,
-    "silu": SiLUActivation,
-    "swish": SiLUActivation,
+    "silu": nn.SiLU,
+    "swish": nn.SiLU,
     "tanh": nn.Tanh,
 }
 ACT2FN = ClassInstantier(ACT2CLS)
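
For reference (not part of the patch above), a minimal sketch assuming a local PyTorch install: it checks that nn.SiLU is a drop-in replacement for the removed SiLUActivation wrapper, since both compute nn.functional.silu on the input tensor.

# Illustrative check only; `x` is an arbitrary example tensor, not from the patch.
import torch
from torch import nn

x = torch.randn(4, 8)

new_act = nn.SiLU()                  # what ACT2CLS["silu"] now maps to
old_output = nn.functional.silu(x)   # what SiLUActivation.forward used to return

assert torch.allclose(new_act(x), old_output)
assert torch.allclose(new_act(x), x * torch.sigmoid(x))  # SiLU definition: x * sigmoid(x)

Because ACT2FN wraps ACT2CLS in ClassInstantier, lookups such as ACT2FN["silu"] and ACT2FN["swish"] now return an nn.SiLU instance instead of a SiLUActivation instance, so models that fetch the activation by name behave the same as before.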