From aadf16a75fc2fb2552e9b24916e1ef55af08a3bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fernando=20P=C3=A9rez-Garc=C3=ADa?=
Date: Tue, 20 Feb 2024 10:56:13 +0000
Subject: [PATCH] Fix drop path not being used

---
 src/transformers/models/dinov2/modeling_dinov2.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/transformers/models/dinov2/modeling_dinov2.py b/src/transformers/models/dinov2/modeling_dinov2.py
index ddf70f08b750..accdf0a9b23b 100644
--- a/src/transformers/models/dinov2/modeling_dinov2.py
+++ b/src/transformers/models/dinov2/modeling_dinov2.py
@@ -380,7 +380,7 @@ def __init__(self, config: Dinov2Config) -> None:
         self.norm1 = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
         self.attention = Dinov2Attention(config)
         self.layer_scale1 = Dinov2LayerScale(config)
-        self.drop_path1 = Dinov2DropPath(config.drop_path_rate) if config.drop_path_rate > 0.0 else nn.Identity()
+        self.drop_path = Dinov2DropPath(config.drop_path_rate) if config.drop_path_rate > 0.0 else nn.Identity()

         self.norm2 = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)

@@ -389,7 +389,6 @@ def __init__(self, config: Dinov2Config) -> None:
         else:
             self.mlp = Dinov2MLP(config)
         self.layer_scale2 = Dinov2LayerScale(config)
-        self.drop_path2 = Dinov2DropPath(config.drop_path_rate) if config.drop_path_rate > 0.0 else nn.Identity()

     def forward(
         self,
@@ -408,7 +407,7 @@ def forward(
         outputs = self_attention_outputs[1:]  # add self attentions if we output attention weights

         # first residual connection
-        hidden_states = attention_output + hidden_states
+        hidden_states = self.drop_path(attention_output) + hidden_states

         # in Dinov2, layernorm is also applied after self-attention
         layer_output = self.norm2(hidden_states)
@@ -416,7 +415,7 @@ def forward(
         layer_output = self.layer_scale2(layer_output)

         # second residual connection
-        layer_output = layer_output + hidden_states
+        layer_output = self.drop_path(layer_output) + hidden_states

         outputs = (layer_output,) + outputs
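
For context, the sketch below illustrates the stochastic-depth ("drop path") behaviour that the patch wires into both residual connections: during training, each sample's residual branch is dropped with some probability and the surviving branches are rescaled. This is a generic, hypothetical helper written for illustration only, not the actual Dinov2DropPath implementation; before the fix the drop path modules were constructed but never called, so this regularization never took effect.

import torch
from torch import nn


class DropPathSketch(nn.Module):
    # Illustrative stochastic depth: drop a whole residual branch per sample.
    # Hypothetical stand-in for the drop-path idea, not Dinov2DropPath itself.

    def __init__(self, drop_prob: float = 0.0) -> None:
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # No-op at inference time or when the rate is zero, mirroring how the
        # layer falls back to nn.Identity() when drop_path_rate == 0.0.
        if self.drop_prob == 0.0 or not self.training:
            return x
        keep_prob = 1.0 - self.drop_prob
        # One Bernoulli draw per sample, broadcast over all remaining dims.
        shape = (x.shape[0],) + (1,) * (x.ndim - 1)
        mask = (torch.rand(shape, device=x.device) < keep_prob).to(x.dtype)
        # Rescale the kept branches so the expected value is unchanged.
        return x * mask / keep_prob


# With the patch applied, each residual connection effectively computes
#     hidden_states = drop_path(branch_output) + hidden_states
# so some samples skip the attention/MLP branch entirely during training.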