From 0bc4b0b170d8a4a8e1774a1491948a937347fe91 Mon Sep 17 00:00:00 2001
From: Chengcheng Pei
Date: Tue, 17 Sep 2024 16:51:23 -0700
Subject: [PATCH 1/2] add back self.max_position_embeddings =
 config.max_position_embeddings

---
 src/transformers/models/qwen2/modeling_qwen2.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/transformers/models/qwen2/modeling_qwen2.py b/src/transformers/models/qwen2/modeling_qwen2.py
index d0ea8ef0e376..93a11ab283b9 100644
--- a/src/transformers/models/qwen2/modeling_qwen2.py
+++ b/src/transformers/models/qwen2/modeling_qwen2.py
@@ -310,6 +310,7 @@ def __init__(self, config: Qwen2Config, layer_idx: Optional[int] = None):
         self.head_dim = self.hidden_size // self.num_heads
         self.num_key_value_heads = config.num_key_value_heads
         self.num_key_value_groups = self.num_heads // self.num_key_value_heads
+        self.max_position_embeddings = config.max_position_embeddings
         self.rope_theta = config.rope_theta
         self.is_causal = True
         self.attention_dropout = config.attention_dropout

From 97ce518368f9130e4f5dea541bae945b3c644dbe Mon Sep 17 00:00:00 2001
From: Chengcheng Pei
Date: Tue, 17 Sep 2024 16:57:44 -0700
Subject: [PATCH 2/2] fix-copies

---
 src/transformers/models/qwen2_moe/modeling_qwen2_moe.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py b/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py
index 6f483e50cde0..825b16dff27e 100644
--- a/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py
+++ b/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py
@@ -388,6 +388,7 @@ def __init__(self, config: Qwen2MoeConfig, layer_idx: Optional[int] = None):
         self.head_dim = self.hidden_size // self.num_heads
         self.num_key_value_heads = config.num_key_value_heads
         self.num_key_value_groups = self.num_heads // self.num_key_value_heads
+        self.max_position_embeddings = config.max_position_embeddings
         self.rope_theta = config.rope_theta
         self.is_causal = True
         self.attention_dropout = config.attention_dropout
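
Note (not part of the patch): a minimal sketch of what restoring the attribute makes possible again. External code that introspects the attention module can read max_position_embeddings directly from it rather than reaching back into the config; constructing Qwen2Attention standalone from a default Qwen2Config below is an illustrative assumption, not something the patch itself does.

    # Hypothetical check (not from the patch): with the attribute restored,
    # code that reads it off the attention module no longer raises AttributeError.
    from transformers import Qwen2Config
    from transformers.models.qwen2.modeling_qwen2 import Qwen2Attention

    config = Qwen2Config()                      # default config, for illustration only
    attn = Qwen2Attention(config, layer_idx=0)  # layer_idx=0: any valid layer index works

    # The restored line in __init__ mirrors the config value onto the module:
    assert attn.max_position_embeddings == config.max_position_embeddings

The second commit applies the same one-line change to modeling_qwen2_moe.py, since fix-copies keeps the Qwen2MoE attention module in sync with the Qwen2 original.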