Fix rope theta for OpenLlama (#29893)
fix: rope_theta for open llama
jla524 authored Mar 30, 2024
1 parent 5ad7f17 commit 6fd93fe
Showing 2 changed files with 5 additions and 0 deletions.
@@ -66,6 +66,8 @@ class OpenLlamaConfig(PretrainedConfig):
relevant if `config.is_decoder=True`.
tie_word_embeddings(`bool`, *optional*, defaults to `False`):
Whether to tie weight embeddings
+ rope_theta (`float`, *optional*, defaults to 10000.0):
+     The base period of the RoPE embeddings.
rope_scaling (`Dict`, *optional*):
Dictionary containing the scaling configuration for the RoPE embeddings. Currently supports two scaling
strategies: linear and dynamic. Their scaling factor must be a float greater than 1. The expected format is
@@ -113,6 +115,7 @@ def __init__(
attention_dropout_prob=0.1,
use_stable_embedding=True,
shared_input_output_embedding=True,
+ rope_theta=10000.0,
rope_scaling=None,
**kwargs,
):
@@ -133,6 +136,7 @@ def __init__(
self.attention_dropout_prob = attention_dropout_prob
self.use_stable_embedding = use_stable_embedding
self.shared_input_output_embedding = shared_input_output_embedding
+ self.rope_theta = rope_theta
self.rope_scaling = rope_scaling
self._rope_scaling_validation()

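For reference, a minimal usage sketch (not part of this commit) of what the configuration change enables. It assumes `OpenLlamaConfig` is importable from the top-level `transformers` package, which may vary by version since Open-Llama is a deprecated model.

```python
# Hypothetical usage sketch: pass a non-default RoPE base through the config.
# Open-Llama is deprecated, so the import location may differ across versions.
from transformers import OpenLlamaConfig

config = OpenLlamaConfig(rope_theta=500000.0)  # default stays 10000.0
print(config.rope_theta)  # the value is now stored on the config object
```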
@@ -214,6 +214,7 @@ def __init__(self, config: OpenLlamaConfig):
self.head_dim = self.hidden_size // self.num_heads
self.max_position_embeddings = config.max_position_embeddings
self.dropout_prob = config.attention_dropout_prob
+ self.rope_theta = config.rope_theta

if (self.head_dim * self.num_heads) != self.hidden_size:
raise ValueError(
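The modeling change mirrors `config.rope_theta` onto the attention module. As a generic illustration (not the actual Open-Llama code), a RoPE base typically enters the rotary embedding as the base of the inverse-frequency term:

```python
import torch

def rope_inverse_frequencies(head_dim: int, rope_theta: float = 10000.0) -> torch.Tensor:
    # Standard RoPE inverse frequencies: rope_theta ** (-2i / head_dim) for i in [0, head_dim // 2).
    return 1.0 / (rope_theta ** (torch.arange(0, head_dim, 2, dtype=torch.float32) / head_dim))

# A larger base gives longer rotation periods; with this commit, rope_theta is
# defined on the config and stored on the attention module as shown above.
inv_freq_default = rope_inverse_frequencies(64, rope_theta=10000.0)
inv_freq_large = rope_inverse_frequencies(64, rope_theta=500000.0)
```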
