From ac7ac8799c114e056a9a5897254068195778724d Mon Sep 17 00:00:00 2001
From: Pedro Cuenca
Date: Tue, 13 Feb 2024 01:28:26 +0100
Subject: [PATCH] Fix bos and eos token ids in the model configuration (#3)

---
 src/transformers/models/gemma/configuration_gemma.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/transformers/models/gemma/configuration_gemma.py b/src/transformers/models/gemma/configuration_gemma.py
index ef40a1a9a14f..6b759cab3e8c 100644
--- a/src/transformers/models/gemma/configuration_gemma.py
+++ b/src/transformers/models/gemma/configuration_gemma.py
@@ -74,9 +74,9 @@ class GemmaConfig(PretrainedConfig):
             relevant if `config.is_decoder=True`.
         pad_token_id (`int`, *optional*):
             Padding token id.
-        bos_token_id (`int`, *optional*, defaults to 1):
+        bos_token_id (`int`, *optional*, defaults to 2):
             Beginning of stream token id.
-        eos_token_id (`int`, *optional*, defaults to 2):
+        eos_token_id (`int`, *optional*, defaults to 1):
             End of stream token id.
         pretraining_tp (`int`, *optional*, defaults to 1):
             Experimental feature. Tensor parallelism rank used during pretraining. Please refer to [this
@@ -131,8 +131,8 @@ def __init__(
         rms_norm_eps=1e-6,
         use_cache=True,
         pad_token_id=None,
-        bos_token_id=1,
-        eos_token_id=2,
+        bos_token_id=2,
+        eos_token_id=1,
         tie_word_embeddings=True,
         rope_theta=10000.0,
         rope_scaling=None,
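
Note (not part of the patch): a minimal sketch of what the corrected defaults imply, assuming a transformers build that ships the Gemma model with this change applied.

# Instantiating GemmaConfig with no arguments should now report the
# swapped special-token ids (bos=2, eos=1) instead of the old bos=1, eos=2.
from transformers import GemmaConfig

config = GemmaConfig()
print(config.bos_token_id)  # expected: 2
print(config.eos_token_id)  # expected: 1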