Fix use_parallel_residual and qkv_bias for StableLM GGUF config extraction #34450

Merged · 3 commits · Nov 5, 2024
11 changes: 11 additions & 0 deletions src/transformers/modeling_gguf_pytorch_utils.py
@@ -106,6 +106,17 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False):
if "qwen2moe" in architecture:
updated_architecture = "qwen2_moe"

# For stablelm architecture, we need to set qkv_bias and use_parallel_residual from tensors
# If `qkv_bias=True`, qkv_proj with bias will be present in the tensors
# If `use_parallel_residual=False`, ffn_norm will be present in the tensors
if "stablelm" in architecture:
attn_bias_name = {"attn_q.bias", "attn_k.bias", "attn_v.bias"}
ffn_norm_name = "ffn_norm"
qkv_bias = any(bias_name in tensor.name for tensor in reader.tensors for bias_name in attn_bias_name)
use_parallel_residual = any(ffn_norm_name in tensor.name for tensor in reader.tensors)
parsed_parameters["config"]["qkv_bias"] = qkv_bias
parsed_parameters["config"]["use_parallel_residual"] = not use_parallel_residual

model_size = ""
# extract the number of params from file name as architectures can differ ;
# eg. for falcon : `...falcon-7b-...`
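For reference, here is a minimal standalone sketch of the same tensor-name detection, run directly with the `gguf` package's `GGUFReader` (the reader the utility above relies on). The file path is a hypothetical local StableLM GGUF checkpoint, not one referenced in this PR.

```python
from gguf import GGUFReader

reader = GGUFReader("stablelm-model.gguf")  # hypothetical local GGUF file
tensor_names = [tensor.name for tensor in reader.tensors]

# Bias tensors on the q/k/v projections imply qkv_bias=True.
attn_bias_names = {"attn_q.bias", "attn_k.bias", "attn_v.bias"}
qkv_bias = any(bias_name in name for name in tensor_names for bias_name in attn_bias_names)

# A dedicated ffn_norm tensor only exists when the residual is NOT parallel.
has_ffn_norm = any("ffn_norm" in name for name in tensor_names)
use_parallel_residual = not has_ffn_norm

print({"qkv_bias": qkv_bias, "use_parallel_residual": use_parallel_residual})
```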
8 changes: 0 additions & 8 deletions tests/quantization/ggml/test_ggml.py
@@ -673,10 +673,6 @@ def test_stablelm_fp16(self):
             self.stablelm2_model_id,
             gguf_file=self.fp16_stablelm2_model_id,
             torch_dtype=torch.float16,
-            # for precise comparison it is required to use the original model config
-            # as quantized one is different in parameters: use_parallel_residual and use_qkv_bias
-            # and it highly influences on the output results
-            config=original_model.config,
         )

         tokenizer = AutoTokenizer.from_pretrained(self.stablelm2_model_id, gguf_file=self.fp16_stablelm2_model_id)
@@ -703,10 +699,6 @@ def test_stablelm_weights_conversion_fp16(self):
             gguf_file=self.fp16_stablelm2_model_id,
             device_map="auto",
             torch_dtype=torch.float16,
-            # for precise comparison it is required to use the original model config
-            # as quantized one is different in parameters: use_parallel_residual and use_qkv_bias
-            # and it highly influences on the output results
-            config=original_model.config,
         )

         converted_state_dict = converted_model.state_dict()
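With the config extraction fixed, the tests above can drop `config=original_model.config` and load the GGUF checkpoint directly. A hedged sketch of that usage follows; the repo id and GGUF file name are placeholders, not the exact ids the test suite uses.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "stablelm-2-repo-id"     # placeholder Hub repo id
gguf_file = "stablelm-2-fp16.gguf"  # placeholder GGUF file name

# qkv_bias and use_parallel_residual are now derived from the GGUF tensors,
# so no hand-provided config is needed for the converted model.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    gguf_file=gguf_file,
    torch_dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=gguf_file)

inputs = tokenizer("Hello", return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=10)
print(tokenizer.decode(output[0]))
```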