Commit

LLM: fix mpt load_low_bit issue (intel#10075)
* fix

* retry

* retry
JinBridger authored and Jasonzzt committed Feb 19, 2024
1 parent 49a09fb commit 3fa67c0
Showing 1 changed file with 5 additions and 1 deletion.
python/llm/src/bigdl/llm/transformers/model.py (5 additions, 1 deletion)
@@ -361,7 +361,11 @@ def load_convert(cls, q_k, optimize_model, *args, **kwargs):
                                      cpu_embedding=cpu_embedding, lightweight_bmm=lightweight_bmm,
                                      torch_dtype=kwargs.get("torch_dtype", 'auto'))
         model.config.update({"bigdl_transformers_low_bit": q_k})
-        model.config.update({"tie_word_embeddings": False})
+
+        # enable tie_word_embeddings for MPT
+        # refer to https://huggingface.co/mosaicml/mpt-7b-chat/blob/main/modeling_mpt.py#L232
+        if model.config.architectures[0] != 'MPTForCausalLM':
+            model.config.update({"tie_word_embeddings": False})
 
         # add save_low_bit to pretrained model dynamically
         import types

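Context for the change: the guard skips the `tie_word_embeddings` override for `MPTForCausalLM` because MPT's bundled modeling code (the modeling_mpt.py line referenced in the diff) only supports tied word embeddings, so a saved config with `"tie_word_embeddings": false` made reloading MPT checkpoints fail. Below is a minimal sketch, not part of the commit itself, of the save/load round trip this fix addresses, using bigdl-llm's documented AutoModelForCausalLM wrapper; the model id, output path, and 4-bit setting are illustrative:

```python
# Sketch of the low-bit save/load round trip affected by this fix.
# Assumes bigdl-llm is installed; model id and output path are placeholders.
from bigdl.llm.transformers import AutoModelForCausalLM

# Load MPT and quantize on the fly. trust_remote_code is required because
# MPT ships its own modeling_mpt.py.
model = AutoModelForCausalLM.from_pretrained(
    "mosaicml/mpt-7b-chat",
    load_in_4bit=True,
    trust_remote_code=True,
)

# Persist the quantized weights plus config. Before this commit, the saved
# config was always forced to "tie_word_embeddings": false, including for MPT.
model.save_low_bit("./mpt-7b-chat-4bit")

# Reload the quantized checkpoint. This is the path that previously failed
# for MPT, whose modeling code rejects untied word embeddings.
model = AutoModelForCausalLM.load_low_bit(
    "./mpt-7b-chat-4bit",
    trust_remote_code=True,
)
```

With the guard in place, the config saved for MPT leaves `tie_word_embeddings` untouched, so the reloaded model passes the tied-embedding check in MPT's remote code while other architectures keep the previous behavior.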