Support exporting MiniCPM3-4B and using the C++ Tokenizer
cgli committed Dec 1, 2024
1 parent 02de5cb commit e6d4e56
Showing 6 changed files with 22 additions and 12 deletions.
6 changes: 5 additions & 1 deletion docs/models.md
@@ -130,6 +130,9 @@
| | | | |
| openbmb/MiniCPM-2B-sft-fp16 | [](#其它模型) | [](#minicpm模型导出) | |
| openbmb/MiniCPM-2B-dpo-fp16 | [](#其它模型) | [](#minicpm模型导出) | |
| openbmb/MiniCPM3-4B | [](#其它模型) | [](#minicpm模型导出) | |
| | | | |
| microsoft/Phi-3-mini-4k-instruct | | | |


### Convert after loading (two-line acceleration mode) (convert on-the-fly)
@@ -265,6 +268,7 @@ python3 tools/llamalike2flm.py qwen1.5-7b-int4.flm int4 "qwen/Qwen1.5-14B-Chat"
# Install the MiniCPM environment first (transformers >= 4.36.0)
# By default the script exports the MiniCPM-2B-dpo-fp16 model
cd build
python tools/minicpm2flm.py minicpm-2b-float16.flm # export the dpo-float16 model
python tools/minicpm2flm.py minicpm-2b-fp16.flm # export the dpo-float16 model
python tools/minicpm2flm.py minicpm3-4b-fp16.flm openbmb/MiniCPM3-4B # export the minicpm3-float16 model
./main -p minicpm-2b-float16.flm # run the model
```
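After exporting, the resulting .flm file can also be loaded from Python. A minimal sketch, assuming the ftllm bindings expose `llm.model` and `response` the way the project's other examples do (the API names here are assumptions; verify against fastllm_pytools/llm.py):

```python
# Sketch only: llm.model / model.response are assumed API names from the
# fastllm Python bindings; check fastllm_pytools/llm.py for the actual ones.
from ftllm import llm

model = llm.model("minicpm3-4b-fp16.flm")  # file produced by minicpm2flm.py above
print(model.response("你好"))
```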
2 changes: 2 additions & 0 deletions src/models/minicpm3.cpp
@@ -80,6 +80,8 @@ namespace fastllm {
if (this->weight.dicts.find("kv_lora_rank") != this->weight.dicts.end()) {
this->kv_lora_rank = std::stoi(this->weight.dicts["kv_lora_rank"]);
}
weight.tokenizer.SetSpecialTokens({{"</s>", 2}, {"<s>", 1}, {"<unk>", 0}, {"<|im_start|>", 73441}, {"<|im_end|>", 73440}, {"<|tool_call|>", 73442},
{"<|execute_start|>", 73443}, {"<|execute_end|>", 73444}, {"<|fim_prefix|>", 73445}, {"<|fim_middle|>", 73446}, {"<|fim_suffix|>", 73447}});
}

int MiniCpm3Model::Forward(const fastllm::Data &inputIds, const fastllm::Data &attentionMask,
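For reference, a small sketch (not part of the commit) that cross-checks these hard-coded special-token ids against the Hugging Face tokenizer; the model id openbmb/MiniCPM3-4B comes from the docs table, and the ids mirror the SetSpecialTokens call above:

```python
# Cross-check the special-token ids hard-coded in minicpm3.cpp against the
# Hugging Face tokenizer for openbmb/MiniCPM3-4B.
from transformers import AutoTokenizer

expected = {
    "<unk>": 0, "<s>": 1, "</s>": 2,
    "<|im_end|>": 73440, "<|im_start|>": 73441, "<|tool_call|>": 73442,
    "<|execute_start|>": 73443, "<|execute_end|>": 73444,
    "<|fim_prefix|>": 73445, "<|fim_middle|>": 73446, "<|fim_suffix|>": 73447,
}

tokenizer = AutoTokenizer.from_pretrained("openbmb/MiniCPM3-4B", trust_remote_code=True)
for token, expected_id in expected.items():
    actual_id = tokenizer.convert_tokens_to_ids(token)
    assert actual_id == expected_id, f"{token}: expected {expected_id}, got {actual_id}"
```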
4 changes: 2 additions & 2 deletions tools/fastllm_pytools/hf_model.py
@@ -101,8 +101,8 @@ def create(model,
modelInfo["tokenizer_class"] = tokenizer.name;
if "rope_scaling" in modelInfo and isinstance(modelInfo["rope_scaling"], builtins.dict):
rope_scaling = modelInfo.pop("rope_scaling")
modelInfo["rope_scaling.type"] = rope_scaling["type"]
modelInfo["rope_scaling.factor"] = rope_scaling["factor"]
for key, value in rope_scaling.items():
modelInfo["rope_scaling." + key] = value
if eos_id:
modelInfo["eos_token_id"] = str(eos_id)

4 changes: 2 additions & 2 deletions tools/fastllm_pytools/torch2flm.py
@@ -186,8 +186,8 @@ def tofile(exportPath,
modelInfo["tokenizer_class"] = tokenizer.name;
if "rope_scaling" in modelInfo and isinstance(modelInfo["rope_scaling"], builtins.dict):
rope_scaling = modelInfo.pop("rope_scaling")
modelInfo["rope_scaling.type"] = rope_scaling["type"]
modelInfo["rope_scaling.factor"] = rope_scaling["factor"]
for key, value in rope_scaling.items():
modelInfo["rope_scaling." + key] = value
if eos_id:
modelInfo["eos_token_id"] = str(eos_id)

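Both hf_model.py and torch2flm.py now copy every key of rope_scaling into modelInfo instead of only type and factor. A minimal sketch of the resulting flattened keys for a hypothetical longrope-style config (the field names in the example dict are illustrative, not taken from this commit):

```python
# Illustrative only: flatten an arbitrary rope_scaling dict into modelInfo,
# mirroring the generalized loop added in hf_model.py / torch2flm.py.
modelInfo = {
    "model_type": "minicpm3",
    # Hypothetical longrope-style config with more fields than just
    # "type" and "factor", which the old code would have dropped.
    "rope_scaling": {
        "type": "longrope",
        "short_factor": [1.0, 1.0, 1.2],
        "long_factor": [1.0, 2.0, 4.0],
        "original_max_position_embeddings": 32768,
    },
}

if "rope_scaling" in modelInfo and isinstance(modelInfo["rope_scaling"], dict):
    rope_scaling = modelInfo.pop("rope_scaling")
    for key, value in rope_scaling.items():
        modelInfo["rope_scaling." + key] = value

print(modelInfo)
# {'model_type': 'minicpm3', 'rope_scaling.type': 'longrope',
#  'rope_scaling.short_factor': [1.0, 1.0, 1.2],
#  'rope_scaling.long_factor': [1.0, 2.0, 4.0],
#  'rope_scaling.original_max_position_embeddings': 32768}
```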
5 changes: 3 additions & 2 deletions tools/scripts/chatglm_export.py
@@ -3,8 +3,9 @@
from ftllm import torch2flm

if __name__ == "__main__":
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
modelNameOrPath = sys.argv[3] if len(sys.argv) >= 4 else 'THUDM/chatglm2-6b'
tokenizer = AutoTokenizer.from_pretrained(modelNameOrPath, trust_remote_code=True)
model = AutoModel.from_pretrained(modelNameOrPath, trust_remote_code=True)
model = model.eval()

dtype = sys.argv[2] if len(sys.argv) >= 3 else "float16"
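With this change chatglm_export.py accepts an optional third command-line argument selecting the model to export (for example, a local checkpoint path or another THUDM model id) and falls back to THUDM/chatglm2-6b when it is omitted, matching the pattern used by the other export scripts.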
13 changes: 8 additions & 5 deletions tools/scripts/minicpm2flm.py
@@ -10,10 +10,13 @@
model = AutoModelForCausalLM.from_pretrained(modelNameOrPath, trust_remote_code=True, torch_dtype=torch.float16)
model = model.eval()

model.config.__dict__['model_type'] = 'minicpm'

dtype = sys.argv[2] if len(sys.argv) >= 3 else "float16"
exportPath = sys.argv[1] if len(sys.argv) >= 2 else "minicpm-2b-" + dtype + ".flm"
torch2flm.tofile(exportPath, model, tokenizer, pre_prompt = "<s>",
user_role = "<用户>", bot_role = "<AI>",
history_sep = "", dtype = dtype)

if model.config.architectures == ["MiniCPMForCausalLM"]:
model.config.model_type = "minicpm"
torch2flm.tofile(exportPath, model, tokenizer, pre_prompt = "<s>", user_role = "<用户>",
bot_role = "<AI>", history_sep = "", dtype = dtype)
else:
torch2flm.tofile(exportPath, model, tokenizer, pre_prompt="", user_role="<|im_start|>user\n",
bot_role="<|im_end|>\n<|im_start|>assistant\n", history_sep="<|im_end|>\n", eos_id = tokenizer.eos_token_id, dtype = dtype)
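To make the two export branches concrete, here is a sketch of the single-turn prompt each set of role strings would produce. The concatenation order (pre_prompt, then user_role, query, bot_role, with history_sep between rounds) is an assumption about how the runtime assembles prompts; it matches the usual MiniCPM-2B format "<s><用户>{query}<AI>":

```python
# Sketch only: assumes the runtime builds prompts as
#   pre_prompt + (user_role + query + bot_role + answer + history_sep) per round.
def build_prompt(pre_prompt, user_role, bot_role, history_sep, history, query):
    prompt = pre_prompt
    for old_query, old_answer in history:
        prompt += user_role + old_query + bot_role + old_answer + history_sep
    return prompt + user_role + query + bot_role

# MiniCPM-2B branch
print(build_prompt("<s>", "<用户>", "<AI>", "", [], "你好"))
# -> <s><用户>你好<AI>

# MiniCPM3-4B branch (ChatML-style markers)
print(build_prompt("", "<|im_start|>user\n", "<|im_end|>\n<|im_start|>assistant\n",
                   "<|im_end|>\n", [], "你好"))
# -> "<|im_start|>user\n你好<|im_end|>\n<|im_start|>assistant\n" (with real newlines)
```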
