From 32b6d1293887bee564143cdd233bc83ac553da14 Mon Sep 17 00:00:00 2001
From: RunningLeon
Date: Thu, 4 Jul 2024 20:19:19 +0800
Subject: [PATCH 1/3] update internlm2

---
 convert_hf_to_gguf.py          | 70 +++++++++++++++++++++++++---------
 prompts/chat-with-internlm.txt |  4 ++
 2 files changed, 57 insertions(+), 17 deletions(-)
 create mode 100644 prompts/chat-with-internlm.txt

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 455eea883b93b..89685d6d90dc3 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -2145,6 +2145,9 @@ def set_vocab(self):
                 toktype = SentencePieceTokenTypes.UNUSED
             elif tokenizer.IsByte(token_id):
                 toktype = SentencePieceTokenTypes.BYTE
+            # take care of unused raw tokens
+            if piece.startswith('[UNUSED'):
+                toktype = SentencePieceTokenTypes.UNKNOWN
 
             tokens.append(text)
             scores.append(score)
@@ -2160,6 +2163,47 @@
                     scores.append(-1000.0)
                     toktypes.append(SentencePieceTokenTypes.USER_DEFINED)
 
+        chat_eos_token = '<|im_end|>'
+        chat_eos_token_id = None
+
+        tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
+        if tokenizer_config_file.is_file():
+            with open(tokenizer_config_file, "r", encoding="utf-8") as f:
+                tokenizer_config_json = json.load(f)
+            added_tokens_decoder = tokenizer_config_json.get("added_tokens_decoder", {})
+            for token_id, token_data in added_tokens_decoder.items():
+                token_id = int(token_id)
+                token = token_data["content"]
+                if token == chat_eos_token:
+                    chat_eos_token_id = token_id
+                token = token.encode("utf-8")
+                if toktypes[token_id] != SentencePieceTokenTypes.UNKNOWN:
+                    assert tokens[token_id] == token
+                tokens[token_id] = token
+                scores[token_id] = -1000.0
+                toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED
+                if token_data.get("special"):
+                    toktypes[token_id] = SentencePieceTokenTypes.CONTROL
+
+        tokenizer_file = self.dir_model / 'tokenizer.json'
+        if tokenizer_file.is_file():
+            with open(tokenizer_file, "r", encoding="utf-8") as f:
+                tokenizer_json = json.load(f)
+            added_tokens = tokenizer_json.get("added_tokens", [])
+            for token_data in added_tokens:
+                token_id = int(token_data["id"])
+                token = token_data["content"]
+                if token == chat_eos_token:
+                    chat_eos_token_id = token_id
+                token = token.encode("utf-8")
+                if toktypes[token_id] != SentencePieceTokenTypes.UNKNOWN:
+                    assert tokens[token_id] == token
+                tokens[token_id] = token
+                scores[token_id] = -1000.0
+                toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED
+                if token_data.get("special"):
+                    toktypes[token_id] = SentencePieceTokenTypes.CONTROL
+
         self.gguf_writer.add_tokenizer_model("llama")
         self.gguf_writer.add_tokenizer_pre("default")
         self.gguf_writer.add_token_list(tokens)
@@ -2169,28 +2213,16 @@
 
         special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
         old_eos = special_vocab.special_token_ids["eos"]
-        if "chat" in os.path.basename(self.dir_model.absolute()):
+        if chat_eos_token_id is not None:
             # For the chat model, we replace the eos with '<|im_end|>'.
             # TODO: this is a hack, should be fixed
             #       https://github.com/ggerganov/llama.cpp/pull/6745#issuecomment-2067687048
-            special_vocab.special_token_ids["eos"] = self._try_get_sft_eos(tokenizer)
-            logger.warning(f"Replace eos:{old_eos} with a special token:{special_vocab.special_token_ids['eos']} \
-in chat mode so that the conversation can end normally.")
+            special_vocab.special_token_ids["eos"] = chat_eos_token_id
+            logger.warning(f"Replace eos:{old_eos} with a special token:{chat_eos_token_id}"
+                           " in chat mode so that the conversation can end normally.")
 
         special_vocab.add_to_gguf(self.gguf_writer)
 
-    def _try_get_sft_eos(self, tokenizer):
-        unused_145_list = tokenizer.Encode('[UNUSED_TOKEN_145]')
-        im_end_list = tokenizer.Encode('<|im_end|>')
-        eos_token = None
-        assert (len(unused_145_list) == 1) ^ (len(im_end_list) == 1)
-        if len(unused_145_list) == 1:
-            eos_token = unused_145_list[0]
-        if len(im_end_list) == 1:
-            eos_token = im_end_list[0]
-        assert eos_token
-        return eos_token
-
     def _hf_permute_qk(self, weights, n_head: int, n_head_kv: int):
         if n_head_kv is not None and n_head != n_head_kv:
             n_head = n_head_kv
@@ -2209,7 +2241,11 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
         self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"])
         self.gguf_writer.add_file_type(self.ftype)
-
+        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
+            if self.hparams["rope_scaling"].get("type") == "linear":
+                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+    
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         num_heads = self.hparams["num_attention_heads"]
         num_kv_heads = self.hparams["num_key_value_heads"]
diff --git a/prompts/chat-with-internlm.txt b/prompts/chat-with-internlm.txt
new file mode 100644
index 0000000000000..996508a23c215
--- /dev/null
+++ b/prompts/chat-with-internlm.txt
@@ -0,0 +1,4 @@
+You are an AI assistant whose name is InternLM (书生·浦语).\n- InternLM (书生·浦语) is a conversational language model that is developed by Shanghai AI Laboratory (上海人工智能实验室). It is designed to be helpful, honest, and harmless.\n- InternLM (书生·浦语) can understand and communicate fluently in the language chosen by the user such as English and 中文.
+
+User: Hello! Who are you?
+InternLM:

From 32c97de747b42ae518ec0e74e505f6af58879371 Mon Sep 17 00:00:00 2001
From: RunningLeon
Date: Fri, 5 Jul 2024 18:25:06 +0800
Subject: [PATCH 2/3] remove unused file

---
 prompts/chat-with-internlm.txt | 4 ----
 1 file changed, 4 deletions(-)
 delete mode 100644 prompts/chat-with-internlm.txt

diff --git a/prompts/chat-with-internlm.txt b/prompts/chat-with-internlm.txt
deleted file mode 100644
index 996508a23c215..0000000000000
--- a/prompts/chat-with-internlm.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-You are an AI assistant whose name is InternLM (书生·浦语).\n- InternLM (书生·浦语) is a conversational language model that is developed by Shanghai AI Laboratory (上海人工智能实验室). It is designed to be helpful, honest, and harmless.\n- InternLM (书生·浦语) can understand and communicate fluently in the language chosen by the user such as English and 中文.
-
-User: Hello! Who are you?
-InternLM:

From 3d04d337caf4eef74b4b6d15b995161132811218 Mon Sep 17 00:00:00 2001
From: RunningLeon
Date: Mon, 8 Jul 2024 10:32:57 +0800
Subject: [PATCH 3/3] fix lint

---
 convert_hf_to_gguf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 89685d6d90dc3..d9d9be61b438d 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -2145,7 +2145,7 @@ def set_vocab(self):
                 toktype = SentencePieceTokenTypes.UNUSED
             elif tokenizer.IsByte(token_id):
                 toktype = SentencePieceTokenTypes.BYTE
-            # take care of unused raw tokens 
+            # take care of unused raw tokens
             if piece.startswith('[UNUSED'):
                 toktype = SentencePieceTokenTypes.UNKNOWN
 
@@ -2245,7 +2245,7 @@ def set_gguf_parameters(self):
             if self.hparams["rope_scaling"].get("type") == "linear":
                 self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
                 self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
-    
+
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         num_heads = self.hparams["num_attention_heads"]
         num_kv_heads = self.hparams["num_key_value_heads"]
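
Note on PATCH 1/3: it drops the old directory-name heuristic ('"chat" in os.path.basename(...)') and instead derives the chat EOS id from tokenizer metadata shipped with the checkpoint. Below is a minimal standalone sketch of that lookup, kept separate from the converter; the helper name find_chat_eos_id and the file layout it assumes are illustrative, not part of the patch.

from __future__ import annotations

import json
from pathlib import Path

def find_chat_eos_id(dir_model: Path, chat_eos_token: str = '<|im_end|>') -> int | None:
    # Sketch only: scan the same two files the patch scans, in the same order.
    tokenizer_config_file = dir_model / 'tokenizer_config.json'
    if tokenizer_config_file.is_file():
        with open(tokenizer_config_file, "r", encoding="utf-8") as f:
            config = json.load(f)
        # "added_tokens_decoder" maps string ids to {"content": ..., "special": ...}
        for token_id, token_data in config.get("added_tokens_decoder", {}).items():
            if token_data["content"] == chat_eos_token:
                return int(token_id)
    tokenizer_file = dir_model / 'tokenizer.json'
    if tokenizer_file.is_file():
        with open(tokenizer_file, "r", encoding="utf-8") as f:
            tok = json.load(f)
        # "added_tokens" is a list of {"id": ..., "content": ...} entries
        for token_data in tok.get("added_tokens", []):
            if token_data["content"] == chat_eos_token:
                return int(token_data["id"])
    return None

If neither file names the token, the lookup returns None and the converter keeps the original eos, which is why the patch can replace the path-name check with a plain "is not None" test.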
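
PATCH 1/3 also starts propagating linear RoPE scaling from the HF config into the GGUF header. A small sketch of the "rope_scaling" fragment the new set_gguf_parameters branch consumes; the factor value here is an assumed example, not taken from a real config.json.

# Illustrative hparams as loaded from a long-context model's config.json
hparams = {
    "rope_scaling": {"type": "linear", "factor": 2.0},  # assumed example values
}

# Same guard as the patch: key present, has a "factor", and type is "linear"
if hparams.get("rope_scaling") is not None and "factor" in hparams["rope_scaling"]:
    if hparams["rope_scaling"].get("type") == "linear":
        # In the patch these values feed gguf_writer.add_rope_scaling_type(
        # gguf.RopeScalingType.LINEAR) and gguf_writer.add_rope_scaling_factor(...)
        print("linear RoPE scaling, factor =", hparams["rope_scaling"]["factor"])

Configs without a rope_scaling entry, or with a non-linear type, fall through both checks and leave the GGUF rope-scaling fields unset.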