From cc50e78fbe0928c9b2073503fe70174f143efede Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stanis=C5=82aw=20Szymczyk?= <sszymczy@gmail.com>
Date: Tue, 30 Jul 2024 16:57:47 +0200
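Subject: [PATCH 1/4] llama-vocab, llama : handle <|eom_id|> Llama-3.1 token

Add a special_eom_id vocab entry (default -1) with a llama_token_eom_impl()
accessor, and make llama_token_is_eog_impl() report it as end-of-generation
alongside the EOS and EOT tokens.

The id is read from the new "tokenizer.ggml.eom_token_id" KV. Convert
scripts do not write that key yet, so when it is absent llm_load_vocab()
falls back to looking up the token by its text, "<|eom_id|>".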
---
 src/llama-vocab.cpp |  7 ++++++-
 src/llama-vocab.h   |  2 ++
 src/llama.cpp       | 17 +++++++++++++++++
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 133094904c2d2..9be076f6d7c52 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -1444,7 +1444,8 @@ llama_token_attr llama_token_get_attr_impl(const struct llama_vocab & vocab, lla
 bool llama_token_is_eog_impl(const struct llama_vocab & vocab, llama_token token) {
     return token != -1 && (
         token == llama_token_eos_impl(vocab) ||
-        token == llama_token_eot_impl(vocab)
+        token == llama_token_eot_impl(vocab) ||
+        token == llama_token_eom_impl(vocab)
     );
 }
 
@@ -1500,6 +1501,10 @@ llama_token llama_token_eot_impl(const struct llama_vocab & vocab) {
     return vocab.special_eot_id;
 }
 
+llama_token llama_token_eom_impl(const struct llama_vocab & vocab) {
+    return vocab.special_eom_id;
+}
+
 int32_t llama_tokenize_impl(
     const struct llama_vocab & vocab,
                   const char * text,
diff --git a/src/llama-vocab.h b/src/llama-vocab.h
index 30b565d55dad5..7adfc16da3af3 100644
--- a/src/llama-vocab.h
+++ b/src/llama-vocab.h
@@ -45,6 +45,7 @@ struct llama_vocab {
     id special_suffix_id = -1;
     id special_middle_id = -1;
     id special_eot_id    = -1; // TODO: move above after "eos_id", and here add "file separator" token
+    id special_eom_id    = -1;
 
     // tokenizer flags
     bool tokenizer_add_space_prefix = false;
@@ -101,6 +102,7 @@ llama_token llama_token_prefix_impl(const struct llama_vocab & vocab);
 llama_token llama_token_middle_impl(const struct llama_vocab & vocab);
 llama_token llama_token_suffix_impl(const struct llama_vocab & vocab);
 llama_token llama_token_eot_impl   (const struct llama_vocab & vocab);
+llama_token llama_token_eom_impl   (const struct llama_vocab & vocab);
 
 int32_t llama_tokenize_impl(
         const struct llama_vocab & vocab,
diff --git a/src/llama.cpp b/src/llama.cpp
index a207451f58507..e37bd5c14e433 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -362,6 +362,7 @@ enum llm_kv {
     LLM_KV_TOKENIZER_SUFFIX_ID,
     LLM_KV_TOKENIZER_MIDDLE_ID,
     LLM_KV_TOKENIZER_EOT_ID,
+    LLM_KV_TOKENIZER_EOM_ID,
 
     LLM_KV_ADAPTER_TYPE,
     LLM_KV_ADAPTER_LORA_ALPHA,
@@ -459,6 +460,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_TOKENIZER_SUFFIX_ID,            "tokenizer.ggml.suffix_token_id"          },
     { LLM_KV_TOKENIZER_MIDDLE_ID,            "tokenizer.ggml.middle_token_id"          },
     { LLM_KV_TOKENIZER_EOT_ID,               "tokenizer.ggml.eot_token_id"             },
+    { LLM_KV_TOKENIZER_EOM_ID,               "tokenizer.ggml.eom_token_id"             },
 
     { LLM_KV_ADAPTER_TYPE,                  "adapter.type"       },
     { LLM_KV_ADAPTER_LORA_ALPHA,            "adapter.lora.alpha" },
@@ -5585,6 +5587,7 @@ static void llm_load_vocab(
             { LLM_KV_TOKENIZER_SUFFIX_ID, vocab.special_suffix_id },
             { LLM_KV_TOKENIZER_MIDDLE_ID, vocab.special_middle_id },
             { LLM_KV_TOKENIZER_EOT_ID,    vocab.special_eot_id    },
+            { LLM_KV_TOKENIZER_EOM_ID,    vocab.special_eom_id    },
         };
 
         for (const auto & it : special_token_types) {
@@ -5637,6 +5640,20 @@ static void llm_load_vocab(
                 }
             }
         }
+
+        // find EOM token: "<|eom_id|>"
+        //
+        // TODO: convert scripts should provide this token through the KV metadata LLAMA_KV_TOKENIZER_EOM_ID
+        //       for now, we apply this workaround to find the EOM token based on its text
+        if (vocab.special_eom_id == -1) {
+            for (const auto & t : vocab.token_to_id) {
+                if (t.first == "<|eom_id|>") {
+                    vocab.special_eom_id = t.second;
+                    break;
+                }
+            }
+        }
+
     }
 
     // build special tokens cache

From f10b0e2c39e03ec3e4abead1573dcf46575f3e82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stanis=C5=82aw=20Szymczyk?= <sszymczy@gmail.com>
Date: Sun, 4 Aug 2024 20:22:48 +0200
Subject: [PATCH 2/4] gguf-py : add constants and method related to <|eom_id|>
 token

---
 gguf-py/gguf/constants.py   | 2 ++
 gguf-py/gguf/gguf_writer.py | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index e343c2ef1659a..59ffd92ea00cc 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -161,6 +161,7 @@ class Tokenizer:
         SUFFIX_ID            = "tokenizer.ggml.suffix_token_id"
         MIDDLE_ID            = "tokenizer.ggml.middle_token_id"
         EOT_ID               = "tokenizer.ggml.eot_token_id"
+        EOM_ID               = "tokenizer.ggml.eom_token_id"
 
     class Adapter:
         TYPE       = "adapter.type"
@@ -1327,3 +1328,4 @@ def get_type(val: Any) -> GGUFValueType:
 KEY_TOKENIZER_SUFFIX_ID  = Keys.Tokenizer.SUFFIX_ID
 KEY_TOKENIZER_MIDDLE_ID  = Keys.Tokenizer.MIDDLE_ID
 KEY_TOKENIZER_EOT_ID     = Keys.Tokenizer.EOT_ID
+KEY_TOKENIZER_EOM_ID     = Keys.Tokenizer.EOM_ID
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index ba6f53cda25a1..29b378f6d3ebf 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -826,6 +826,9 @@ def add_middle_token_id(self, id: int) -> None:
     def add_eot_token_id(self, id: int) -> None:
         self.add_uint32(Keys.Tokenizer.EOT_ID, id)
 
+    def add_eom_token_id(self, id: int) -> None:
+        self.add_uint32(Keys.Tokenizer.EOM_ID, id)
+
     def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
         pack_prefix = ''
         if not skip_pack_prefix:

From 0b7211387eccc6e93f2ea5e66c2dc02ae155ab67 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stanis=C5=82aw=20Szymczyk?= <sszymczy@gmail.com>
Date: Sun, 4 Aug 2024 20:47:47 +0200
Subject: [PATCH 3/4] llama : use token_to_id map find() method instead of
 iterating over all tokens

---
 src/llama.cpp | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index 132a6ea339e13..d5abe3f58259a 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -5647,11 +5647,9 @@ static void llm_load_vocab(
         // TODO: convert scripts should provide this token through the KV metadata LLAMA_KV_TOKENIZER_EOM_ID
         //       for now, we apply this workaround to find the EOM token based on its text
         if (vocab.special_eom_id == -1) {
-            for (const auto & t : vocab.token_to_id) {
-                if (t.first == "<|eom_id|>") {
-                    vocab.special_eom_id = t.second;
-                    break;
-                }
+            const auto & t = vocab.token_to_id.find("<|eom_id|>");
+            if (t != vocab.token_to_id.end()) {
+                vocab.special_eom_id = t->second;
             }
         }
 

From 5efd8264264289198d19bc812b5ce4d8f42fe520 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stanis=C5=82aw=20Szymczyk?= <sszymczy@gmail.com>
Date: Sun, 4 Aug 2024 21:14:43 +0200
Subject: [PATCH 4/4] llama : whitespace formatting

---
 src/llama.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index d5abe3f58259a..592954053ea00 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -5652,7 +5652,6 @@ static void llm_load_vocab(
                 vocab.special_eom_id = t->second;
             }
         }
-
     }
 
     // build special tokens cache