From dd15c0f93d3e416a4da0e4328024d343c7e1452f Mon Sep 17 00:00:00 2001
From: FFengIll <fengyouzheng@gmail.com>
Date: Thu, 14 Sep 2023 18:55:47 +0800
Subject: [PATCH 1/4] bugfix: `type_vocab_size` is a hparam, not a hard-coded 2.

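`type_vocab_size` is the size of the token type (segment) embedding table in
the BERT config. It is a per-model hyperparameter, so the token type embedding
tensor has to be allocated from the loaded hparams rather than from the
literal 2. A quick way to check the value for a given checkpoint (sketch; the
model name is illustrative and assumes the transformers package is installed):

    from transformers import AutoConfig
    cfg = AutoConfig.from_pretrained("bert-base-uncased")
    print(cfg.type_vocab_size)  # 2 for vanilla BERT, but model-dependent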
---
 bert.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/bert.cpp b/bert.cpp
index 2e033ba..d2dc0bd 100644
--- a/bert.cpp
+++ b/bert.cpp
@@ -23,6 +23,7 @@ struct bert_hparams
     int32_t n_intermediate = 1536;
     int32_t n_head = 12;
     int32_t n_layer = 6;
+    int32_t n_vocab_size = 2;
     int32_t f16 = 1;
 };
 
@@ -489,11 +490,12 @@ struct bert_ctx * bert_load_from_file(const char *fname)
         const int n_intermediate = hparams.n_intermediate;
         const int n_max_tokens = hparams.n_max_tokens;
         const int n_vocab = hparams.n_vocab;
+        const int n_vocab_size = hparams.n_vocab_size;
 
         model.layers.resize(n_layer);
 
         model.word_embeddings = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab);
-        model.token_type_embeddings = ggml_new_tensor_2d(ctx, wtype, n_embd, 2);
+        model.token_type_embeddings = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab_size);
         model.position_embeddings = ggml_new_tensor_2d(ctx, wtype, n_embd, n_max_tokens);
 
         model.ln_e_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);

From a3c8548ec16213aea2159beb84bd355cfcd32b97 Mon Sep 17 00:00:00 2001
From: FFengIll <fengyouzheng@gmail.com>
Date: Thu, 14 Sep 2023 18:56:09 +0800
Subject: [PATCH 2/4] bugfix: write `type_vocab_size` in convert-to-ggml.py.

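Write `type_vocab_size` into the GGML file header so the loader can size the
token type embedding tensor from it. The header is a flat sequence of int32
fields, so the write order here must match the read order in bert.cpp: the new
field goes right after num_hidden_layers and before ftype.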
---
 models/convert-to-ggml.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/models/convert-to-ggml.py b/models/convert-to-ggml.py
index 7ef5b80..c05ee25 100644
--- a/models/convert-to-ggml.py
+++ b/models/convert-to-ggml.py
@@ -61,6 +61,7 @@
 fout.write(struct.pack("i", hparams["intermediate_size"]))
 fout.write(struct.pack("i", hparams["num_attention_heads"]))
 fout.write(struct.pack("i", hparams["num_hidden_layers"]))
+fout.write(struct.pack("i", hparams["type_vocab_size"]))
 fout.write(struct.pack("i", ftype))
 
 for i in range(hparams["vocab_size"]):

From 7ef3126bb8f58a26f54ed36743c0f1d1e16954f9 Mon Sep 17 00:00:00 2001
From: FFengIll <fengyouzheng@gmail.com>
Date: Tue, 19 Sep 2023 10:35:43 +0800
Subject: [PATCH 3/4] bugfix: add the missing read for the new `n_vocab_size` hparam.

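The converter now emits `type_vocab_size` in the header, but the loader never
consumed it: `hparams.f16` would pick up the type_vocab_size value (typically
2) and every later read would be shifted by four bytes. A minimal sketch of
the failure mode (Python; field values are illustrative):

    import struct
    header = struct.pack("3i", 6, 2, 1)             # n_layer, type_vocab_size, f16
    n_layer, f16 = struct.unpack("2i", header[:8])  # old reader: f16 == 2, wrong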
---
 bert.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bert.cpp b/bert.cpp
index d2dc0bd..f04c6c5 100644
--- a/bert.cpp
+++ b/bert.cpp
@@ -365,6 +365,7 @@ struct bert_ctx * bert_load_from_file(const char *fname)
         fin.read((char *)&hparams.n_intermediate, sizeof(hparams.n_intermediate));
         fin.read((char *)&hparams.n_head, sizeof(hparams.n_head));
         fin.read((char *)&hparams.n_layer, sizeof(hparams.n_layer));
+        fin.read((char *)&hparams.n_vocab_size, sizeof(hparams.n_vocab_size));
         fin.read((char *)&hparams.f16, sizeof(hparams.f16));
 
         printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab);
@@ -373,6 +374,7 @@ struct bert_ctx * bert_load_from_file(const char *fname)
         printf("%s: n_intermediate  = %d\n", __func__, hparams.n_intermediate);
         printf("%s: n_head  = %d\n", __func__, hparams.n_head);
         printf("%s: n_layer = %d\n", __func__, hparams.n_layer);
+        printf("%s: n_vocab_size = %d\n", __func__, hparams.n_vocab_size);
         printf("%s: f16     = %d\n", __func__, hparams.f16);
     }
 

From 79825d8ea8c387a28ecf778d26e61563a6eda57e Mon Sep 17 00:00:00 2001
From: F <fengyouzheng@gmail.com>
Date: Thu, 21 Sep 2023 04:25:39 +0000
Subject: [PATCH 4/4] bugfix: read and write `n_vocab_size` in quantize.cpp.

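quantize.cpp reads the hparams header from the input model and writes it back
out for the quantized file, so it also has to carry the new `n_vocab_size`
field through; otherwise the quantized header would be misaligned in the same
way the loader was before the previous fix.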
---
 models/quantize.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/models/quantize.cpp b/models/quantize.cpp
index 22411a1..83f2fd2 100644
--- a/models/quantize.cpp
+++ b/models/quantize.cpp
@@ -20,6 +20,7 @@ struct bert_hparams
     int32_t n_intermediate = 1536;
     int32_t n_head = 12;
     int32_t n_layer = 6;
+    int32_t n_vocab_size = 2;
     int32_t f16 = 1;
 };
 
@@ -74,6 +75,7 @@ bool bert_model_quantize(const std::string & fname_inp, const std::string & fnam
         finp.read((char *) &hparams.n_intermediate,   sizeof(hparams.n_intermediate));
         finp.read((char *) &hparams.n_head,  sizeof(hparams.n_head));
         finp.read((char *) &hparams.n_layer, sizeof(hparams.n_layer));
+        finp.read((char *) &hparams.n_vocab_size, sizeof(hparams.n_vocab_size));
         finp.read((char *) &hparams.f16,     sizeof(hparams.f16));
 
         printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab);
@@ -82,6 +84,7 @@ bool bert_model_quantize(const std::string & fname_inp, const std::string & fnam
         printf("%s: n_intermediate  = %d\n", __func__, hparams.n_intermediate);
         printf("%s: n_head  = %d\n", __func__, hparams.n_head);
         printf("%s: n_layer = %d\n", __func__, hparams.n_layer);
+        printf("%s: n_vocab_size = %d\n", __func__, hparams.n_vocab_size);
         printf("%s: f16     = %d\n", __func__, hparams.f16);
 
         fout.write((char *) &hparams.n_vocab, sizeof(hparams.n_vocab));
@@ -90,6 +93,7 @@ bool bert_model_quantize(const std::string & fname_inp, const std::string & fnam
         fout.write((char *) &hparams.n_intermediate,   sizeof(hparams.n_intermediate));
         fout.write((char *) &hparams.n_head,  sizeof(hparams.n_head));
         fout.write((char *) &hparams.n_layer, sizeof(hparams.n_layer));
+        fout.write((char *) &hparams.n_vocab_size, sizeof(hparams.n_vocab_size));
         fout.write((char *) &itype,           sizeof(hparams.f16));
     }