Convert fixes #3967
Closed
```diff
@@ -250,9 +250,14 @@ def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
     if config.get("rope_theta") == 1000000:
         # CodeLlama
         n_ctx = 16384
-    elif config["norm_eps"] == 1e-05:
+    elif config["norm_eps"] in (1e-05, 1e-06):
         # LLaMA v2
         n_ctx = 4096
+        # For some reason FB writes -1 to vocab size for their LLAMA2 models
+        # simply remove this bogus value and let the return statement belo
+        # figure it out
+        if config["vocab_size"] == -1:
+            del config["vocab_size"]
     else:
         # LLaMA v1
         n_ctx = 2048
```

Review comment on the added comment block: spelling (the committed comment reads "belo" where "below" is meant).
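Deleting the bogus key works because code further down in `convert.py` falls back to the checkpoint's embedding matrix when `vocab_size` is absent, which is what the "let the return statement below figure it out" comment refers to. A minimal sketch of that fallback, assuming the embedding tensor is named `tok_embeddings.weight` with shape `(n_vocab, dim)` as in the original FB checkpoints (illustrative, not a verbatim excerpt):

```python
# Sketch: resolve the vocab size once the bogus -1 entry has been deleted.
# The tensor name and shape convention are assumptions based on the original
# FB checkpoint layout, not a verbatim excerpt from convert.py.
def resolve_n_vocab(config: dict, tensor_shapes: dict) -> int:
    if "vocab_size" in config:
        return config["vocab_size"]
    # With the -1 removed, fall back to the embedding matrix: (n_vocab, dim)
    return tensor_shapes["tok_embeddings.weight"][0]

# Example: a LLaMA-2 config after `del config["vocab_size"]`
config = {"dim": 4096, "norm_eps": 1e-06}
print(resolve_n_vocab(config, {"tok_embeddings.weight": (32000, 4096)}))  # 32000
```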
Then how do we detect LLaMA-1? See #2384
Well, LLaMA-2 ships files with norm_eps equal to 1e-06.
Could you tell me where to get an original LLaMA-2 model with norm_eps=1e-6?
Well, I got it via the LLaMA-2 repository's download script, using the link that was sent to my email. Since the link is tied to my identity and to accepting the EULA, I'd say it's not prudent to share it. I guess if you create a new registration right now and try to download llama 7b you'd see the same thing.
I just downloaded LLaMA-2 7B and got this:
So you'll have to be more specific.
Ok, so I have actually downloaded llama-2-7b-chat:

```
$ cat llama-2-7b-chat/params.json
{"dim": 4096, "multiple_of": 256, "n_heads": 32, "n_layers": 32, "norm_eps": 1e-06, "vocab_size": -1}
$ cat llama-2-7b-chat/checklist.chk
0c4837f3ef97f648452f91faed308a07 consolidated.00.pth
1c39bc3c6b51079fd807cc105b86c9df params.json
```
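For reference, the amended check from the diff above classifies this file as LLaMA v2. A quick sketch (hypothetical helper mirroring the diff, not the full file); the ambiguity is that an original LLaMA-1 params.json can carry the same norm_eps of 1e-06 and vocab_size of -1, so it would be classified the same way:

```python
import json

def guess_n_ctx(config: dict) -> int:
    # Mirrors the heuristic from the diff above (sketch, not convert.py itself).
    if config.get("rope_theta") == 1000000:
        return 16384   # CodeLlama
    if config["norm_eps"] in (1e-05, 1e-06):
        return 4096    # treated as LLaMA v2
    return 2048        # treated as LLaMA v1

with open("llama-2-7b-chat/params.json") as f:
    print(guess_n_ctx(json.load(f)))  # 4096, but a LLaMA-1 params.json
                                      # with norm_eps 1e-06 also yields 4096
```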
I can confirm this.
@ggerganov Our "hack to determine LLaMA v1 vs v2" is apparently insufficient. Maybe we need a CLI arg to disambiguate?
Ugh, this is quite messy. I guess we do need to add a CLI arg that would be required in cases where the `vocab_size` is -1. In such cases we abort and ask the user to explicitly specify LLaMA v1 or LLaMA v2.
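Something along these lines, perhaps (the flag name and wiring here are hypothetical, sketched only to illustrate the abort-and-ask behavior, not taken from convert.py):

```python
import argparse
import sys

# Hypothetical sketch of the proposed disambiguation: the flag name
# --llama-version and this wiring are illustrative, not from the PR.
parser = argparse.ArgumentParser()
parser.add_argument("--llama-version", choices=("1", "2"),
                    help="required when params.json has vocab_size == -1")
args = parser.parse_args()

config = {"norm_eps": 1e-06, "vocab_size": -1}  # stand-in for params.json

if config.get("vocab_size") == -1 and args.llama_version is None:
    sys.exit("params.json is ambiguous (vocab_size == -1); "
             "please pass --llama-version 1 or --llama-version 2")

n_ctx = 2048 if args.llama_version == "1" else 4096
```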