
convert : fix python 3.8 support, modernize type annotations #2916

Merged
9 commits merged on Aug 31, 2023
23 changes: 13 additions & 10 deletions convert-falcon-hf-to-gguf.py
@@ -1,19 +1,22 @@
 #!/usr/bin/env python3
 # HF falcon--> gguf conversion
 
-import gguf
+from __future__ import annotations
+
+import argparse
+import json
 import os
-import sys
 import struct
-import json
+import sys
+from pathlib import Path
+from typing import Any
+
+import gguf
 import numpy as np
 import torch
-import argparse
-
-from typing import Any, List
-from pathlib import Path
+
 from transformers import AutoTokenizer
 
 
 def bytes_to_unicode():
     # ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
     """
@@ -114,9 +117,9 @@ def parse_args() -> argparse.Namespace:
 
 print("gguf: get tokenizer metadata")
 
-tokens: List[bytearray] = []
-scores: List[float] = []
-toktypes: List[int] = []
+tokens: list[bytearray] = []
+scores: list[float] = []
+toktypes: list[int] = []
 
 tokenizer_json_file = dir_model / 'tokenizer.json'
 if not tokenizer_json_file.is_file():
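The same pattern repeats across every converter script below: imports are regrouped and `from __future__ import annotations` is added so the modernized annotations stay legal on Python 3.8. A minimal sketch, not taken from the PR, of what the future import buys:

```python
# PEP 563 turns every annotation into a plain string that is never evaluated
# at runtime, so PEP 585 built-in generics like `list[bytearray]` parse on
# Python 3.8 even though `list` only became subscriptable in Python 3.9.
from __future__ import annotations

tokens: list[bytearray] = []  # TypeError on 3.8 *without* the future import

def first_token(toks: list[bytearray]) -> bytearray | None:  # PEP 604 union
    return toks[0] if toks else None

print(__annotations__)  # {'tokens': 'list[bytearray]'} - stored, not evaluated
```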
18 changes: 10 additions & 8 deletions convert-gptneox-hf-to-gguf.py
@@ -1,17 +1,19 @@
 #!/usr/bin/env python3
 # HF gptneox--> gguf conversion
 
-import gguf
+from __future__ import annotations
+
+import argparse
+import json
 import os
-import sys
 import struct
-import json
+import sys
+from pathlib import Path
+from typing import Any
+
+import gguf
 import numpy as np
 import torch
-import argparse
-
-from typing import Any, List
-from pathlib import Path
 from transformers import AutoTokenizer
 
 # ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
@@ -112,7 +114,7 @@ def parse_args() -> argparse.Namespace:
 
 print("gguf: get tokenizer metadata")
 
-tokens: List[bytearray] = []
+tokens: list[bytearray] = []
 
 tokenizer_json_file = dir_model / 'tokenizer.json'
 if not tokenizer_json_file.is_file():
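A companion sketch (assumed behavior, not from the PR) of the failure mode being fixed: without the future import, the annotation is evaluated eagerly at import time.

```python
# exec() compiles without the PEP 563 flag, so `list[bytearray]` is evaluated
# immediately; on Python 3.8 this raises, on 3.9+ it passes silently.
try:
    exec("tokens: list[bytearray] = []")
except TypeError as err:
    print(err)  # on 3.8: 'type' object is not subscriptable
```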
29 changes: 16 additions & 13 deletions convert-llama-7b-pth-to-gguf.py
@@ -3,22 +3,25 @@
 # Only models with a single datafile are supported, like 7B
 # HF files required in the model dir: config.json tokenizer_config.json tokenizer.json tokenizer.model
 
-import gguf
+from __future__ import annotations
+
+import argparse
+import json
 import os
-import sys
 import struct
-import json
+import sys
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+import gguf
 import numpy as np
 import torch
-import argparse
-
-from typing import Any, List, TypeAlias
-from pathlib import Path
 from sentencepiece import SentencePieceProcessor
 
-#NDArray = np.ndarray[Any, Any]
-# compatible with python < 3.9
-NDArray: 'TypeAlias' = 'np.ndarray[Any, Any]'
+if TYPE_CHECKING:
+    from typing import TypeAlias
+
+NDArray: TypeAlias = 'np.ndarray[Any, Any]'
 
 
 def count_model_parts(dir_model: Path) -> int:
@@ -129,9 +132,9 @@ def parse_args() -> argparse.Namespace:
 
 print("gguf: get tokenizer metadata")
 
-tokens: List[bytes] = []
-scores: List[float] = []
-toktypes: List[int] = []
+tokens: list[bytes] = []
+scores: list[float] = []
+toktypes: list[int] = []
 
 tokenizer_model_file = dir_model / 'tokenizer.model'
 if not tokenizer_model_file.is_file():
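The `if TYPE_CHECKING:` guard is the key 3.8 fix in this file: the old code imported `TypeAlias` from `typing` unconditionally, but `typing.TypeAlias` only exists on Python 3.10+. A minimal sketch, assuming only that numpy is installed, of how the guarded alias behaves:

```python
# Static type checkers treat TYPE_CHECKING as True and resolve the alias;
# at runtime the block is skipped and NDArray is just a plain string, which
# the future import keeps unevaluated wherever it appears as an annotation.
from __future__ import annotations

from typing import TYPE_CHECKING, Any

import numpy as np

if TYPE_CHECKING:
    from typing import TypeAlias  # never executed at runtime

NDArray: TypeAlias = 'np.ndarray[Any, Any]'

def zeros(n: int) -> NDArray:  # mypy resolves NDArray; 3.8 never evaluates it
    return np.zeros(n, dtype=np.float32)

print(zeros(4))  # [0. 0. 0. 0.]
```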
18 changes: 11 additions & 7 deletions convert-llama-ggmlv3-to-gguf.py
@@ -1,10 +1,14 @@
 #!/usr/bin/env python3
-import sys, struct, math, argparse
-from pathlib import Path
+from __future__ import annotations
 
-import numpy as np
+import argparse
+import math
+import struct
+import sys
+from pathlib import Path
 
 import gguf
+import numpy as np
 
 # Note: Does not support GGML_QKK_64
 QK_K = 256
@@ -72,7 +76,7 @@ def load(self, data, offset, n_vocab):
 class Tensor:
     def __init__(self):
         self.name = None
-        self.dims = ()
+        self.dims: tuple[int, ...] = ()
         self.dtype = None
         self.start_offset = 0
         self.len_bytes = np.int64(0)
@@ -119,7 +123,7 @@ def load(self, data, offset):
         offset += hp.load(data, offset)
         vocab = Vocab()
         offset += vocab.load(data, offset, hp.n_vocab)
-        tensors = []
+        tensors: list[Tensor] = []
         tensor_map = {}
         while offset < len(data):
             tensor = Tensor()
@@ -305,8 +309,8 @@ def handle_metadata(cfg, hp):
 
 def handle_args():
     parser = argparse.ArgumentParser(description = 'Convert GGMLv3 models to GGUF')
-    parser.add_argument('--input', '-i', type = Path, help = 'Input GGMLv3 filename')
-    parser.add_argument('--output', '-o', type = Path, help ='Output GGUF filename')
+    parser.add_argument('--input', '-i', type = Path, required = True, help = 'Input GGMLv3 filename')
+    parser.add_argument('--output', '-o', type = Path, required = True, help ='Output GGUF filename')
     parser.add_argument('--name', help = 'Set model name')
     parser.add_argument('--desc', help = 'Set model description')
     parser.add_argument('--gqa', type = int, default = 1, help = 'grouped-query attention factor (use 8 for LLaMA2 70B)')
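Alongside the typing changes, this file also marks `--input` and `--output` as required. A small usage sketch (the file names are made up):

```python
# With required=True, argparse rejects a missing flag up front with a usage
# error and exit code 2, instead of the converter failing later on a None path.
import argparse
from pathlib import Path

parser = argparse.ArgumentParser(description = 'Convert GGMLv3 models to GGUF')
parser.add_argument('--input', '-i', type = Path, required = True, help = 'Input GGMLv3 filename')
parser.add_argument('--output', '-o', type = Path, required = True, help = 'Output GGUF filename')

args = parser.parse_args(['--input', 'model.ggmlv3.bin', '--output', 'model.gguf'])
print(args.input, args.output)  # both are pathlib.Path objects
```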
31 changes: 17 additions & 14 deletions convert-llama-hf-to-gguf.py
@@ -1,28 +1,31 @@
 #!/usr/bin/env python3
 # HF llama --> gguf conversion
 
-import gguf
+from __future__ import annotations
+
+import argparse
+import json
 import os
-import sys
 import struct
-import json
+import sys
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+import gguf
 import numpy as np
 import torch
-import argparse
-
-from typing import Any, List, Optional, TypeAlias
-from pathlib import Path
 from sentencepiece import SentencePieceProcessor
 
-#NDArray = np.ndarray[Any, Any]
-# compatible with python < 3.9
-NDArray: 'TypeAlias' = 'np.ndarray[Any, Any]'
+if TYPE_CHECKING:
+    from typing import TypeAlias
+
+NDArray: TypeAlias = 'np.ndarray[Any, Any]'
 
 # reverse HF permute back to original pth layout
 # https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/convert_llama_weights_to_hf.py
 
 
-def reverse_hf_permute(weights: NDArray, n_head: int, n_kv_head: Optional[int] = None) -> NDArray:
+def reverse_hf_permute(weights: NDArray, n_head: int, n_kv_head: int | None = None) -> NDArray:
     if n_kv_head is not None and n_head != n_kv_head:
         n_head //= n_kv_head
 
@@ -136,9 +139,9 @@ def parse_args() -> argparse.Namespace:
 
 print("gguf: get tokenizer metadata")
 
-tokens: List[bytes] = []
-scores: List[float] = []
-toktypes: List[int] = []
+tokens: list[bytes] = []
+scores: list[float] = []
+toktypes: list[int] = []
 
 tokenizer_model_file = dir_model / 'tokenizer.model'
 if not tokenizer_model_file.is_file():
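A minimal sketch of the PEP 604 form in `reverse_hf_permute`'s new signature, using a hypothetical helper rather than the real tensor code:

```python
# With the future import, `int | None` is never evaluated on 3.8; the runtime
# None-check behaves exactly as it did with Optional[int].
from __future__ import annotations

def heads_per_kv_group(n_head: int, n_kv_head: int | None = None) -> int:
    # mirrors the guard in reverse_hf_permute: only regroup when GQA is in use
    if n_kv_head is not None and n_head != n_kv_head:
        return n_head // n_kv_head
    return n_head

print(heads_per_kv_group(32))     # 32 - no grouped-query attention
print(heads_per_kv_group(64, 8))  # 8 query heads per KV head
```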
8 changes: 5 additions & 3 deletions convert-lora-to-ggml.py
@@ -1,15 +1,17 @@
 #!/usr/bin/env python3
+from __future__ import annotations
+
 import json
 import os
 import re
 import struct
 import sys
-from typing import Any, Dict, Sequence, BinaryIO
+from typing import Any, BinaryIO, Sequence
 
 import numpy as np
 import torch
 
-NUMPY_TYPE_TO_FTYPE: Dict[str, int] = {"float32": 0, "float16": 1}
+NUMPY_TYPE_TO_FTYPE: dict[str, int] = {"float32": 0, "float16": 1}
 
 
 HF_SUBLAYER_TO_GGML = {
@@ -46,7 +48,7 @@ def translate_tensor_name(t: str) -> str:
         sys.exit(1)
 
 
-def write_file_header(fout: BinaryIO, params: Dict[str, Any]) -> None:
+def write_file_header(fout: BinaryIO, params: dict[str, Any]) -> None:
     fout.write(b"ggla"[::-1])  # magic (ggml lora)
     fout.write(struct.pack("i", 1))  # file version
     fout.write(struct.pack("i", params["r"]))
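A short usage sketch of the hunk above; the `params` dict and output filename are invented for illustration, and only the three header fields visible in the hunk are reproduced (the real function continues past the truncation):

```python
# dict[str, Any] now uses the built-in generic; the on-disk layout is
# unchanged: reversed "ggla" magic, file version, then the LoRA rank.
from __future__ import annotations

import struct
from typing import Any, BinaryIO

def write_file_header(fout: BinaryIO, params: dict[str, Any]) -> None:
    fout.write(b"ggla"[::-1])        # magic (ggml lora)
    fout.write(struct.pack("i", 1))  # file version
    fout.write(struct.pack("i", params["r"]))  # LoRA rank

with open("adapter-ggml.bin", "wb") as fout:  # hypothetical output file
    write_file_header(fout, {"r": 8})
```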