
convert : fix python 3.8 support, modernize type annotations #2916

Merged
9 commits merged on Aug 31, 2023
23 changes: 13 additions & 10 deletions convert-falcon-hf-to-gguf.py
@@ -1,19 +1,22 @@
 #!/usr/bin/env python3
 # HF falcon--> gguf conversion
 
-import gguf
+from __future__ import annotations
+
+import argparse
+import json
 import os
-import sys
 import struct
-import json
+import sys
+from pathlib import Path
+from typing import Any
+
+import gguf
 import numpy as np
 import torch
-import argparse
-
-from typing import Any, List
-from pathlib import Path
+
 from transformers import AutoTokenizer
 
 
 def bytes_to_unicode():
     # ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
     """
@@ -114,9 +117,9 @@ def parse_args() -> argparse.Namespace:
 
 print("gguf: get tokenizer metadata")
 
-tokens: List[bytearray] = []
-scores: List[float] = []
-toktypes: List[int] = []
+tokens: list[bytearray] = []
+scores: list[float] = []
+toktypes: list[int] = []
 
 tokenizer_json_file = dir_model / 'tokenizer.json'
 if not tokenizer_json_file.is_file():
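The same pattern repeats across every converter script below: imports are regrouped and `from __future__ import annotations` is added so the modernized annotations stay legal on Python 3.8. A minimal sketch, not taken from the PR, of what the future import buys:

```python
# PEP 563 turns every annotation into a plain string that is never evaluated
# at runtime, so PEP 585 built-in generics like `list[bytearray]` parse on
# Python 3.8 even though `list` only became subscriptable in Python 3.9.
from __future__ import annotations

tokens: list[bytearray] = []  # TypeError on 3.8 *without* the future import

def first_token(toks: list[bytearray]) -> bytearray | None:  # PEP 604 union
    return toks[0] if toks else None

print(__annotations__)  # {'tokens': 'list[bytearray]'} - stored, not evaluated
```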
18 changes: 10 additions & 8 deletions convert-gptneox-hf-to-gguf.py
@@ -1,17 +1,19 @@
 #!/usr/bin/env python3
 # HF gptneox--> gguf conversion
 
-import gguf
+from __future__ import annotations
+
+import argparse
+import json
 import os
-import sys
 import struct
-import json
+import sys
+from pathlib import Path
+from typing import Any
+
+import gguf
 import numpy as np
 import torch
-import argparse
-
-from typing import Any, List
-from pathlib import Path
 from transformers import AutoTokenizer
 
 # ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
@@ -112,7 +114,7 @@ def parse_args() -> argparse.Namespace:
 
 print("gguf: get tokenizer metadata")
 
-tokens: List[bytearray] = []
+tokens: list[bytearray] = []
 
 tokenizer_json_file = dir_model / 'tokenizer.json'
 if not tokenizer_json_file.is_file():
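A companion sketch (assumed behavior, not from the PR) of the failure mode being fixed: without the future import, the annotation is evaluated eagerly at import time.

```python
# exec() compiles without the PEP 563 flag, so `list[bytearray]` is evaluated
# immediately; on Python 3.8 this raises, on 3.9+ it passes silently.
try:
    exec("tokens: list[bytearray] = []")
except TypeError as err:
    print(err)  # on 3.8: 'type' object is not subscriptable
```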
29 changes: 16 additions & 13 deletions convert-llama-7b-pth-to-gguf.py
@@ -3,22 +3,25 @@
 # Only models with a single datafile are supported, like 7B
 # HF files required in the model dir: config.json tokenizer_config.json tokenizer.json tokenizer.model
 
-import gguf
+from __future__ import annotations
+
+import argparse
+import json
 import os
-import sys
 import struct
-import json
+import sys
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+import gguf
 import numpy as np
 import torch
-import argparse
-
-from typing import Any, List, TypeAlias
-from pathlib import Path
 from sentencepiece import SentencePieceProcessor
 
-#NDArray = np.ndarray[Any, Any]
-# compatible with python < 3.9
-NDArray: 'TypeAlias' = 'np.ndarray[Any, Any]'
+if TYPE_CHECKING:
+    from typing import TypeAlias
+
+NDArray: TypeAlias = 'np.ndarray[Any, Any]'
 
 
 def count_model_parts(dir_model: Path) -> int:
@@ -129,9 +132,9 @@ def parse_args() -> argparse.Namespace:
 
 print("gguf: get tokenizer metadata")
 
-tokens: List[bytes] = []
-scores: List[float] = []
-toktypes: List[int] = []
+tokens: list[bytes] = []
+scores: list[float] = []
+toktypes: list[int] = []
 
 tokenizer_model_file = dir_model / 'tokenizer.model'
 if not tokenizer_model_file.is_file():
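The `if TYPE_CHECKING:` guard is the key 3.8 fix in this file: the old code imported `TypeAlias` from `typing` unconditionally, but `typing.TypeAlias` only exists on Python 3.10+. A minimal sketch, assuming only that numpy is installed, of how the guarded alias behaves:

```python
# Static type checkers treat TYPE_CHECKING as True and resolve the alias;
# at runtime the block is skipped and NDArray is just a plain string, which
# the future import keeps unevaluated wherever it appears as an annotation.
from __future__ import annotations

from typing import TYPE_CHECKING, Any

import numpy as np

if TYPE_CHECKING:
    from typing import TypeAlias  # never executed at runtime

NDArray: TypeAlias = 'np.ndarray[Any, Any]'

def zeros(n: int) -> NDArray:  # mypy resolves NDArray; 3.8 never evaluates it
    return np.zeros(n, dtype=np.float32)

print(zeros(4))  # [0. 0. 0. 0.]
```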
18 changes: 11 additions & 7 deletions convert-llama-ggmlv3-to-gguf.py
@@ -1,10 +1,14 @@
 #!/usr/bin/env python3
-import sys, struct, math, argparse
-from pathlib import Path
+from __future__ import annotations
 
-import numpy as np
+import argparse
+import math
+import struct
+import sys
+from pathlib import Path
 
 import gguf
+import numpy as np
 
 # Note: Does not support GGML_QKK_64
 QK_K = 256
@@ -72,7 +76,7 @@ def load(self, data, offset, n_vocab):
 class Tensor:
     def __init__(self):
         self.name = None
-        self.dims = ()
+        self.dims: tuple[int, ...] = ()
         self.dtype = None
         self.start_offset = 0
         self.len_bytes = np.int64(0)
@@ -119,7 +123,7 @@ def load(self, data, offset):
         offset += hp.load(data, offset)
         vocab = Vocab()
         offset += vocab.load(data, offset, hp.n_vocab)
-        tensors = []
+        tensors: list[Tensor] = []
         tensor_map = {}
         while offset < len(data):
             tensor = Tensor()
@@ -305,8 +309,8 @@ def handle_metadata(cfg, hp):
 
 def handle_args():
     parser = argparse.ArgumentParser(description = 'Convert GGMLv3 models to GGUF')
-    parser.add_argument('--input', '-i', type = Path, help = 'Input GGMLv3 filename')
-    parser.add_argument('--output', '-o', type = Path, help ='Output GGUF filename')
+    parser.add_argument('--input', '-i', type = Path, required = True, help = 'Input GGMLv3 filename')
+    parser.add_argument('--output', '-o', type = Path, required = True, help ='Output GGUF filename')
     parser.add_argument('--name', help = 'Set model name')
     parser.add_argument('--desc', help = 'Set model description')
     parser.add_argument('--gqa', type = int, default = 1, help = 'grouped-query attention factor (use 8 for LLaMA2 70B)')
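Alongside the typing changes, this file also marks `--input` and `--output` as required. A small usage sketch (the file names are made up):

```python
# With required=True, argparse rejects a missing flag up front with a usage
# error and exit code 2, instead of the converter failing later on a None path.
import argparse
from pathlib import Path

parser = argparse.ArgumentParser(description = 'Convert GGMLv3 models to GGUF')
parser.add_argument('--input', '-i', type = Path, required = True, help = 'Input GGMLv3 filename')
parser.add_argument('--output', '-o', type = Path, required = True, help = 'Output GGUF filename')

args = parser.parse_args(['--input', 'model.ggmlv3.bin', '--output', 'model.gguf'])
print(args.input, args.output)  # both are pathlib.Path objects
```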
31 changes: 17 additions & 14 deletions convert-llama-hf-to-gguf.py
@@ -1,28 +1,31 @@
 #!/usr/bin/env python3
 # HF llama --> gguf conversion
 
-import gguf
+from __future__ import annotations
+
+import argparse
+import json
 import os
-import sys
 import struct
-import json
+import sys
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+import gguf
 import numpy as np
 import torch
-import argparse
-
-from typing import Any, List, Optional, TypeAlias
-from pathlib import Path
 from sentencepiece import SentencePieceProcessor
 
-#NDArray = np.ndarray[Any, Any]
-# compatible with python < 3.9
-NDArray: 'TypeAlias' = 'np.ndarray[Any, Any]'
+if TYPE_CHECKING:
+    from typing import TypeAlias
+
+NDArray: TypeAlias = 'np.ndarray[Any, Any]'
 
 # reverse HF permute back to original pth layout
 # https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/convert_llama_weights_to_hf.py
 
 
-def reverse_hf_permute(weights: NDArray, n_head: int, n_kv_head: Optional[int] = None) -> NDArray:
+def reverse_hf_permute(weights: NDArray, n_head: int, n_kv_head: int | None = None) -> NDArray:
     if n_kv_head is not None and n_head != n_kv_head:
         n_head //= n_kv_head
 
@@ -136,9 +139,9 @@ def parse_args() -> argparse.Namespace:
 
 print("gguf: get tokenizer metadata")
 
-tokens: List[bytes] = []
-scores: List[float] = []
-toktypes: List[int] = []
+tokens: list[bytes] = []
+scores: list[float] = []
+toktypes: list[int] = []
 
 tokenizer_model_file = dir_model / 'tokenizer.model'
 if not tokenizer_model_file.is_file():
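A minimal sketch of the PEP 604 form in `reverse_hf_permute`'s new signature, using a hypothetical helper rather than the real tensor code:

```python
# With the future import, `int | None` is never evaluated on 3.8; the runtime
# None-check behaves exactly as it did with Optional[int].
from __future__ import annotations

def heads_per_kv_group(n_head: int, n_kv_head: int | None = None) -> int:
    # mirrors the guard in reverse_hf_permute: only regroup when GQA is in use
    if n_kv_head is not None and n_head != n_kv_head:
        return n_head // n_kv_head
    return n_head

print(heads_per_kv_group(32))     # 32 - no grouped-query attention
print(heads_per_kv_group(64, 8))  # 8 query heads per KV head
```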
8 changes: 5 additions & 3 deletions convert-lora-to-ggml.py
@@ -1,15 +1,17 @@
 #!/usr/bin/env python3
+from __future__ import annotations
+
 import json
 import os
 import re
 import struct
 import sys
-from typing import Any, Dict, Sequence, BinaryIO
+from typing import Any, BinaryIO, Sequence
 
 import numpy as np
 import torch
 
-NUMPY_TYPE_TO_FTYPE: Dict[str, int] = {"float32": 0, "float16": 1}
+NUMPY_TYPE_TO_FTYPE: dict[str, int] = {"float32": 0, "float16": 1}
 
 
 HF_SUBLAYER_TO_GGML = {
@@ -46,7 +48,7 @@ def translate_tensor_name(t: str) -> str:
         sys.exit(1)
 
 
-def write_file_header(fout: BinaryIO, params: Dict[str, Any]) -> None:
+def write_file_header(fout: BinaryIO, params: dict[str, Any]) -> None:
     fout.write(b"ggla"[::-1])  # magic (ggml lora)
     fout.write(struct.pack("i", 1))  # file version
     fout.write(struct.pack("i", params["r"]))
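A short usage sketch of the hunk above; the `params` dict and output filename are invented for illustration, and only the three header fields visible in the hunk are reproduced (the real function continues past the truncation):

```python
# dict[str, Any] now uses the built-in generic; the on-disk layout is
# unchanged: reversed "ggla" magic, file version, then the LoRA rank.
from __future__ import annotations

import struct
from typing import Any, BinaryIO

def write_file_header(fout: BinaryIO, params: dict[str, Any]) -> None:
    fout.write(b"ggla"[::-1])        # magic (ggml lora)
    fout.write(struct.pack("i", 1))  # file version
    fout.write(struct.pack("i", params["r"]))  # LoRA rank

with open("adapter-ggml.bin", "wb") as fout:  # hypothetical output file
    write_file_header(fout, {"r": 8})
```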