diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 6d52972bf9503..7b39c3512c212 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -1,8 +1,8 @@
 from __future__ import annotations

 import sys
-from enum import Enum, IntEnum, auto
-from typing import Any, NamedTuple
+from enum import Enum, IntEnum, auto, StrEnum
+from typing import Any, NamedTuple, Type

 #
 # constants
@@ -16,63 +16,63 @@
 # metadata keys
 #

-class GeneralKeys(NamedTuple):
-    ARCHITECTURE         = "general.architecture"
-    QUANTIZATION_VERSION = "general.quantization_version"
-    ALIGNMENT            = "general.alignment"
-    NAME                 = "general.name"
-    AUTHOR               = "general.author"
-    URL                  = "general.url"
-    DESCRIPTION          = "general.description"
-    LICENSE              = "general.license"
-    SOURCE_URL           = "general.source.url"
-    SOURCE_HF_REPO       = "general.source.huggingface.repository"
-    FILE_TYPE            = "general.file_type"
-
-class AttentionKeys(NamedTuple):
-    HEAD_COUNT        = "{arch}.attention.head_count"
-    HEAD_COUNT_KV     = "{arch}.attention.head_count_kv"
-    MAX_ALIBI_BIAS    = "{arch}.attention.max_alibi_bias"
-    CLAMP_KQV         = "{arch}.attention.clamp_kqv"
-    LAYERNORM_EPS     = "{arch}.attention.layer_norm_epsilon"
-    LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
-
-class RopeKeys(NamedTuple):
-    DIMENSION_COUNT      = "{arch}.rope.dimension_count"
-    FREQ_BASE            = "{arch}.rope.freq_base"
-    SCALING_TYPE         = "{arch}.rope.scaling.type"
-    SCALING_FACTOR       = "{arch}.rope.scaling.factor"
-    SCALING_ORIG_CTX_LEN = "{arch}.rope.scaling.original_context_length"
-    SCALING_FINETUNED    = "{arch}.rope.scaling.finetuned"
-
-class TokenizerKeys(NamedTuple):
-    MODEL      = "tokenizer.ggml.model"
-    LIST       = "tokenizer.ggml.tokens"
-    TOKEN_TYPE = "tokenizer.ggml.token_type"
-    SCORES     = "tokenizer.ggml.scores"
-    MERGES     = "tokenizer.ggml.merges"
-    BOS_ID     = "tokenizer.ggml.bos_token_id"
-    EOS_ID     = "tokenizer.ggml.eos_token_id"
-    UNK_ID     = "tokenizer.ggml.unknown_token_id"
-    SEP_ID     = "tokenizer.ggml.seperator_token_id"
-    PAD_ID     = "tokenizer.ggml.padding_token_id"
-    HF_JSON    = "tokenizer.huggingface.json"
-    RWKV       = "tokenizer.rwkv.world"
-
-class LLMKeys(NamedTuple):
-    CONTEXT_LENGTH        = "{arch}.context_length"
-    EMBEDDING_LENGTH      = "{arch}.embedding_length"
-    BLOCK_COUNT           = "{arch}.block_count"
-    FEED_FORWARD_LENGTH   = "{arch}.feed_forward_length"
-    USE_PARALLEL_RESIDUAL = "{arch}.use_parallel_residual"
-    TENSOR_DATA_LAYOUT    = "{arch}.tensor_data_layout"
+class GeneralKeys(StrEnum):
+    ARCHITECTURE        : str = "general.architecture"
+    QUANTIZATION_VERSION: str = "general.quantization_version"
+    ALIGNMENT           : str = "general.alignment"
+    NAME                : str = "general.name"
+    AUTHOR              : str = "general.author"
+    URL                 : str = "general.url"
+    DESCRIPTION         : str = "general.description"
+    LICENSE             : str = "general.license"
+    SOURCE_URL          : str = "general.source.url"
+    SOURCE_HF_REPO      : str = "general.source.huggingface.repository"
+    FILE_TYPE           : str = "general.file_type"
+
+class AttentionKeys(StrEnum):
+    HEAD_COUNT       : str = "{arch}.attention.head_count"
+    HEAD_COUNT_KV    : str = "{arch}.attention.head_count_kv"
+    MAX_ALIBI_BIAS   : str = "{arch}.attention.max_alibi_bias"
+    CLAMP_KQV        : str = "{arch}.attention.clamp_kqv"
+    LAYERNORM_EPS    : str = "{arch}.attention.layer_norm_epsilon"
+    LAYERNORM_RMS_EPS: str = "{arch}.attention.layer_norm_rms_epsilon"
+
+class RopeKeys(StrEnum):
+    DIMENSION_COUNT     : str = "{arch}.rope.dimension_count"
+    FREQ_BASE           : str = "{arch}.rope.freq_base"
+    SCALING_TYPE        : str = "{arch}.rope.scaling.type"
+    SCALING_FACTOR      : str = "{arch}.rope.scaling.factor"
"{arch}.rope.scaling.original_context_length" + SCALING_FINETUNED : str = "{arch}.rope.scaling.finetuned" + +class TokenizerKeys(StrEnum): + MODEL : str = "tokenizer.ggml.model" + LIST : str = "tokenizer.ggml.tokens" + TOKEN_TYPE: str = "tokenizer.ggml.token_type" + SCORES : str = "tokenizer.ggml.scores" + MERGES : str = "tokenizer.ggml.merges" + BOS_ID : str = "tokenizer.ggml.bos_token_id" + EOS_ID : str = "tokenizer.ggml.eos_token_id" + UNK_ID : str = "tokenizer.ggml.unknown_token_id" + SEP_ID : str = "tokenizer.ggml.seperator_token_id" + PAD_ID : str = "tokenizer.ggml.padding_token_id" + HF_JSON : str = "tokenizer.huggingface.json" + RWKV : str = "tokenizer.rwkv.world" + +class LLMKeys(StrEnum): + CONTEXT_LENGTH : str = "{arch}.context_length" + EMBEDDING_LENGTH : str = "{arch}.embedding_length" + BLOCK_COUNT : str = "{arch}.block_count" + FEED_FORWARD_LENGTH : str = "{arch}.feed_forward_length" + USE_PARALLEL_RESIDUAL: str = "{arch}.use_parallel_residual" + TENSOR_DATA_LAYOUT : str = "{arch}.tensor_data_layout" class Keys(NamedTuple): - GENERAL = GeneralKeys() - LLM = LLMKeys() - ATTENTION = AttentionKeys() - ROPE = RopeKeys() - TOKENIZER = TokenizerKeys() + GENERAL : Type[GeneralKeys ] = GeneralKeys + LLM : Type[LLMKeys ] = LLMKeys + ATTENTION: Type[AttentionKeys] = AttentionKeys + ROPE : Type[RopeKeys ] = RopeKeys + TOKENIZER: Type[TokenizerKeys] = TokenizerKeys KEY = Keys() @@ -418,52 +418,52 @@ def get_type(val: Any) -> GGUFValueType: # Aliases for backward compatibility. # general -KEY_GENERAL_ARCHITECTURE = KEY.GENERAL.ARCHITECTURE -KEY_GENERAL_QUANTIZATION_VERSION = KEY.GENERAL.QUANTIZATION_VERSION -KEY_GENERAL_ALIGNMENT = KEY.GENERAL.ALIGNMENT -KEY_GENERAL_NAME = KEY.GENERAL.NAME -KEY_GENERAL_AUTHOR = KEY.GENERAL.AUTHOR -KEY_GENERAL_URL = KEY.GENERAL.URL -KEY_GENERAL_DESCRIPTION = KEY.GENERAL.DESCRIPTION -KEY_GENERAL_LICENSE = KEY.GENERAL.LICENSE -KEY_GENERAL_SOURCE_URL = KEY.GENERAL.SOURCE_URL -KEY_GENERAL_SOURCE_HF_REPO = KEY.GENERAL.SOURCE_HF_REPO -KEY_GENERAL_FILE_TYPE = KEY.GENERAL.FILE_TYPE +KEY_GENERAL_ARCHITECTURE : str = KEY.GENERAL.ARCHITECTURE +KEY_GENERAL_QUANTIZATION_VERSION: str = KEY.GENERAL.QUANTIZATION_VERSION +KEY_GENERAL_ALIGNMENT : str = KEY.GENERAL.ALIGNMENT +KEY_GENERAL_NAME : str = KEY.GENERAL.NAME +KEY_GENERAL_AUTHOR : str = KEY.GENERAL.AUTHOR +KEY_GENERAL_URL : str = KEY.GENERAL.URL +KEY_GENERAL_DESCRIPTION : str = KEY.GENERAL.DESCRIPTION +KEY_GENERAL_LICENSE : str = KEY.GENERAL.LICENSE +KEY_GENERAL_SOURCE_URL : str = KEY.GENERAL.SOURCE_URL +KEY_GENERAL_SOURCE_HF_REPO : str = KEY.GENERAL.SOURCE_HF_REPO +KEY_GENERAL_FILE_TYPE : str = KEY.GENERAL.FILE_TYPE # LLM -KEY_CONTEXT_LENGTH = KEY.LLM.CONTEXT_LENGTH -KEY_EMBEDDING_LENGTH = KEY.LLM.EMBEDDING_LENGTH -KEY_BLOCK_COUNT = KEY.LLM.BLOCK_COUNT -KEY_FEED_FORWARD_LENGTH = KEY.LLM.FEED_FORWARD_LENGTH -KEY_USE_PARALLEL_RESIDUAL = KEY.LLM.USE_PARALLEL_RESIDUAL -KEY_TENSOR_DATA_LAYOUT = KEY.LLM.TENSOR_DATA_LAYOUT +KEY_CONTEXT_LENGTH : str = KEY.LLM.CONTEXT_LENGTH +KEY_EMBEDDING_LENGTH : str = KEY.LLM.EMBEDDING_LENGTH +KEY_BLOCK_COUNT : str = KEY.LLM.BLOCK_COUNT +KEY_FEED_FORWARD_LENGTH : str = KEY.LLM.FEED_FORWARD_LENGTH +KEY_USE_PARALLEL_RESIDUAL: str = KEY.LLM.USE_PARALLEL_RESIDUAL +KEY_TENSOR_DATA_LAYOUT : str = KEY.LLM.TENSOR_DATA_LAYOUT # attention -KEY_ATTENTION_HEAD_COUNT = KEY.ATTENTION.HEAD_COUNT -KEY_ATTENTION_HEAD_COUNT_KV = KEY.ATTENTION.HEAD_COUNT_KV -KEY_ATTENTION_MAX_ALIBI_BIAS = KEY.ATTENTION.MAX_ALIBI_BIAS -KEY_ATTENTION_CLAMP_KQV = KEY.ATTENTION.CLAMP_KQV -KEY_ATTENTION_LAYERNORM_EPS = 
-KEY_ATTENTION_LAYERNORM_EPS     = KEY.ATTENTION.LAYERNORM_EPS
-KEY_ATTENTION_LAYERNORM_RMS_EPS = KEY.ATTENTION.LAYERNORM_RMS_EPS
+KEY_ATTENTION_HEAD_COUNT       : str = KEY.ATTENTION.HEAD_COUNT
+KEY_ATTENTION_HEAD_COUNT_KV    : str = KEY.ATTENTION.HEAD_COUNT_KV
+KEY_ATTENTION_MAX_ALIBI_BIAS   : str = KEY.ATTENTION.MAX_ALIBI_BIAS
+KEY_ATTENTION_CLAMP_KQV        : str = KEY.ATTENTION.CLAMP_KQV
+KEY_ATTENTION_LAYERNORM_EPS    : str = KEY.ATTENTION.LAYERNORM_EPS
+KEY_ATTENTION_LAYERNORM_RMS_EPS: str = KEY.ATTENTION.LAYERNORM_RMS_EPS

 # RoPE
-KEY_ROPE_DIMENSION_COUNT      = KEY.ROPE.DIMENSION_COUNT
-KEY_ROPE_FREQ_BASE            = KEY.ROPE.FREQ_BASE
-KEY_ROPE_SCALING_TYPE         = KEY.ROPE.SCALING_TYPE
-KEY_ROPE_SCALING_FACTOR       = KEY.ROPE.SCALING_FACTOR
-KEY_ROPE_SCALING_ORIG_CTX_LEN = KEY.ROPE.SCALING_ORIG_CTX_LEN
-KEY_ROPE_SCALING_FINETUNED    = KEY.ROPE.SCALING_FINETUNED
+KEY_ROPE_DIMENSION_COUNT     : str = KEY.ROPE.DIMENSION_COUNT
+KEY_ROPE_FREQ_BASE           : str = KEY.ROPE.FREQ_BASE
+KEY_ROPE_SCALING_TYPE        : str = KEY.ROPE.SCALING_TYPE
+KEY_ROPE_SCALING_FACTOR      : str = KEY.ROPE.SCALING_FACTOR
+KEY_ROPE_SCALING_ORIG_CTX_LEN: str = KEY.ROPE.SCALING_ORIG_CTX_LEN
+KEY_ROPE_SCALING_FINETUNED   : str = KEY.ROPE.SCALING_FINETUNED

 # tokenization
-KEY_TOKENIZER_MODEL      = KEY.TOKENIZER.MODEL
-KEY_TOKENIZER_LIST       = KEY.TOKENIZER.LIST
-KEY_TOKENIZER_TOKEN_TYPE = KEY.TOKENIZER.TOKEN_TYPE
-KEY_TOKENIZER_SCORES     = KEY.TOKENIZER.SCORES
-KEY_TOKENIZER_MERGES     = KEY.TOKENIZER.MERGES
-KEY_TOKENIZER_BOS_ID     = KEY.TOKENIZER.BOS_ID
-KEY_TOKENIZER_EOS_ID     = KEY.TOKENIZER.EOS_ID
-KEY_TOKENIZER_UNK_ID     = KEY.TOKENIZER.UNK_ID
-KEY_TOKENIZER_SEP_ID     = KEY.TOKENIZER.SEP_ID
-KEY_TOKENIZER_PAD_ID     = KEY.TOKENIZER.PAD_ID
-KEY_TOKENIZER_HF_JSON    = KEY.TOKENIZER.HF_JSON
-KEY_TOKENIZER_RWKV       = KEY.TOKENIZER.RWKV
+KEY_TOKENIZER_MODEL     : str = KEY.TOKENIZER.MODEL
+KEY_TOKENIZER_LIST      : str = KEY.TOKENIZER.LIST
+KEY_TOKENIZER_TOKEN_TYPE: str = KEY.TOKENIZER.TOKEN_TYPE
+KEY_TOKENIZER_SCORES    : str = KEY.TOKENIZER.SCORES
+KEY_TOKENIZER_MERGES    : str = KEY.TOKENIZER.MERGES
+KEY_TOKENIZER_BOS_ID    : str = KEY.TOKENIZER.BOS_ID
+KEY_TOKENIZER_EOS_ID    : str = KEY.TOKENIZER.EOS_ID
+KEY_TOKENIZER_UNK_ID    : str = KEY.TOKENIZER.UNK_ID
+KEY_TOKENIZER_SEP_ID    : str = KEY.TOKENIZER.SEP_ID
+KEY_TOKENIZER_PAD_ID    : str = KEY.TOKENIZER.PAD_ID
+KEY_TOKENIZER_HF_JSON   : str = KEY.TOKENIZER.HF_JSON
+KEY_TOKENIZER_RWKV      : str = KEY.TOKENIZER.RWKV
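Note on the constants change: StrEnum (Python 3.11+) members are themselves str instances, so the KEY_* aliases above and any existing string comparisons keep working. A minimal sketch of the intended usage; the "llama" architecture value is only an example:

    from gguf.constants import KEY

    # StrEnum members compare equal to their plain string values.
    assert KEY.GENERAL.ARCHITECTURE == "general.architecture"
    # Arch-scoped keys are templates; .value.format() fills in the architecture,
    # mirroring what the gguf_writer.py changes further down do.
    assert KEY.LLM.CONTEXT_LENGTH.value.format(arch = "llama") == "llama.context_length"
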
diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py
index 6b9e0c3cc6d9e..84b889657fad5 100644
--- a/gguf-py/gguf/gguf_reader.py
+++ b/gguf-py/gguf/gguf_reader.py
@@ -2,7 +2,7 @@
 import os
 from collections import OrderedDict
-from typing import TypeVar, NamedTuple
+from typing import Any, TypeVar, NamedTuple, Dict, Type, Literal

 import numpy as np
 import numpy.typing as npt
@@ -35,14 +35,14 @@ class ReaderField(NamedTuple):

     # Data parts. Some types have multiple components, such as strings
     # that consist of a length followed by the string data.
-    parts: [npt.NDArray] = []
+    parts: list[npt.NDArray[Any]] = []

     # Indexes into parts that we can call the actual data. For example
     # an array of strings will be populated with indexes to the actual
     # string data.
-    data: [int] = [-1]
+    data: list[int] = [-1]

-    types: [GGUFValueType] = []
+    types: list[GGUFValueType] = []


 class ReaderTensor(NamedTuple):
@@ -52,17 +52,17 @@ class ReaderTensor(NamedTuple):
     n_elements: int
     n_bytes: int
     data_offset: int
-    data: npt.NDArray
+    data: npt.NDArray[Any]
     field: ReaderField


 class GGUFReader:
-    byte_order: str = 'I'
-    fields: 'OrderedDict[str, ReaderField]' = {}
-    tensors: [ReaderTensor] = []
+    byte_order: Literal['I', 'S', '<'] = 'I'
+    fields: 'OrderedDict[str, ReaderField]' = OrderedDict()
+    tensors: list[ReaderTensor] = []
     alignment: int = GGUF_DEFAULT_ALIGNMENT

-    _simple_value_map = {
+    _simple_value_map: Dict[GGUFValueType, Type[Any]] = {
         GGUFValueType.UINT8: np.uint8,
         GGUFValueType.INT8: np.int8,
         GGUFValueType.UINT16: np.uint16,
@@ -76,10 +76,12 @@ class GGUFReader:
         GGUFValueType.BOOL: np.bool_,
     }

-    _DT = TypeVar('T', bound = npt.DTypeLike)
-    def _get(self, offset: int, dtype: _DT, count: int = 1, override_order: None | str = None) -> 'npt.NDArray[_DT]':
-        end_offs = np.uint64(offset + np.uint64(dtype().nbytes * count))
-        return (self.data[np.uint64(offset):end_offs]
+    _DT = TypeVar('_DT', bound = npt.DTypeLike)
+    def _get(self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None) -> npt.NDArray[Any]:
+        count = int(count)
+        itemsize = int(np.empty([], dtype = dtype).itemsize)
+        end_offs = offset + itemsize * count
+        return (self.data[offset:end_offs]
             .view(dtype = dtype)[:count]
             .newbyteorder(override_order or self.byte_order))

@@ -87,80 +89,80 @@ def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
         if field.name in self.fields:
             raise KeyError(f'Duplicate {field.name} already in list at offset {field.offset}')
         self.fields[field.name] = field
-        return 0 if skip_sum else sum(part.nbytes for part in field.parts)
+        return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts)

-    def _get_str(self, offset: int) -> (npt.NDArray[np.uint64], npt.NDArray[np.uint8]):
+    def _get_str(self, offset: int) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
         slen = self._get(offset, np.uint64)
         return (slen, self._get(offset + 8, np.uint8, slen[0]))

-    def _get_field_parts(self, orig_offs: int, raw_type: int) -> (int, [np.NDArray], [int], [GGUFValueType]):
+    def _get_field_parts(self, orig_offs: int, raw_type: int) -> tuple[int, list[npt.NDArray[Any]], list[int], list[GGUFValueType]]:
         offs = orig_offs
-        types = []
+        types: list[GGUFValueType] = []
         gtype = GGUFValueType(raw_type)
         types.append(gtype)
         # Handle strings.
         if gtype == GGUFValueType.STRING:
-            parts = list(self._get_str(offs))
-            size = sum(part.nbytes for part in parts)
-            return (size, parts, [1], types)
+            sparts: list[npt.NDArray[Any]] = list(self._get_str(offs))
+            size = sum(int(part.nbytes) for part in sparts)
+            return (size, sparts, [1], types)
         # Check if it's a simple scalar type.
         nptype = self._simple_value_map.get(gtype)
         if nptype is not None:
             val = self._get(offs, nptype)
-            return (val.nbytes, [val], [0], types)
+            return (int(val.nbytes), [val], [0], types)
         # Handle arrays.
         if gtype == GGUFValueType.ARRAY:
             raw_itype = self._get(offs, np.uint32)
-            offs += raw_itype.nbytes
+            offs += int(raw_itype.nbytes)
             alen = self._get(offs, np.uint64)
-            offs += alen.nbytes
-            parts = [raw_itype, alen]
-            data_idxs = []
+            offs += int(alen.nbytes)
+            aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
+            data_idxs: list[int] = []
             for idx in range(alen[0]):
                 curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0])
                 if idx == 0:
                     types += curr_types
-                idxs_offs = len(parts)
-                parts += curr_parts
+                idxs_offs = len(aparts)
+                aparts += curr_parts
                 data_idxs += (idx + idxs_offs for idx in curr_idxs)
                 offs += curr_size
-            return (offs - orig_offs, parts, data_idxs, types)
+            return (offs - orig_offs, aparts, data_idxs, types)
         # We can't deal with this one.
         raise ValueError('Unknown/unhandled field type {gtype}')

     def _get_tensor(self, orig_offs: int) -> ReaderField:
-        offs = np.uint64(orig_offs)
+        offs = orig_offs
         name_len, name_data = self._get_str(offs)
-        offs += name_len.nbytes + name_data.nbytes
+        offs += int(name_len.nbytes + name_data.nbytes)
         n_dims = self._get(offs, np.uint32)
-        offs += n_dims.nbytes
+        offs += int(n_dims.nbytes)
         dims = self._get(offs, np.uint64, n_dims[0])
-        offs += dims.nbytes
+        offs += int(dims.nbytes)
         raw_dtype = self._get(offs, np.uint32)
-        offs += raw_dtype.nbytes
+        offs += int(raw_dtype.nbytes)
         offset_tensor = self._get(offs, np.uint64)
-        offs += offset_tensor.nbytes
+        offs += int(offset_tensor.nbytes)
         return ReaderField(
             orig_offs,
-            str(name_data, encoding = 'utf-8'),
+            str(bytes(name_data), encoding = 'utf-8'),
             [name_len, name_data, n_dims, dims, raw_dtype, offset_tensor],
             [1, 3, 4, 5],
         )

-    def _build_fields(self, offs, count) -> int:
+    def _build_fields(self, offs: int, count: int) -> int:
         for _ in range(count):
             orig_offs = offs
             kv_klen, kv_kdata = self._get_str(offs)
-            offs += kv_klen.nbytes + kv_kdata.nbytes
+            offs += int(kv_klen.nbytes + kv_kdata.nbytes)
             raw_kv_type = self._get(offs, np.uint32)
-            offs += raw_kv_type.nbytes
-            parts = [kv_klen, kv_kdata, raw_kv_type]
+            offs += int(raw_kv_type.nbytes)
+            parts: list[npt.NDArray[Any]] = [kv_klen, kv_kdata, raw_kv_type]
             idxs_offs = len(parts)
             field_size, field_parts, field_idxs, field_types = self._get_field_parts(offs, raw_kv_type[0])
             parts += field_parts
             self._push_field(ReaderField(
                 orig_offs,
-                str(kv_kdata, encoding = 'utf-8'),
+                str(bytes(kv_kdata), encoding = 'utf-8'),
                 parts,
                 list(idx + idxs_offs for idx in field_idxs),
                 field_types,
@@ -168,23 +170,24 @@ def _build_fields(self, offs, count) -> int:
             offs += field_size
         return offs

-    def _build_tensors_fields(self, offs, count) -> (int, [ReaderField]):
+    def _build_tensors_fields(self, offs: int, count: int) -> tuple[int, list[ReaderField]]:
         tensor_fields = []
         for _ in range(count):
             field = self._get_tensor(offs)
-            offs += sum(part.nbytes for part in field.parts)
+            offs += sum(int(part.nbytes) for part in field.parts)
             tensor_fields.append(field)
         return (offs, tensor_fields)

-    def _build_tensors(self, start_offs: int, fields: [ReaderField]) -> None:
+    def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
         tensors = []
         for field in fields:
             _name_len, name_data, _n_dims, dims, raw_dtype, offset_tensor = field.parts
             ggml_type = GGMLQuantizationType(raw_dtype[0])
             n_elems = np.prod(dims)
             block_size, type_size = GGML_QUANT_SIZES[ggml_type]
-            n_bytes = np.uint64(np.uint64(n_elems) * np.uint64(type_size)) // np.uint64(block_size)
-            data_offs = start_offs + offset_tensor[0]
+            n_bytes = n_elems * type_size // block_size
+            data_offs = int(start_offs + offset_tensor[0])
+            item_type: npt.DTypeLike
             if ggml_type == GGMLQuantizationType.F32:
                 item_count = n_elems
                 item_type = np.float32
@@ -195,7 +198,7 @@ def _build_tensors(self, start_offs: int, fields: [ReaderField]) -> None:
                 item_count = n_bytes
                 item_type = np.uint8
             tensors.append(ReaderTensor(
-                name = str(name_data, encoding = 'utf-8'),
+                name = str(bytes(name_data), encoding = 'utf-8'),
                 tensor_type = ggml_type,
                 shape = dims,
                 n_elements = n_elems,
@@ -207,31 +210,31 @@ def _build_tensors(self, start_offs: int, fields: [ReaderField]) -> None:
         self.tensors = tensors

-    def __init__(self, path: os.PathLike[str] | str, mode: str = 'r') -> None:
+    def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = 'r') -> None:
         self.data = np.memmap(path, mode = mode)
         offs = 0
         if self._get(offs, np.uint32, override_order = '<')[0] != GGUF_MAGIC:
             raise ValueError('GGUF magic invalid')
         offs += 4
-        temp = self._get(offs, np.uint32)
-        if temp[0] > 2000:
+        temp_version = self._get(offs, np.uint32)
+        if temp_version[0] > 2000:
             self.byte_order = 'S'
-            temp = temp.newbyteorder(self.byte_order)
-        version = temp[0]
+            temp_version = temp_version.newbyteorder(self.byte_order)
+        version = temp_version[0]
         if version not in READER_SUPPORTED_VERSIONS:
             raise ValueError(f'Sorry, file appears to be version {version} which we cannot handle')
-        offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp], [0], [GGUFValueType.UINT32]))
-        temp = self._get(offs, np.uint64, 2)
-        offs += self._push_field(ReaderField(offs, 'GGUF.tensor_count', [temp[:1]], [0], [GGUFValueType.UINT64]))
-        offs += self._push_field(ReaderField(offs, 'GGUF.kv_count', [temp[1:]], [0], [GGUFValueType.UINT64]))
-        tensor_count, kv_count = temp
+        offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32]))
+        temp_counts = self._get(offs, np.uint64, 2)
+        offs += self._push_field(ReaderField(offs, 'GGUF.tensor_count', [temp_counts[:1]], [0], [GGUFValueType.UINT64]))
+        offs += self._push_field(ReaderField(offs, 'GGUF.kv_count', [temp_counts[1:]], [0], [GGUFValueType.UINT64]))
+        tensor_count, kv_count = temp_counts
         offs = self._build_fields(offs, kv_count)
         offs, tensors_fields = self._build_tensors_fields(offs, tensor_count)
         new_align = self.fields.get('general.alignment')
         if new_align is not None:
             if new_align.types != [GGUFValueType.UINT64]:
                 raise ValueError('Bad type for general.alignment field')
-            self.alignment = new_align.parts[-1]
+            self.alignment = new_align.parts[-1][0]
         padding = offs % self.alignment
         if padding != 0:
             offs += self.alignment - padding
@@ -258,7 +261,7 @@ def __init__(self, path: os.PathLike[str] | str, mode: str = 'r') -> None:
         if len(field.types) == 1:
             curr_type = field.types[0]
             if curr_type == GGUFValueType.STRING:
-                print(' = {0}'.format(repr(str(field.parts[-1], encoding='utf8')[:60])), end = '')
+                print(' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf8')[:60])), end = '')
             elif field.types[0] in reader._simple_value_map:
                 print(' = {0}'.format(field.parts[-1][0]), end = '')
         print()
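For review context, a minimal sketch of driving the reworked reader, using only the API shown in this diff; 'model.gguf' is a placeholder path:

    from gguf.gguf_reader import GGUFReader

    reader = GGUFReader('model.gguf', 'r')  # memory-maps the file read-only
    # fields is an OrderedDict[str, ReaderField]; parts holds the raw numpy
    # slices, and data indexes the parts that carry the actual values.
    for name, field in reader.fields.items():
        print(name, field.types)
    for tensor in reader.tensors:
        print(tensor.name, tensor.tensor_type, tensor.shape, tensor.n_bytes)
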
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index 2e38824195e8d..21362f844be91 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -231,7 +231,7 @@ def add_tensor(self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequenc
         tensor.tofile(self.temp_file)
         self.write_padding(self.temp_file, tensor.nbytes)

-    def write_padding(self, fp: IO[bytes], n: int, align: int | None = None):
+    def write_padding(self, fp: IO[bytes], n: int, align: int | None = None) -> None:
         pad = GGUFWriter.ggml_pad(n, align if align is not None else self.data_alignment) - n
         if pad != 0:
             fp.write(bytes([0] * pad))
@@ -280,7 +280,7 @@ def add_author(self, author: str) -> None:
         self.add_string(KEY.GENERAL.AUTHOR, author)

     def add_tensor_data_layout(self, layout: str) -> None:
-        self.add_string(KEY.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
+        self.add_string(KEY.LLM.TENSOR_DATA_LAYOUT.value.format(arch=self.arch), layout)

     def add_url(self, url: str) -> None:
         self.add_string(KEY.GENERAL.URL, url)
@@ -310,66 +310,66 @@ def add_custom_alignment(self, alignment: int) -> None:

     def add_context_length(self, length: int) -> None:
         self.add_uint32(
-            KEY.LLM.CONTEXT_LENGTH.format(arch=self.arch), length)
+            KEY.LLM.CONTEXT_LENGTH.value.format(arch=self.arch), length)

     def add_embedding_length(self, length: int) -> None:
         self.add_uint32(
-            KEY.LLM.EMBEDDING_LENGTH.format(arch=self.arch), length)
+            KEY.LLM.EMBEDDING_LENGTH.value.format(arch=self.arch), length)

     def add_block_count(self, length: int) -> None:
         self.add_uint32(
-            KEY.LLM.BLOCK_COUNT.format(arch=self.arch), length)
+            KEY.LLM.BLOCK_COUNT.value.format(arch=self.arch), length)

     def add_feed_forward_length(self, length: int) -> None:
         self.add_uint32(
-            KEY.LLM.FEED_FORWARD_LENGTH.format(arch=self.arch), length)
+            KEY.LLM.FEED_FORWARD_LENGTH.value.format(arch=self.arch), length)

     def add_parallel_residual(self, use: bool) -> None:
         self.add_bool(
-            KEY.LLM.USE_PARALLEL_RESIDUAL.format(arch=self.arch), use)
+            KEY.LLM.USE_PARALLEL_RESIDUAL.value.format(arch=self.arch), use)

     def add_head_count(self, count: int) -> None:
         self.add_uint32(
-            KEY.ATTENTION.HEAD_COUNT.format(arch=self.arch), count)
+            KEY.ATTENTION.HEAD_COUNT.value.format(arch=self.arch), count)

     def add_head_count_kv(self, count: int) -> None:
         self.add_uint32(
-            KEY.ATTENTION.HEAD_COUNT_KV.format(arch=self.arch), count)
+            KEY.ATTENTION.HEAD_COUNT_KV.value.format(arch=self.arch), count)

     def add_max_alibi_bias(self, bias: float) -> None:
         self.add_float32(
-            KEY.ATTENTION.MAX_ALIBI_BIAS.format(arch=self.arch), bias)
+            KEY.ATTENTION.MAX_ALIBI_BIAS.value.format(arch=self.arch), bias)

     def add_clamp_kqv(self, value: float) -> None:
         self.add_float32(
-            KEY.ATTENTION.CLAMP_KQV.format(arch=self.arch), value)
+            KEY.ATTENTION.CLAMP_KQV.value.format(arch=self.arch), value)

     def add_layer_norm_eps(self, value: float) -> None:
         self.add_float32(
-            KEY.ATTENTION.LAYERNORM_EPS.format(arch=self.arch), value)
+            KEY.ATTENTION.LAYERNORM_EPS.value.format(arch=self.arch), value)

     def add_layer_norm_rms_eps(self, value: float) -> None:
         self.add_float32(
-            KEY.ATTENTION.LAYERNORM_RMS_EPS.format(arch=self.arch), value)
+            KEY.ATTENTION.LAYERNORM_RMS_EPS.value.format(arch=self.arch), value)

     def add_rope_dimension_count(self, count: int) -> None:
         self.add_uint32(
-            KEY.ROPE.DIMENSION_COUNT.format(arch=self.arch), count)
+            KEY.ROPE.DIMENSION_COUNT.value.format(arch=self.arch), count)

     def add_rope_freq_base(self, value: float) -> None:
-        self.add_float32(KEY.ROPE.FREQ_BASE.format(arch=self.arch), value)
+        self.add_float32(KEY.ROPE.FREQ_BASE.value.format(arch=self.arch), value)

     def add_rope_scaling_type(self, value: RopeScalingType) -> None:
-        self.add_string(KEY.ROPE.SCALING_TYPE.format(arch=self.arch), value.value)
+        self.add_string(KEY.ROPE.SCALING_TYPE.value.format(arch=self.arch), value.value)

     def add_rope_scaling_factor(self, value: float) -> None:
-        self.add_float32(KEY.ROPE.SCALING_FACTOR.format(arch=self.arch), value)
+        self.add_float32(KEY.ROPE.SCALING_FACTOR.value.format(arch=self.arch), value)

     def add_rope_scaling_orig_ctx_len(self, value: int) -> None:
-        self.add_uint32(KEY.ROPE.SCALING_ORIG_CTX_LEN.format(arch=self.arch), value)
+        self.add_uint32(KEY.ROPE.SCALING_ORIG_CTX_LEN.value.format(arch=self.arch), value)

     def add_rope_scaling_finetuned(self, value: bool) -> None:
-        self.add_bool(KEY.ROPE.SCALING_FINETUNED.format(arch=self.arch), value)
+        self.add_bool(KEY.ROPE.SCALING_FINETUNED.value.format(arch=self.arch), value)

     def add_tokenizer_model(self, model: str) -> None:
         self.add_string(KEY.TOKENIZER.MODEL, model)
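On the writer side the add_* call sites are unchanged; only the key lookup now goes through the StrEnum's .value. A sketch, assuming the existing GGUFWriter(path, arch) constructor and write flow from gguf_writer.py, which are not part of this diff:

    from gguf.gguf_writer import GGUFWriter

    writer = GGUFWriter('out.gguf', 'llama')  # constructor assumed unchanged
    writer.add_context_length(4096)           # writes key "llama.context_length"
    writer.add_rope_freq_base(10000.0)
    # Header/tensor writing and close() proceed as before (omitted here).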