Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Quark Quantizer Support #1207

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v2.10.1.zip;769b6a
googletest;https://github.com/google/googletest/archive/530d5c8c84abd2a46f38583ee817743c9b3a42b4.zip;5e3a61db2aa975cfd0f97ba92c818744e7fa7034
microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.230629.1.zip;e4a542a323c070376f7c2d1973d0f7ddbc1d2fa5
directx_headers;https://github.com/microsoft/DirectX-Headers/archive/refs/tags/v1.613.1.zip;47653509a3371eabb156360f42faf582f314bf2e
-onnxruntime_extensions;https://github.com/microsoft/onnxruntime-extensions.git;77a1812f55dbfed1fc4d9d219bdc4951ef7a6db2
+onnxruntime_extensions;https://github.com/microsoft/onnxruntime-extensions.git;4e10ee046a2f035351f3fe88740bd8215a18fdb9
13 changes: 10 additions & 3 deletions src/python/py/models/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,8 +304,7 @@ def __init__(self, config, io_dtype, onnx_dtype, ep, cache_dir, extra_options):
}
if self.quant_type is not None:
# Create quantized attributes from quantization config
-self.quant_attrs["bits"] = config.quantization_config["bits"]
-self.quant_attrs["group_size"] = config.quantization_config["group_size"]
+self.quant_attrs["config"] = config.quantization_config
self.quant_attrs["use_g_idx"] = config.quantization_config["desc_act"] if "desc_act" in config.quantization_config else False

def make_genai_config(self, model_name_or_path, extra_kwargs, out_dir):
Expand Down Expand Up @@ -2101,7 +2100,15 @@ def make_model(self, input_path):
from onnxruntime_genai.models.quantized_model import QuantModel
q_size = self.num_attn_heads * self.head_size
kv_size = self.num_kv_heads * self.head_size
-model = QuantModel.from_pretrained(self.quant_type, input_path, self.quant_attrs["bits"], self.quant_attrs["group_size"], self.quant_attrs["use_g_idx"], q_size, kv_size, self.intermediate_size, self.num_layers)
+model = QuantModel.from_pretrained(
+self.quant_type,
+input_path = input_path,
+quant_attrs = self.quant_attrs,
+q_size = q_size,
+kv_size = kv_size,
+intermediate_size = self.intermediate_size,
+num_layers = self.num_layers,
+)
else:
# Load PyTorch model
extra_kwargs = {"num_hidden_layers": self.num_layers} if "num_hidden_layers" in self.extra_options else {}
Expand Down
Loading
Loading