Support exporting gguf q4_0 and q4_1 formats #393

Merged: 30 commits into main from hengguo/gguf on Jan 8, 2025

Changes from 1 commit

Commits (30)
8355347  export gguf (n1ck-guo, Dec 24, 2024)
dd55003  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Dec 24, 2024)
f67219b  q4_0/1 port c++ to python (n1ck-guo, Dec 24, 2024)
611c4c1  Merge branch 'hengguo/gguf' of https://github.com/intel/auto-round in… (n1ck-guo, Dec 24, 2024)
ce1c48e  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Dec 24, 2024)
7ab730b  change to llama.cpp stype and add uint8 store (n1ck-guo, Dec 25, 2024)
287b5af  abstract (n1ck-guo, Dec 25, 2024)
49d95a8  merge (n1ck-guo, Dec 25, 2024)
113532a  update (n1ck-guo, Dec 26, 2024)
ee66c47  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Dec 26, 2024)
d395c6b  fix (n1ck-guo, Dec 26, 2024)
8b13f1f  Merge branch 'hengguo/gguf' of https://github.com/intel/auto-round in… (n1ck-guo, Dec 26, 2024)
ce2c346  update (n1ck-guo, Dec 30, 2024)
8bceb3f  default sequence eval (n1ck-guo, Dec 30, 2024)
722a1d8  modify by comments (n1ck-guo, Dec 30, 2024)
8712170  update (n1ck-guo, Dec 30, 2024)
1aa979a  pylint (n1ck-guo, Dec 30, 2024)
515160d  clean (n1ck-guo, Dec 30, 2024)
a064c44  pylint (n1ck-guo, Dec 30, 2024)
fa2328d  fix (n1ck-guo, Dec 30, 2024)
7906284  update (n1ck-guo, Dec 31, 2024)
4261191  Merge branch 'main' into hengguo/gguf (n1ck-guo, Dec 31, 2024)
e525f97  add ut (n1ck-guo, Dec 31, 2024)
b0f96a0  add cuda ut (n1ck-guo, Dec 31, 2024)
c7ec3a5  add requirements (n1ck-guo, Dec 31, 2024)
79c5c5a  format (n1ck-guo, Dec 31, 2024)
2720287  code scane (n1ck-guo, Dec 31, 2024)
db15354  update (n1ck-guo, Jan 7, 2025)
24a68a9  merge main (n1ck-guo, Jan 7, 2025)
cb67c1a  update (n1ck-guo, Jan 7, 2025)
8 changes: 8 additions & 0 deletions auto_round/autoround.py
@@ -1267,6 +1267,14 @@ def save_quantized(self, output_dir=None, format="auto_round", inplace=True, **k
             if processor is not None:
                 processor.save_pretrained(output_dir)
             return
+        if format in ["gguf:q4_0", "gguf:q4_1"]:
+            if self.group_size != 32:
+                logger.error(f"{format} need group_size=32, but it is {self.group_size}, cannot export.")
+                return
+            if format == "gguf:q4_0" and not self.sym:
+                logger.warning(f"incorrect format choose, will reset to gguf:q4_1")
+            if format == "gguf:q4_1" and self.sym:
+                logger.warning(f"incorrect format choose, will reset to gguf:q4_0")

         from auto_round.export import EXPORT_FORMAT
         backend = format

Reviewer comment (Contributor), on the group_size check: also better check bits
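To make the new constraints concrete, here is a minimal end-to-end usage sketch. It is mine, not part of this PR: the model name is a placeholder and the constructor arguments are assumed from AutoRound's public API. Per the checks above, gguf:q4_0 pairs with symmetric quantization, gguf:q4_1 with asymmetric, and both require group_size=32.

    # Hypothetical usage sketch of the new export path; not from this PR.
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from auto_round import AutoRound

    model_name = "facebook/opt-125m"  # placeholder model
    model = AutoModelForCausalLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # gguf:q4_0 expects sym=True, gguf:q4_1 expects sym=False; both require
    # group_size=32, otherwise save_quantized logs an error and returns.
    autoround = AutoRound(model, tokenizer, bits=4, group_size=32, sym=True)
    autoround.quantize()
    autoround.save_quantized("./opt-125m-gguf", format="gguf:q4_0")
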
2 changes: 1 addition & 1 deletion auto_round/export/export_to_gguf/convert.py
@@ -385,7 +385,7 @@ def prepare_tensors(self):
             # n_dims is implicit in the shape
             logger.info(
                 f"{f'%-{max_name_len}s' % f'{new_name},'} {old_dtype}"
-                " --> {data_qtype.name}, shape = {shape_str}")
+                f" --> {data_qtype.name}, shape = {shape_str}")

self.gguf_writer.add_tensor(new_name, data, raw_dtype=data_qtype)

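For context, the one-character fix above adds a missing f prefix to the second string: without it Python performs no interpolation, so the log line printed the placeholder text literally. A standalone two-line illustration (not PR code):

    data_qtype_name = "Q4_0"          # stand-in for data_qtype.name in the hunk above
    print(" --> {data_qtype_name}")   # prints the braces literally
    print(f" --> {data_qtype_name}")  # prints ' --> Q4_0'
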
2 changes: 1 addition & 1 deletion auto_round/export/export_to_gguf/quant.py
@@ -81,7 +81,7 @@ def q4_0_quant_block(blocks: np.array, scale = None, zp = None):
 def q4_1_quant_block(blocks: np.array, scale = None, zp = None):
     if scale is not None:
         d = scale.reshape((-1,1))
-        min = zp.reshape((-1,1))
+        min = zp.reshape((-1,1)) * d * -1
     else:
         max = blocks.max(axis=-1, keepdims=True)
         min = blocks.min(axis=-1, keepdims=True)
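To see why the fix multiplies the zero point by the scale, here is a standalone numpy sketch of llama.cpp-style q4_1 block quantization (my reconstruction, not the PR's implementation). q4_1 stores, per 32-value block, a scale d, a minimum m, and 16 bytes of packed 4-bit codes, and dequantizes as x ≈ q * d + m. AutoRound's asymmetric convention x ≈ (q - zp) * d therefore maps to m = -zp * d, which is exactly the corrected line.

    # Reference sketch under stated assumptions; not the PR's code.
    import numpy as np

    def q4_1_quant_block_ref(blocks: np.ndarray, scale=None, zp=None):
        """Quantize (n_blocks, 32) float blocks to q4_1-style (d, m, packed codes)."""
        if scale is not None:
            d = scale.reshape((-1, 1)).astype(np.float32)
            m = zp.reshape((-1, 1)) * d * -1.0  # min = -zp * scale, as in the fix
        else:
            mx = blocks.max(axis=-1, keepdims=True)
            mn = blocks.min(axis=-1, keepdims=True)
            d = (mx - mn) / 15.0                # 4-bit codes span 0..15
            m = mn
        with np.errstate(divide="ignore", invalid="ignore"):
            inv_d = np.where(d == 0, 0.0, 1.0 / d)
        q = np.clip(np.rint((blocks - m) * inv_d), 0, 15).astype(np.uint8)
        # llama.cpp packs element i into the low nibble and element i + 16
        # of the same block into the high nibble of byte i
        packed = q[:, :16] | (q[:, 16:] << 4)
        return d, m, packed

For example, with scale = 0.1 and zp = 8, the stored minimum is -0.8, and the code 8 dequantizes to 8 * 0.1 - 0.8 = 0, matching the (q - zp) * d convention.
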
25 changes: 14 additions & 11 deletions auto_round/script/llm.py
@@ -269,23 +269,26 @@ def tune(args):
     if args.format is None:
         args.format = "auto_round"
     supported_formats = ["auto_round", "auto_gptq", "auto_awq", "auto_round:auto_gptq", "auto_round:auto_awq",
-                         "auto_gptq:marlin", "gguf:q4_0", "gguf:q4_1", "itrex", "iterx_xpu", "fake"]
+                         "auto_gptq:marlin", "gguf:q4_0", "gguf:q4_1", "itrex", "itrex_xpu", "fake"]
     formats = args.format.lower().replace(' ', '').split(",")
     for format in formats:
         if format not in supported_formats:
             raise ValueError(f"{format} is not supported, we only support {supported_formats}")
         if format in ["gguf:q4_0", "gguf:q4_1"]:
Reviewer comment (Contributor) on this line: support gguf later if we could infer the exact type from the quantization config

             args.bits = 4
-            if args.act_bits <= 8:
-                logger.warning(f"{args.format} not support for activation quantization.")
-            if args.group_size != 32:
-                logger.warning(f"{args.format} not support for group_size: {args.group_size}. "
-                               "Reset group_size to 32.")
-                args.group_size = 32
-            if args.format.endswith("_0"):
-                args.asym = False
-            if args.format.endswith("_1"):
-                args.asym = True
+            if args.act_bits <= 8:
+                logger.warning(f"{args.format} not support for activation quantization.")
+            if args.group_size != 32:
+                logger.warning(f"{args.format} not support for group_size: {args.group_size}. "
+                               "Reset group_size to 32.")
+                args.group_size = 32
+            if args.format.endswith("_0") and args.asym:
+                logger.warning(f"{args.format} not support for asymmetric quantization, will reset to sym.")
+                args.asym = False
+            if args.format.endswith("_1") and not args.asym:
+                logger.warning(f"{args.format} not support for symmetric quantization, will reset to asym.")
+                args.asym = True
+            logger.info(f"export format {format}, sym = {not args.asym}, group_size = {args.group_size}")

if "auto_gptq" in args.format and args.asym is True:
print(
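The rewritten branch reduces to a small normalization rule, sketched below as a standalone function (the name normalize_gguf_args is mine, for illustration): q4_0 stores only a per-block scale, so it must be symmetric, while q4_1 additionally stores a per-block minimum, so it must be asymmetric, and both use 32-element blocks.

    def normalize_gguf_args(fmt: str, asym: bool, group_size: int) -> tuple:
        """Coerce tune() arguments to what gguf:q4_0 / gguf:q4_1 can represent."""
        if group_size != 32:
            group_size = 32    # both gguf 4-bit formats use 32-element blocks
        if fmt.endswith("_0") and asym:
            asym = False       # q4_0 has no per-block minimum: symmetric only
        if fmt.endswith("_1") and not asym:
            asym = True        # q4_1 stores a per-block minimum: asymmetric
        return asym, group_size

    print(normalize_gguf_args("gguf:q4_0", asym=True, group_size=128))  # (False, 32)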