Skip to content

Commit

Permalink
BITBLAS is only supported when desc_act is False.
Browse files Browse the repository at this point in the history
  • Loading branch information
ZX-ModelCloud committed Jun 25, 2024
1 parent cf0e7ca commit 842a90c
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 0 deletions.
1 change: 1 addition & 0 deletions gptqmodel/nn_modules/qlinear/qlinear_bitblas.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def unpack_qzeros(qzeros, bits):
class QuantLinear(BaseQuantLinear):
QUANT_TYPE = "bitblas"
SUPPORTED_BITS = [1, 2, 4]
SUPPORTED_DESC_ACT = [False]

OPT_FEATURES = [1, 16, 32, 64, 128, 256, 512]
zeros_mode = "quantized" # "original" or "rescale" or "quantized"
Expand Down
4 changes: 4 additions & 0 deletions tests/test_perplexity.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,10 @@ def test_quantized_perplexity(self, format: FORMAT):
format=format,
)

if format == FORMAT.MARLIN or format == FORMAT.BITBLAS:
# MARLIN and BITBLAS are only supported when desc_act is False.
quantize_config.desc_act = False

model = GPTQModel.from_pretrained(
self.NATIVE_MODEL_ID,
quantize_config=quantize_config,
Expand Down

0 comments on commit 842a90c

Please sign in to comment.