Skip to content

Commit

Permalink
BITBLAS is only supported when desc_act is False.
Browse files Browse the repository at this point in the history
  • Loading branch information
ZX-ModelCloud committed Jun 25, 2024
1 parent cf0e7ca commit 842a90c
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 0 deletions.
1 change: 1 addition & 0 deletions gptqmodel/nn_modules/qlinear/qlinear_bitblas.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def unpack_qzeros(qzeros, bits):
class QuantLinear(BaseQuantLinear):
QUANT_TYPE = "bitblas"
SUPPORTED_BITS = [1, 2, 4]
SUPPORTED_DESC_ACT = [False]

OPT_FEATURES = [1, 16, 32, 64, 128, 256, 512]
zeros_mode = "quantized" # "original" or "rescale" or "quantized"
Expand Down
4 changes: 4 additions & 0 deletions tests/test_perplexity.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,10 @@ def test_quantized_perplexity(self, format: FORMAT):
format=format,
)

if format == FORMAT.MARLIN or format == FORMAT.BITBLAS:
# MARLIN and BITBLAS are only supported when desc_act is False.
quantize_config.desc_act = False

model = GPTQModel.from_pretrained(
self.NATIVE_MODEL_ID,
quantize_config=quantize_config,
Expand Down

0 comments on commit 842a90c

Please sign in to comment.