From d4bc3096513bd9a578b4f7ccb38c0a4a396a648c Mon Sep 17 00:00:00 2001
From: LRL-ModelCloud
Date: Tue, 25 Jun 2024 15:35:48 +0800
Subject: [PATCH] fix: marlin format requires desc_act=False

---
 tests/test_perplexity.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_perplexity.py b/tests/test_perplexity.py
index ec70bf316..d46d40c65 100644
--- a/tests/test_perplexity.py
+++ b/tests/test_perplexity.py
@@ -47,7 +47,6 @@ def setUp(self):
         model = AutoModelForCausalLM.from_pretrained(
             self.NATIVE_MODEL_ID,
             device_map="auto",
-            torch_dtype=torch.float16,
         )
 
         self.native_ppl = self.calculate_avg_ppl(model, self.tokenizer)
@@ -89,6 +88,7 @@ def test_quantized_perplexity(self, format: FORMAT):
             bits=4,
             group_size=128,
             format=format,
+            desc_act=False if format == FORMAT.MARLIN else True
         )
 
         model = GPTQModel.from_pretrained(
@@ -115,5 +115,5 @@ def test_quantized_perplexity(self, format: FORMAT):
         print(f"Format {format}, Quantized PPL: {quantized_ppl}")
 
         # 4090: [wikitext-2-raw-v1, test, text, 512, 512] data split
-        # FORMAT.GTPQ and FORMAT.GTPQ_V2 ppl == 8.7954, FORMAT.MARLIN ppl == 8.9865
+        # FORMAT.GTPQ and FORMAT.GTPQ_V2 ppl == 8.7863, FORMAT.MARLIN ppl == 9.0036
         assert abs(quantized_ppl - self.native_ppl) < 0.6
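
Note: the change above conditions desc_act on the selected format because the Marlin kernel does not accept activation reordering (desc_act=True). Below is a minimal sketch of that configuration pattern outside the test file; the import paths and the QuantizeConfig class name are assumptions about the gptqmodel package layout, not taken from this patch.

# Sketch only; imports and QuantizeConfig are assumed, verify against the installed gptqmodel version.
from gptqmodel import QuantizeConfig
from gptqmodel.quantization import FORMAT

def make_quantize_config(format: FORMAT) -> QuantizeConfig:
    # Marlin-format models must be quantized without act-order,
    # so desc_act is forced to False when FORMAT.MARLIN is chosen.
    return QuantizeConfig(
        bits=4,
        group_size=128,
        format=format,
        desc_act=False if format == FORMAT.MARLIN else True,
    )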