From d4bc3096513bd9a578b4f7ccb38c0a4a396a648c Mon Sep 17 00:00:00 2001
From: LRL-ModelCloud
Date: Tue, 25 Jun 2024 15:35:48 +0800
Subject: [PATCH] fix: marlin format requires desc_act=False

---
 tests/test_perplexity.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_perplexity.py b/tests/test_perplexity.py
index ec70bf316..d46d40c65 100644
--- a/tests/test_perplexity.py
+++ b/tests/test_perplexity.py
@@ -47,7 +47,6 @@ def setUp(self):
         model = AutoModelForCausalLM.from_pretrained(
             self.NATIVE_MODEL_ID,
             device_map="auto",
-            torch_dtype=torch.float16,
         )
 
         self.native_ppl = self.calculate_avg_ppl(model, self.tokenizer)
@@ -89,6 +88,7 @@ def test_quantized_perplexity(self, format: FORMAT):
             bits=4,
             group_size=128,
             format=format,
+            desc_act=False if format == FORMAT.MARLIN else True
         )
 
         model = GPTQModel.from_pretrained(
@@ -115,5 +115,5 @@ def test_quantized_perplexity(self, format: FORMAT):
         print(f"Format {format}, Quantized PPL: {quantized_ppl}")
 
         # 4090: [wikitext-2-raw-v1, test, text, 512, 512] data split
-        # FORMAT.GTPQ and FORMAT.GTPQ_V2 ppl == 8.7954, FORMAT.MARLIN ppl == 8.9865
+        # FORMAT.GTPQ and FORMAT.GTPQ_V2 ppl == 8.7863, FORMAT.MARLIN ppl == 9.0036
         assert abs(quantized_ppl - self.native_ppl) < 0.6
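
Note: the change above conditions desc_act on the selected format because the Marlin kernel does not accept activation reordering (desc_act=True). Below is a minimal sketch of that configuration pattern outside the test file; the import paths and the QuantizeConfig class name are assumptions about the gptqmodel package layout, not taken from this patch.

# Sketch only; imports and QuantizeConfig are assumed, verify against the installed gptqmodel version.
from gptqmodel import QuantizeConfig
from gptqmodel.quantization import FORMAT

def make_quantize_config(format: FORMAT) -> QuantizeConfig:
    # Marlin-format models must be quantized without act-order,
    # so desc_act is forced to False when FORMAT.MARLIN is chosen.
    return QuantizeConfig(
        bits=4,
        group_size=128,
        format=format,
        desc_act=False if format == FORMAT.MARLIN else True,
    )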