From d7b9899a31bfe54e11005f9af35fe4858cf5fb97 Mon Sep 17 00:00:00 2001
From: Younes Belkada <younesbelkada@gmail.com>
Date: Fri, 24 May 2024 07:41:19 +0200
Subject: [PATCH 1/2] Fix remaining quantization tests (move gguf install to quantization image, update Quanto KV cache expected text)

---
 docker/transformers-all-latest-gpu/Dockerfile          | 3 ---
 docker/transformers-quantization-latest-gpu/Dockerfile | 3 +++
 tests/quantization/quanto_integration/test_quanto.py   | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docker/transformers-all-latest-gpu/Dockerfile b/docker/transformers-all-latest-gpu/Dockerfile
index 930fdfb799cd..b888397f95f1 100644
--- a/docker/transformers-all-latest-gpu/Dockerfile
+++ b/docker/transformers-all-latest-gpu/Dockerfile
@@ -45,9 +45,6 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/opt
 # For video model testing
 RUN python3 -m pip install --no-cache-dir decord av==9.2.0
 
-# For GGUF tests
-RUN python3 -m pip install --no-cache-dir gguf
-
 # Some slow tests require bnb
 RUN python3 -m pip install --no-cache-dir bitsandbytes
 
diff --git a/docker/transformers-quantization-latest-gpu/Dockerfile b/docker/transformers-quantization-latest-gpu/Dockerfile
index 2b74dca91f30..6d94dbee5aa0 100755
--- a/docker/transformers-quantization-latest-gpu/Dockerfile
+++ b/docker/transformers-quantization-latest-gpu/Dockerfile
@@ -48,6 +48,9 @@ RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.2
 # Add hqq for quantization testing
 RUN python3 -m pip install --no-cache-dir hqq
 
+# For GGUF tests
+RUN python3 -m pip install --no-cache-dir gguf
+
 # Add autoawq for quantization testing
 # >=v0.2.3 needed for compatibility with torch 2.2.1
 RUN python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.3/autoawq-0.2.3+cu118-cp38-cp38-linux_x86_64.whl
diff --git a/tests/quantization/quanto_integration/test_quanto.py b/tests/quantization/quanto_integration/test_quanto.py
index f57447824197..053a80f4b5cc 100644
--- a/tests/quantization/quanto_integration/test_quanto.py
+++ b/tests/quantization/quanto_integration/test_quanto.py
@@ -447,7 +447,7 @@ class QuantoKVCacheQuantizationTest(unittest.TestCase):
     def test_quantized_cache(self):
         EXPECTED_TEXT_COMPLETION = [
             "Simply put, the theory of relativity states that 1) the speed of light is the same for all observers, and 2) the laws of physics are the same for all observers.\nThe first part of the theory of relativity",
-            "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs, my fries, my burgers, my hot dogs, my sandwiches, my chicken, my pizza, my sal",
+            "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs, my fries, my burgers, my hot dogs, my sandwiches, my salads, my chicken, my fish",
         ]
 
         prompts = [

From 68144ed10913f9463cf07b6a31a5679811d77403 Mon Sep 17 00:00:00 2001
From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com>
Date: Fri, 24 May 2024 14:06:27 +0200
Subject: [PATCH 2/2] Add @require_quanto to QuantoKVCacheQuantizationTest

---
 tests/quantization/quanto_integration/test_quanto.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/quantization/quanto_integration/test_quanto.py b/tests/quantization/quanto_integration/test_quanto.py
index 053a80f4b5cc..e662300a4669 100644
--- a/tests/quantization/quanto_integration/test_quanto.py
+++ b/tests/quantization/quanto_integration/test_quanto.py
@@ -440,6 +440,7 @@ def test_quantize_activation(self):
         self.assertIn("We don't support quantizing the activations with transformers library", str(e.exception))
 
 
+@require_quanto
 @require_torch_gpu
 class QuantoKVCacheQuantizationTest(unittest.TestCase):
     @slow