From 9acd939dd484367cdeed68366c154ddd4cba0243 Mon Sep 17 00:00:00 2001
From: Juri Ganitkevitch
Date: Wed, 31 Jan 2024 21:15:55 -0500
Subject: [PATCH 1/6] Add missing None check for hf_quantizer

---
 src/transformers/modeling_utils.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index 8a4fd6eaee4c..b644bf8f31c2 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -3727,10 +3727,11 @@ def _fix_key(key):
 
                 if param.device == torch.device("meta"):
                     value = torch.empty(*param.size(), dtype=target_dtype)
-                    if getattr(
-                        hf_quantizer, "requires_parameters_quantization", False
-                    ) or not hf_quantizer.check_quantized_param(
-                        model, param_value=value, param_name=key, state_dict={}
+                    if hf_quantizer is not None and (
+                        getattr(hf_quantizer, "requires_parameters_quantization", False)
+                        or not hf_quantizer.check_quantized_param(
+                            model, param_value=value, param_name=key, state_dict={}
+                        )
                     ):
                         set_module_tensor_to_device(model, key, "cpu", value)
                     else:
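The PATCH 1/6 condition above still routes a None quantizer into the else branch, where create_quantized_param is called on None; PATCH 2/6 below flips the check to fix exactly that. A minimal, self-contained sketch of the failure mode (FakeQuantizer and load_missing_key are hypothetical stand-ins for this illustration, not transformers APIs; only the boolean structure mirrors the patch):

    class FakeQuantizer:
        requires_parameters_quantization = False

        def check_quantized_param(self, model, param_value, param_name, state_dict):
            return False  # pretend the quantizer does not handle this parameter

    def load_missing_key(hf_quantizer):
        # Same boolean shape as the PATCH 1/6 guard.
        if hf_quantizer is not None and (
            getattr(hf_quantizer, "requires_parameters_quantization", False)
            or not hf_quantizer.check_quantized_param(None, param_value=None, param_name="w", state_dict={})
        ):
            return "cpu-materialized"  # stands in for set_module_tensor_to_device
        else:
            return hf_quantizer.create_quantized_param  # AttributeError when hf_quantizer is None

    print(load_missing_key(FakeQuantizer()))  # -> "cpu-materialized"
    try:
        load_missing_key(None)  # unquantized load with a missing key
    except AttributeError as err:
        print("crashes without the fix:", err)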
From 780ea90a7f76cdbcf6a39bcb6047049109c9375e Mon Sep 17 00:00:00 2001
From: Juri Ganitkevitch
Date: Thu, 1 Feb 2024 12:08:38 -0500
Subject: [PATCH 2/6] Add test, fix logic.

---
 src/transformers/modeling_utils.py |  5 ++---
 tests/test_modeling_utils.py       | 12 ++++++++++++
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index b644bf8f31c2..34ac2e733484 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -3727,12 +3727,11 @@ def _fix_key(key):
 
                 if param.device == torch.device("meta"):
                     value = torch.empty(*param.size(), dtype=target_dtype)
-                    if hf_quantizer is not None and (
+                    if (hf_quantizer is None or
                         getattr(hf_quantizer, "requires_parameters_quantization", False)
                         or not hf_quantizer.check_quantized_param(
                             model, param_value=value, param_name=key, state_dict={}
-                        )
-                    ):
+                        )):
                         set_module_tensor_to_device(model, key, "cpu", value)
                     else:
                         hf_quantizer.create_quantized_param(model, value, key, "cpu", state_dict)
diff --git a/tests/test_modeling_utils.py b/tests/test_modeling_utils.py
index aac78e955c3e..b0125e6dd2a1 100755
--- a/tests/test_modeling_utils.py
+++ b/tests/test_modeling_utils.py
@@ -34,8 +34,10 @@
 from transformers import (
     AutoConfig,
     AutoModel,
+    AutoModelForSequenceClassification,
     OwlViTForObjectDetection,
     PretrainedConfig,
+    LlamaConfig,
     is_torch_available,
     logging,
 )
@@ -201,6 +203,7 @@ def forward(self, mask, inputs_embeds):
 
 TINY_T5 = "patrickvonplaten/t5-tiny-random"
 TINY_BERT_FOR_TOKEN_CLASSIFICATION = "hf-internal-testing/tiny-bert-for-token-classification"
+TINY_LLAMA = "seanmor5/tiny-llama-test"
 
 
 def check_models_equal(model1, model2):
@@ -300,6 +303,15 @@ def test_model_from_pretrained_with_different_pretrained_model_name(self):
             BertModel.from_pretrained(TINY_T5)
         self.assertTrue("You are using a model of type t5 to instantiate a model of type bert" in cl.out)
 
+    @require_accelerate
+    def test_model_from_pretrained_with_none_quantization_config(self):
+        model = None
+        # Needs device_map for low_cpu_mem trigger & missing keys in base model load to trigger.
+        model = AutoModelForSequenceClassification.from_pretrained(
+            TINY_LLAMA, config=LlamaConfig.from_pretrained(TINY_LLAMA), device_map="auto", quantization_config=None
+        )
+        self.assertIsNotNone(model)
+
     def test_model_from_config_torch_dtype(self):
         # test that the model can be instantiated with dtype of user's choice - as long as it's a
         # float dtype. To make it happen config.torch_dtype needs to be set before instantiating the

From aac8f7746aa751bf734cf857fccdd6a7d4c78f17 Mon Sep 17 00:00:00 2001
From: Juri Ganitkevitch
Date: Thu, 1 Feb 2024 12:12:11 -0500
Subject: [PATCH 3/6] make style

---
 src/transformers/modeling_utils.py | 8 +++++---
 tests/test_modeling_utils.py       | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index 34ac2e733484..d58ac2b68303 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -3727,11 +3727,13 @@ def _fix_key(key):
 
                 if param.device == torch.device("meta"):
                     value = torch.empty(*param.size(), dtype=target_dtype)
-                    if (hf_quantizer is None or
-                        getattr(hf_quantizer, "requires_parameters_quantization", False)
+                    if (
+                        hf_quantizer is None
+                        or getattr(hf_quantizer, "requires_parameters_quantization", False)
                         or not hf_quantizer.check_quantized_param(
                             model, param_value=value, param_name=key, state_dict={}
-                        )):
+                        )
+                    ):
                         set_module_tensor_to_device(model, key, "cpu", value)
                     else:
                         hf_quantizer.create_quantized_param(model, value, key, "cpu", state_dict)
diff --git a/tests/test_modeling_utils.py b/tests/test_modeling_utils.py
index b0125e6dd2a1..31718177488b 100755
--- a/tests/test_modeling_utils.py
+++ b/tests/test_modeling_utils.py
@@ -35,9 +35,9 @@
     AutoConfig,
     AutoModel,
     AutoModelForSequenceClassification,
+    LlamaConfig,
     OwlViTForObjectDetection,
     PretrainedConfig,
-    LlamaConfig,
     is_torch_available,
     logging,
 )

From 9b822bc8a75ca46cda0286ec0abac3014e60b8f4 Mon Sep 17 00:00:00 2001
From: Juri Ganitkevitch
Date: Thu, 1 Feb 2024 20:37:27 -0500
Subject: [PATCH 4/6] Switch test model to Mistral

---
 tests/test_modeling_utils.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/test_modeling_utils.py b/tests/test_modeling_utils.py
index 31718177488b..f9a1a77ff2b3 100755
--- a/tests/test_modeling_utils.py
+++ b/tests/test_modeling_utils.py
@@ -35,7 +35,6 @@
     AutoConfig,
     AutoModel,
     AutoModelForSequenceClassification,
-    LlamaConfig,
     OwlViTForObjectDetection,
     PretrainedConfig,
     is_torch_available,
@@ -203,7 +202,7 @@ def forward(self, mask, inputs_embeds):
 
 TINY_T5 = "patrickvonplaten/t5-tiny-random"
 TINY_BERT_FOR_TOKEN_CLASSIFICATION = "hf-internal-testing/tiny-bert-for-token-classification"
-TINY_LLAMA = "seanmor5/tiny-llama-test"
+TINY_MISTRAL = "hf-internal-testing/tiny-random-MistralForCausalLM"
 
 
 def check_models_equal(model1, model2):
@@ -308,7 +307,7 @@ def test_model_from_pretrained_with_different_pretrained_model_name(self):
         model = None
         # Needs device_map for low_cpu_mem trigger & missing keys in base model load to trigger.
         model = AutoModelForSequenceClassification.from_pretrained(
-            TINY_LLAMA, config=LlamaConfig.from_pretrained(TINY_LLAMA), device_map="auto", quantization_config=None
+            TINY_MISTRAL, device_map="auto", quantization_config=None
         )
         self.assertIsNotNone(model)
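Stripped of the unittest scaffolding, the regression test as of PATCH 4/6 boils down to the following sketch. It mirrors the test body above (the final assert stands in for assertIsNotNone) and needs the accelerate package installed so that device_map="auto" is accepted:

    from transformers import AutoModelForSequenceClassification

    model = AutoModelForSequenceClassification.from_pretrained(
        "hf-internal-testing/tiny-random-MistralForCausalLM",
        device_map="auto",         # forces the low_cpu_mem_usage loading path
        quantization_config=None,  # so no hf_quantizer is created
    )
    # The sequence-classification head is absent from the causal-LM checkpoint,
    # so its weights are "missing keys" materialized off the meta device through
    # the guarded branch patched in this series.
    assert model is not None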
From 85d6a39bb5a42fd9efa97a26b6319a1487aafce9 Mon Sep 17 00:00:00 2001
From: Juri Ganitkevitch
Date: Thu, 1 Feb 2024 20:41:44 -0500
Subject: [PATCH 5/6] Comment

---
 tests/test_modeling_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/test_modeling_utils.py b/tests/test_modeling_utils.py
index f9a1a77ff2b3..4a2d4046a410 100755
--- a/tests/test_modeling_utils.py
+++ b/tests/test_modeling_utils.py
@@ -305,7 +305,8 @@ def test_model_from_pretrained_with_different_pretrained_model_name(self):
     @require_accelerate
     def test_model_from_pretrained_with_none_quantization_config(self):
         model = None
-        # Needs device_map for low_cpu_mem trigger & missing keys in base model load to trigger.
+        # Needs a device_map to enter the low_cpu_mem branch. We also load AutoModelForSequenceClassification
+        # deliberately to enter the missing keys branch.
         model = AutoModelForSequenceClassification.from_pretrained(
             TINY_MISTRAL, device_map="auto", quantization_config=None
         )
         self.assertIsNotNone(model)

From 60f9d9c0ce52e6ed02f6278e1399a84c8b8032e7 Mon Sep 17 00:00:00 2001
From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com>
Date: Fri, 2 Feb 2024 09:16:13 +0100
Subject: [PATCH 6/6] Update tests/test_modeling_utils.py

---
 tests/test_modeling_utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_modeling_utils.py b/tests/test_modeling_utils.py
index 4a2d4046a410..cef56822dc3e 100755
--- a/tests/test_modeling_utils.py
+++ b/tests/test_modeling_utils.py
@@ -304,7 +304,6 @@ def test_model_from_pretrained_with_different_pretrained_model_name(self):
 
     @require_accelerate
     def test_model_from_pretrained_with_none_quantization_config(self):
-        model = None
         # Needs a device_map to enter the low_cpu_mem branch. We also load AutoModelForSequenceClassification
         # deliberately to enter the missing keys branch.
         model = AutoModelForSequenceClassification.from_pretrained(
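After the full series, the load-time decision reads: materialize a missing key as a plain CPU tensor when there is no quantizer, when the quantizer requires parameter-level quantization, or when the quantizer declines the parameter; otherwise the quantizer creates the parameter itself. A compact decision-table sketch (StubQuantizer and materialize_on_cpu are hypothetical names; only the boolean structure comes from PATCH 3/6):

    class StubQuantizer:
        requires_parameters_quantization = False

        def check_quantized_param(self, model, param_value, param_name, state_dict):
            return True  # claims this parameter for quantized creation

    def materialize_on_cpu(hf_quantizer):
        return (
            hf_quantizer is None
            or getattr(hf_quantizer, "requires_parameters_quantization", False)
            or not hf_quantizer.check_quantized_param(None, param_value=None, param_name="w", state_dict={})
        )

    assert materialize_on_cpu(None)                 # the None-quantizer case this PR fixes
    assert not materialize_on_cpu(StubQuantizer())  # quantizer creates the param itself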