From 9acd939dd484367cdeed68366c154ddd4cba0243 Mon Sep 17 00:00:00 2001
From: Juri Ganitkevitch
Date: Wed, 31 Jan 2024 21:15:55 -0500
Subject: [PATCH 1/6] Add missing None check for hf_quantizer

---
 src/transformers/modeling_utils.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index 8a4fd6eaee4c..b644bf8f31c2 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -3727,10 +3727,11 @@ def _fix_key(key):
 
                 if param.device == torch.device("meta"):
                     value = torch.empty(*param.size(), dtype=target_dtype)
-                    if getattr(
-                        hf_quantizer, "requires_parameters_quantization", False
-                    ) or not hf_quantizer.check_quantized_param(
-                        model, param_value=value, param_name=key, state_dict={}
+                    if hf_quantizer is not None and (
+                        getattr(hf_quantizer, "requires_parameters_quantization", False)
+                        or not hf_quantizer.check_quantized_param(
+                            model, param_value=value, param_name=key, state_dict={}
+                        )
                     ):
                         set_module_tensor_to_device(model, key, "cpu", value)
                     else:
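The PATCH 1/6 condition above still routes a None quantizer into the else branch, where create_quantized_param is called on None; PATCH 2/6 below flips the check to fix exactly that. A minimal, self-contained sketch of the failure mode (FakeQuantizer and load_missing_key are hypothetical stand-ins for this illustration, not transformers APIs; only the boolean structure mirrors the patch):

    class FakeQuantizer:
        requires_parameters_quantization = False

        def check_quantized_param(self, model, param_value, param_name, state_dict):
            return False  # pretend the quantizer does not handle this parameter

    def load_missing_key(hf_quantizer):
        # Same boolean shape as the PATCH 1/6 guard.
        if hf_quantizer is not None and (
            getattr(hf_quantizer, "requires_parameters_quantization", False)
            or not hf_quantizer.check_quantized_param(None, param_value=None, param_name="w", state_dict={})
        ):
            return "cpu-materialized"  # stands in for set_module_tensor_to_device
        else:
            return hf_quantizer.create_quantized_param  # AttributeError when hf_quantizer is None

    print(load_missing_key(FakeQuantizer()))  # -> "cpu-materialized"
    try:
        load_missing_key(None)  # unquantized load with a missing key
    except AttributeError as err:
        print("crashes without the fix:", err)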
From 780ea90a7f76cdbcf6a39bcb6047049109c9375e Mon Sep 17 00:00:00 2001
From: Juri Ganitkevitch
Date: Thu, 1 Feb 2024 12:08:38 -0500
Subject: [PATCH 2/6] Add test, fix logic.

---
 src/transformers/modeling_utils.py |  5 ++---
 tests/test_modeling_utils.py       | 12 ++++++++++++
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index b644bf8f31c2..34ac2e733484 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -3727,12 +3727,11 @@ def _fix_key(key):
 
                 if param.device == torch.device("meta"):
                     value = torch.empty(*param.size(), dtype=target_dtype)
-                    if hf_quantizer is not None and (
+                    if (hf_quantizer is None or
                         getattr(hf_quantizer, "requires_parameters_quantization", False)
                         or not hf_quantizer.check_quantized_param(
                             model, param_value=value, param_name=key, state_dict={}
-                        )
-                    ):
+                        )):
                         set_module_tensor_to_device(model, key, "cpu", value)
                     else:
                         hf_quantizer.create_quantized_param(model, value, key, "cpu", state_dict)
diff --git a/tests/test_modeling_utils.py b/tests/test_modeling_utils.py
index aac78e955c3e..b0125e6dd2a1 100755
--- a/tests/test_modeling_utils.py
+++ b/tests/test_modeling_utils.py
@@ -34,8 +34,10 @@
 from transformers import (
     AutoConfig,
     AutoModel,
+    AutoModelForSequenceClassification,
     OwlViTForObjectDetection,
     PretrainedConfig,
+    LlamaConfig,
     is_torch_available,
     logging,
 )
@@ -201,6 +203,7 @@ def forward(self, mask, inputs_embeds):
 
 TINY_T5 = "patrickvonplaten/t5-tiny-random"
 TINY_BERT_FOR_TOKEN_CLASSIFICATION = "hf-internal-testing/tiny-bert-for-token-classification"
+TINY_LLAMA = "seanmor5/tiny-llama-test"
 
 
 def check_models_equal(model1, model2):
@@ -300,6 +303,15 @@ def test_model_from_pretrained_with_different_pretrained_model_name(self):
             BertModel.from_pretrained(TINY_T5)
         self.assertTrue("You are using a model of type t5 to instantiate a model of type bert" in cl.out)
 
+    @require_accelerate
+    def test_model_from_pretrained_with_none_quantization_config(self):
+        model = None
+        # Needs device_map for low_cpu_mem trigger & missing keys in base model load to trigger.
+        model = AutoModelForSequenceClassification.from_pretrained(
+            TINY_LLAMA, config=LlamaConfig.from_pretrained(TINY_LLAMA), device_map="auto", quantization_config=None
+        )
+        self.assertIsNotNone(model)
+
     def test_model_from_config_torch_dtype(self):
         # test that the model can be instantiated with dtype of user's choice - as long as it's a
         # float dtype. To make it happen config.torch_dtype needs to be set before instantiating the

From aac8f7746aa751bf734cf857fccdd6a7d4c78f17 Mon Sep 17 00:00:00 2001
From: Juri Ganitkevitch
Date: Thu, 1 Feb 2024 12:12:11 -0500
Subject: [PATCH 3/6] make style

---
 src/transformers/modeling_utils.py | 8 +++++---
 tests/test_modeling_utils.py       | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index 34ac2e733484..d58ac2b68303 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -3727,11 +3727,13 @@ def _fix_key(key):
 
                 if param.device == torch.device("meta"):
                     value = torch.empty(*param.size(), dtype=target_dtype)
-                    if (hf_quantizer is None or
-                        getattr(hf_quantizer, "requires_parameters_quantization", False)
+                    if (
+                        hf_quantizer is None
+                        or getattr(hf_quantizer, "requires_parameters_quantization", False)
                         or not hf_quantizer.check_quantized_param(
                             model, param_value=value, param_name=key, state_dict={}
-                        )):
+                        )
+                    ):
                         set_module_tensor_to_device(model, key, "cpu", value)
                     else:
                         hf_quantizer.create_quantized_param(model, value, key, "cpu", state_dict)
diff --git a/tests/test_modeling_utils.py b/tests/test_modeling_utils.py
index b0125e6dd2a1..31718177488b 100755
--- a/tests/test_modeling_utils.py
+++ b/tests/test_modeling_utils.py
@@ -35,9 +35,9 @@
     AutoConfig,
     AutoModel,
     AutoModelForSequenceClassification,
+    LlamaConfig,
     OwlViTForObjectDetection,
     PretrainedConfig,
-    LlamaConfig,
     is_torch_available,
     logging,
 )

From 9b822bc8a75ca46cda0286ec0abac3014e60b8f4 Mon Sep 17 00:00:00 2001
From: Juri Ganitkevitch
Date: Thu, 1 Feb 2024 20:37:27 -0500
Subject: [PATCH 4/6] Switch test model to Mistral

---
 tests/test_modeling_utils.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/test_modeling_utils.py b/tests/test_modeling_utils.py
index 31718177488b..f9a1a77ff2b3 100755
--- a/tests/test_modeling_utils.py
+++ b/tests/test_modeling_utils.py
@@ -35,7 +35,6 @@
     AutoConfig,
     AutoModel,
     AutoModelForSequenceClassification,
-    LlamaConfig,
     OwlViTForObjectDetection,
     PretrainedConfig,
     is_torch_available,
@@ -203,7 +202,7 @@ def forward(self, mask, inputs_embeds):
 
 TINY_T5 = "patrickvonplaten/t5-tiny-random"
 TINY_BERT_FOR_TOKEN_CLASSIFICATION = "hf-internal-testing/tiny-bert-for-token-classification"
-TINY_LLAMA = "seanmor5/tiny-llama-test"
+TINY_MISTRAL = "hf-internal-testing/tiny-random-MistralForCausalLM"
 
 
 def check_models_equal(model1, model2):
@@ -308,7 +307,7 @@ def test_model_from_pretrained_with_different_pretrained_model_name(self):
         model = None
         # Needs device_map for low_cpu_mem trigger & missing keys in base model load to trigger.
         model = AutoModelForSequenceClassification.from_pretrained(
-            TINY_LLAMA, config=LlamaConfig.from_pretrained(TINY_LLAMA), device_map="auto", quantization_config=None
+            TINY_MISTRAL, device_map="auto", quantization_config=None
         )
         self.assertIsNotNone(model)
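Stripped of the unittest scaffolding, the regression test as of PATCH 4/6 boils down to the following sketch. It mirrors the test body above (the final assert stands in for assertIsNotNone) and needs the accelerate package installed so that device_map="auto" is accepted:

    from transformers import AutoModelForSequenceClassification

    model = AutoModelForSequenceClassification.from_pretrained(
        "hf-internal-testing/tiny-random-MistralForCausalLM",
        device_map="auto",         # forces the low_cpu_mem_usage loading path
        quantization_config=None,  # so no hf_quantizer is created
    )
    # The sequence-classification head is absent from the causal-LM checkpoint,
    # so its weights are "missing keys" materialized off the meta device through
    # the guarded branch patched in this series.
    assert model is not None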
From 85d6a39bb5a42fd9efa97a26b6319a1487aafce9 Mon Sep 17 00:00:00 2001
From: Juri Ganitkevitch
Date: Thu, 1 Feb 2024 20:41:44 -0500
Subject: [PATCH 5/6] Comment

---
 tests/test_modeling_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/test_modeling_utils.py b/tests/test_modeling_utils.py
index f9a1a77ff2b3..4a2d4046a410 100755
--- a/tests/test_modeling_utils.py
+++ b/tests/test_modeling_utils.py
@@ -305,7 +305,8 @@ def test_model_from_pretrained_with_different_pretrained_model_name(self):
     @require_accelerate
     def test_model_from_pretrained_with_none_quantization_config(self):
         model = None
-        # Needs device_map for low_cpu_mem trigger & missing keys in base model load to trigger.
+        # Needs a device_map to enter the low_cpu_mem branch. We also load AutoModelForSequenceClassification
+        # deliberately to enter the missing keys branch.
         model = AutoModelForSequenceClassification.from_pretrained(
             TINY_MISTRAL, device_map="auto", quantization_config=None
         )
         self.assertIsNotNone(model)

From 60f9d9c0ce52e6ed02f6278e1399a84c8b8032e7 Mon Sep 17 00:00:00 2001
From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com>
Date: Fri, 2 Feb 2024 09:16:13 +0100
Subject: [PATCH 6/6] Update tests/test_modeling_utils.py

---
 tests/test_modeling_utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_modeling_utils.py b/tests/test_modeling_utils.py
index 4a2d4046a410..cef56822dc3e 100755
--- a/tests/test_modeling_utils.py
+++ b/tests/test_modeling_utils.py
@@ -304,7 +304,6 @@ def test_model_from_pretrained_with_different_pretrained_model_name(self):
 
     @require_accelerate
     def test_model_from_pretrained_with_none_quantization_config(self):
-        model = None
         # Needs a device_map to enter the low_cpu_mem branch. We also load AutoModelForSequenceClassification
         # deliberately to enter the missing keys branch.
         model = AutoModelForSequenceClassification.from_pretrained(
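After the full series, the load-time decision reads: materialize a missing key as a plain CPU tensor when there is no quantizer, when the quantizer requires parameter-level quantization, or when the quantizer declines the parameter; otherwise the quantizer creates the parameter itself. A compact decision-table sketch (StubQuantizer and materialize_on_cpu are hypothetical names; only the boolean structure comes from PATCH 3/6):

    class StubQuantizer:
        requires_parameters_quantization = False

        def check_quantized_param(self, model, param_value, param_name, state_dict):
            return True  # claims this parameter for quantized creation

    def materialize_on_cpu(hf_quantizer):
        return (
            hf_quantizer is None
            or getattr(hf_quantizer, "requires_parameters_quantization", False)
            or not hf_quantizer.check_quantized_param(None, param_value=None, param_name="w", state_dict={})
        )

    assert materialize_on_cpu(None)                 # the None-quantizer case this PR fixes
    assert not materialize_on_cpu(StubQuantizer())  # quantizer creates the param itself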