",
@@ -156,8 +156,9 @@ def test_tokenizer_padding(self):
([1] * 10) + ([0] * 10),
]
prompts = [[prompt] for prompt in self.prepare_prompts()[2]]
- max_length = processor(prompts, padding="max_length", truncation=True, max_length=20)
- longest = processor(prompts, padding="longest", truncation=True, max_length=30)
+
+ max_length = processor(prompts, padding="max_length", truncation=True, max_length=20, return_tensors="pt")
+ longest = processor(prompts, padding="longest", truncation=True, max_length=30, return_tensors="pt")
decoded_max_length = processor.tokenizer.decode(max_length["input_ids"][-1])
decoded_longest = processor.tokenizer.decode(longest["input_ids"][-1])
@@ -203,7 +204,7 @@ def test_model_input_names(self):
processor = IdeficsProcessor(tokenizer=tokenizer, image_processor=image_processor)
prompts = self.prepare_prompts()
- inputs = processor(prompts, padding="longest")
+ inputs = processor(prompts, padding="longest", return_tensors="pt")
# For now the processor supports only ['pixel_values', 'input_ids', 'attention_mask']
self.assertSetEqual(set(inputs.keys()), set(self.input_keys))
diff --git a/tests/models/idefics2/test_modeling_idefics2.py b/tests/models/idefics2/test_modeling_idefics2.py
index 5553c972e6c9..63e6316773b9 100644
--- a/tests/models/idefics2/test_modeling_idefics2.py
+++ b/tests/models/idefics2/test_modeling_idefics2.py
@@ -180,6 +180,10 @@ def setUp(self):
def test_inputs_embeds():
pass
+ @unittest.skip("input_embeds cannot be passed in without input_ids")
+ def test_inputs_embeds_matches_input_ids(self):
+ pass
+
@unittest.skip("Model does not support padding right")
def test_flash_attn_2_generate_padding_right(self):
pass
diff --git a/tests/models/imagegpt/test_modeling_imagegpt.py b/tests/models/imagegpt/test_modeling_imagegpt.py
index e18f74533520..afb5ce87764c 100644
--- a/tests/models/imagegpt/test_modeling_imagegpt.py
+++ b/tests/models/imagegpt/test_modeling_imagegpt.py
@@ -466,6 +466,31 @@ def test_inputs_embeds(self):
with torch.no_grad():
model(**inputs)[0]
+ # overridden because ImageGPT's main input name is `pixel_values`
+ # NOTE: in the latest transformers, `pixel_values` as the main input name is deprecated and `input_ids` should be used instead. TODO
+ def test_inputs_embeds_matches_input_ids(self):
+ config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+
+ for model_class in self.all_model_classes:
+ model = model_class(config)
+ model.to(torch_device)
+ model.eval()
+
+ inputs = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class))
+ with torch.no_grad():
+ out_ids = model(**inputs)[0]
+
+ pixel_values = inputs["pixel_values"]
+ del inputs["pixel_values"]
+
+ wte = model.get_input_embeddings()
+ inputs["inputs_embeds"] = wte(pixel_values)
+
+ with torch.no_grad():
+ out_embeds = model(**inputs)[0]
+
+ self.assertTrue(torch.allclose(out_embeds, out_ids))
+
def _create_and_check_torchscript(self, config, inputs_dict):
if not self.test_torchscript:
return
diff --git a/tests/models/instructblip/test_modeling_instructblip.py b/tests/models/instructblip/test_modeling_instructblip.py
index dcb8040bfcf9..86aea876fa50 100644
--- a/tests/models/instructblip/test_modeling_instructblip.py
+++ b/tests/models/instructblip/test_modeling_instructblip.py
@@ -612,3 +612,24 @@ def test_inference_flant5_xl(self):
generated_text,
"The image depicts a man ironing clothes on the back of a yellow van in the middle of a busy city street. The man is wearing a yellow shirt with a bright yellow tie, and he is using an ironing board to complete his task. The image is unusual due to the fact that it shows a man ironing clothes on the back of a van in the middle of a busy city street. It is possible that the man is trying to save money by doing his laundry on the back of the van, but it is also possible that he is trying to save time by doing his laundry on the back of the van in the middle of a busy city street. Regardless of the reason for the man's actions, it is clear that he is trying to save time by doing his laundry on the back of the van in the middle of a busy city street.",
)
+
+ def test_inference_interpolate_pos_encoding(self):
+ processor = InstructBlipProcessor.from_pretrained("Salesforce/instructblip-flan-t5-xl")
+ model = InstructBlipForConditionalGeneration.from_pretrained(
+ "Salesforce/instructblip-flan-t5-xl",
+ torch_dtype=torch.bfloat16,
+ low_cpu_mem_usage=True,
+ ).to(torch_device)
+ processor.image_processor.size = {"height": 500, "width": 500}
+
+ image = prepare_img()
+ prompt = "What's in the image?"
+ inputs = processor(images=image, text=prompt, return_tensors="pt").to(torch_device)
+
+ predictions = model.generate(**inputs, interpolate_pos_encoding=True)
+ generated_text = processor.batch_decode(predictions, skip_special_tokens=True)[0].strip()
+
+ self.assertEqual(
+ predictions[0].tolist(), [0, 37, 1023, 753, 3, 9, 2335, 3823, 30, 8, 2608, 28, 3, 9, 1782, 5, 1]
+ )
+ self.assertEqual(generated_text, "The image features a woman sitting on the beach with a dog.")
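
For readers unfamiliar with the flag exercised in the new test above: `interpolate_pos_encoding=True` is the usual way vision transformers accept images at a resolution other than the pretraining one — the learned patch position embeddings are resampled (typically bicubically) to the new patch grid. The sketch below illustrates that general idea only; it is not InstructBlip's actual implementation, and the function name, the presence of a class token, and the 14-pixel patch size are assumptions.

```python
import torch
import torch.nn.functional as F


def interpolate_patch_pos_embed(pos_embed: torch.Tensor, new_grid: int) -> torch.Tensor:
    """Resize learned position embeddings of shape (1, 1 + old_grid**2, dim) to a new patch grid.

    Generic sketch of the technique behind `interpolate_pos_encoding=True`; real model code may differ.
    """
    cls_token, patch_pos = pos_embed[:, :1], pos_embed[:, 1:]
    dim = pos_embed.shape[-1]
    old_grid = int(patch_pos.shape[1] ** 0.5)
    # (1, old_grid**2, dim) -> (1, dim, old_grid, old_grid) so we can interpolate spatially
    patch_pos = patch_pos.reshape(1, old_grid, old_grid, dim).permute(0, 3, 1, 2)
    patch_pos = F.interpolate(patch_pos, size=(new_grid, new_grid), mode="bicubic", align_corners=False)
    patch_pos = patch_pos.permute(0, 2, 3, 1).reshape(1, new_grid * new_grid, dim)
    return torch.cat([cls_token, patch_pos], dim=1)


# Example: a 224px model with 14px patches has a 16x16 grid; a 500px input needs roughly a 35x35 grid.
pos = torch.randn(1, 1 + 16 * 16, 768)
print(interpolate_patch_pos_embed(pos, new_grid=35).shape)  # torch.Size([1, 1226, 768])
```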
diff --git a/tests/models/jamba/test_modeling_jamba.py b/tests/models/jamba/test_modeling_jamba.py
index f8e9fdb77b20..ffe859bb59d6 100644
--- a/tests/models/jamba/test_modeling_jamba.py
+++ b/tests/models/jamba/test_modeling_jamba.py
@@ -483,7 +483,7 @@ def _prepare_model_kwargs(input_ids, attention_mask, signature):
return model_kwargs
for model_class in decoder_only_classes:
- config, input_ids, attention_mask, _ = self._get_input_ids_and_config()
+ config, input_ids, attention_mask = self._get_input_ids_and_config()
model = model_class(config).to(torch_device).eval()
signature = inspect.signature(model.forward).parameters.keys()
diff --git a/tests/models/kosmos2/test_processor_kosmos2.py b/tests/models/kosmos2/test_processor_kosmos2.py
index c3dd8c4dba58..d2223496c0c2 100644
--- a/tests/models/kosmos2/test_processor_kosmos2.py
+++ b/tests/models/kosmos2/test_processor_kosmos2.py
@@ -17,6 +17,7 @@
import shutil
import tempfile
import unittest
+from tempfile import TemporaryDirectory
import numpy as np
import pytest
@@ -84,6 +85,15 @@ def prepare_image_inputs(self):
return image_inputs
+ def test_image_processor_load_save_reload(self):
+ # make sure that load from Hub repo -> save -> reload locally works
+ image_processor = CLIPImageProcessor.from_pretrained("microsoft/kosmos-2-patch14-224")
+ with TemporaryDirectory() as tmp_dir:
+ image_processor.save_pretrained(tmp_dir)
+ reloaded_image_processor = CLIPImageProcessor.from_pretrained(tmp_dir)
+ assert image_processor.to_dict() == reloaded_image_processor.to_dict()
+ assert image_processor.to_json_string() == reloaded_image_processor.to_json_string()
+
def test_save_load_pretrained_additional_features(self):
processor = Kosmos2Processor(tokenizer=self.get_tokenizer(), image_processor=self.get_image_processor())
processor.save_pretrained(self.tmpdirname)
diff --git a/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py b/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py
index b6200c3ee560..eebb7420be30 100644
--- a/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py
+++ b/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py
@@ -95,6 +95,7 @@ def test_image_processor_from_dict_with_kwargs(self):
image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42)
self.assertEqual(image_processor.size, {"height": 42, "width": 42})
+ @unittest.skip("Tesseract version is not correct in ci. @Arthur FIXME")
def test_layoutlmv2_integration_test(self):
# with apply_OCR = True
image_processing = LayoutLMv2ImageProcessor()
@@ -111,9 +112,9 @@ def test_layoutlmv2_integration_test(self):
self.assertEqual(len(encoding.words), len(encoding.boxes))
# fmt: off
- # the words and boxes were obtained with Tesseract 4.1.1
+ # the words and boxes were obtained with Tesseract 5.3.0
expected_words = [['11:14', 'to', '11:39', 'a.m', '11:39', 'to', '11:44', 'a.m.', '11:44', 'a.m.', 'to', '12:25', 'p.m.', '12:25', 'to', '12:58', 'p.m.', '12:58', 'to', '4:00', 'p.m.', '2:00', 'to', '5:00', 'p.m.', 'Coffee', 'Break', 'Coffee', 'will', 'be', 'served', 'for', 'men', 'and', 'women', 'in', 'the', 'lobby', 'adjacent', 'to', 'exhibit', 'area.', 'Please', 'move', 'into', 'exhibit', 'area.', '(Exhibits', 'Open)', 'TRRF', 'GENERAL', 'SESSION', '(PART', '|)', 'Presiding:', 'Lee', 'A.', 'Waller', 'TRRF', 'Vice', 'President', '“Introductory', 'Remarks”', 'Lee', 'A.', 'Waller,', 'TRRF', 'Vice', 'Presi-', 'dent', 'Individual', 'Interviews', 'with', 'TRRF', 'Public', 'Board', 'Members', 'and', 'Sci-', 'entific', 'Advisory', 'Council', 'Mem-', 'bers', 'Conducted', 'by', 'TRRF', 'Treasurer', 'Philip', 'G.', 'Kuehn', 'to', 'get', 'answers', 'which', 'the', 'public', 'refrigerated', 'warehousing', 'industry', 'is', 'looking', 'for.', 'Plus', 'questions', 'from', 'the', 'floor.', 'Dr.', 'Emil', 'M.', 'Mrak,', 'University', 'of', 'Cal-', 'ifornia,', 'Chairman,', 'TRRF', 'Board;', 'Sam', 'R.', 'Cecil,', 'University', 'of', 'Georgia', 'College', 'of', 'Agriculture;', 'Dr.', 'Stanley', 'Charm,', 'Tufts', 'University', 'School', 'of', 'Medicine;', 'Dr.', 'Robert', 'H.', 'Cotton,', 'ITT', 'Continental', 'Baking', 'Company;', 'Dr.', 'Owen', 'Fennema,', 'University', 'of', 'Wis-', 'consin;', 'Dr.', 'Robert', 'E.', 'Hardenburg,', 'USDA.', 'Questions', 'and', 'Answers', 'Exhibits', 'Open', 'Capt.', 'Jack', 'Stoney', 'Room', 'TRRF', 'Scientific', 'Advisory', 'Council', 'Meeting', 'Ballroom', 'Foyer']] # noqa: E231
- expected_boxes = [[[141, 57, 214, 69], [228, 58, 252, 69], [141, 75, 216, 88], [230, 79, 280, 88], [142, 260, 218, 273], [230, 261, 255, 273], [143, 279, 218, 290], [231, 282, 290, 291], [143, 342, 218, 354], [231, 345, 289, 355], [202, 362, 227, 373], [143, 379, 220, 392], [231, 382, 291, 394], [144, 714, 220, 726], [231, 715, 256, 726], [144, 732, 220, 745], [232, 736, 291, 747], [144, 769, 218, 782], [231, 770, 256, 782], [141, 788, 202, 801], [215, 791, 274, 804], [143, 826, 204, 838], [215, 826, 240, 838], [142, 844, 202, 857], [215, 847, 274, 859], [334, 57, 427, 69], [440, 57, 522, 69], [369, 75, 461, 88], [469, 75, 516, 88], [528, 76, 562, 88], [570, 76, 667, 88], [675, 75, 711, 87], [721, 79, 778, 88], [789, 75, 840, 88], [369, 97, 470, 107], [484, 94, 507, 106], [518, 94, 562, 107], [576, 94, 655, 110], [668, 94, 792, 109], [804, 95, 829, 107], [369, 113, 465, 125], [477, 116, 547, 125], [562, 113, 658, 125], [671, 116, 748, 125], [761, 113, 811, 125], [369, 131, 465, 143], [477, 133, 548, 143], [563, 130, 698, 145], [710, 130, 802, 146], [336, 171, 412, 183], [423, 171, 572, 183], [582, 170, 716, 184], [728, 171, 817, 187], [829, 171, 844, 186], [338, 197, 482, 212], [507, 196, 557, 209], [569, 196, 595, 208], [610, 196, 702, 209], [505, 214, 583, 226], [595, 214, 656, 227], [670, 215, 807, 227], [335, 259, 543, 274], [556, 259, 708, 272], [372, 279, 422, 291], [435, 279, 460, 291], [474, 279, 574, 292], [587, 278, 664, 291], [676, 278, 738, 291], [751, 279, 834, 291], [372, 298, 434, 310], [335, 341, 483, 354], [497, 341, 655, 354], [667, 341, 728, 354], [740, 341, 825, 354], [335, 360, 430, 372], [442, 360, 534, 372], [545, 359, 687, 372], [697, 360, 754, 372], [765, 360, 823, 373], [334, 378, 428, 391], [440, 378, 577, 394], [590, 378, 705, 391], [720, 378, 801, 391], [334, 397, 400, 409], [370, 416, 529, 429], [544, 416, 576, 432], [587, 416, 665, 428], [677, 416, 814, 429], [372, 435, 452, 450], [465, 434, 495, 447], [511, 434, 600, 447], [611, 436, 637, 447], [649, 436, 694, 451], [705, 438, 824, 447], [369, 453, 452, 466], [464, 454, 509, 466], [522, 453, 611, 469], [625, 453, 792, 469], [370, 472, 556, 488], [570, 472, 684, 487], [697, 472, 718, 485], [732, 472, 835, 488], [369, 490, 411, 503], [425, 490, 484, 503], [496, 490, 635, 506], [645, 490, 707, 503], [718, 491, 761, 503], [771, 490, 840, 503], [336, 510, 374, 521], [388, 510, 447, 522], [460, 510, 489, 521], [503, 510, 580, 522], [592, 509, 736, 525], [745, 509, 770, 522], [781, 509, 840, 522], [338, 528, 434, 541], [448, 528, 596, 541], [609, 527, 687, 540], [700, 528, 792, 541], [336, 546, 397, 559], [407, 546, 431, 559], [443, 546, 525, 560], [537, 546, 680, 562], [688, 546, 714, 559], [722, 546, 837, 562], [336, 565, 449, 581], [461, 565, 485, 577], [497, 565, 665, 581], [681, 565, 718, 577], [732, 565, 837, 580], [337, 584, 438, 597], [452, 583, 521, 596], [535, 584, 677, 599], [690, 583, 787, 596], [801, 583, 825, 596], [338, 602, 478, 615], [492, 602, 530, 614], [543, 602, 638, 615], [650, 602, 676, 614], [688, 602, 788, 615], [802, 602, 843, 614], [337, 621, 502, 633], [516, 621, 615, 637], [629, 621, 774, 636], [789, 621, 827, 633], [337, 639, 418, 652], [432, 640, 571, 653], [587, 639, 731, 655], [743, 639, 769, 652], [780, 639, 841, 652], [338, 658, 440, 673], [455, 658, 491, 670], [508, 658, 602, 671], [616, 658, 638, 670], [654, 658, 835, 674], [337, 677, 429, 689], [337, 714, 482, 726], [495, 714, 548, 726], [561, 714, 683, 726], [338, 770, 461, 782], [474, 769, 554, 785], [489, 788, 562, 803], 
[576, 788, 643, 801], [656, 787, 751, 804], [764, 788, 844, 801], [334, 825, 421, 838], [430, 824, 574, 838], [584, 824, 723, 841], [335, 844, 450, 857], [464, 843, 583, 860], [628, 862, 755, 875], [769, 861, 848, 878]]] # noqa: E231
+ expected_boxes = [[[141, 57, 210, 69], [228, 58, 252, 69], [141, 75, 216, 88], [230, 79, 280, 88], [142, 260, 218, 273], [230, 261, 255, 273], [143, 279, 218, 290], [231, 282, 290, 291], [143, 342, 218, 354], [231, 345, 289, 355], [202, 362, 227, 373], [143, 379, 220, 392], [231, 382, 291, 394], [144, 714, 220, 726], [231, 715, 256, 726], [144, 732, 220, 745], [232, 736, 291, 747], [144, 769, 218, 782], [231, 770, 256, 782], [141, 788, 202, 801], [215, 791, 274, 804], [143, 826, 204, 838], [215, 826, 240, 838], [142, 844, 202, 857], [215, 847, 274, 859], [334, 57, 427, 69], [440, 57, 522, 69], [369, 75, 461, 88], [469, 75, 516, 88], [528, 76, 562, 88], [570, 76, 667, 88], [675, 75, 711, 87], [721, 79, 778, 88], [789, 75, 840, 88], [369, 97, 470, 107], [484, 94, 507, 106], [518, 94, 562, 107], [576, 94, 655, 110], [668, 94, 792, 109], [804, 95, 829, 107], [369, 113, 465, 125], [477, 116, 547, 125], [562, 113, 658, 125], [671, 116, 748, 125], [761, 113, 811, 125], [369, 131, 465, 143], [477, 133, 548, 143], [563, 130, 698, 145], [710, 130, 802, 146], [336, 171, 412, 183], [423, 171, 572, 183], [582, 170, 716, 184], [728, 171, 817, 187], [829, 171, 844, 186], [338, 197, 482, 212], [507, 196, 557, 209], [569, 196, 595, 208], [610, 196, 702, 209], [505, 214, 583, 226], [595, 214, 656, 227], [670, 215, 807, 227], [335, 259, 543, 274], [556, 259, 708, 272], [372, 279, 422, 291], [435, 279, 460, 291], [474, 279, 574, 292], [587, 278, 664, 291], [676, 278, 738, 291], [751, 279, 834, 291], [372, 298, 434, 310], [335, 341, 483, 354], [497, 341, 655, 354], [667, 341, 728, 354], [740, 341, 825, 354], [335, 360, 430, 372], [442, 360, 534, 372], [545, 359, 687, 372], [697, 360, 754, 372], [765, 360, 823, 373], [334, 378, 428, 391], [440, 378, 577, 394], [590, 378, 705, 391], [720, 378, 801, 391], [334, 397, 400, 409], [370, 416, 529, 429], [544, 416, 576, 432], [587, 416, 665, 428], [677, 416, 814, 429], [372, 435, 452, 450], [465, 434, 495, 447], [511, 434, 600, 447], [611, 436, 637, 447], [649, 436, 694, 451], [705, 438, 824, 447], [369, 453, 452, 466], [464, 454, 509, 466], [522, 453, 611, 469], [625, 453, 792, 469], [370, 472, 556, 488], [570, 472, 684, 487], [697, 472, 718, 485], [732, 472, 835, 488], [369, 490, 411, 503], [425, 490, 484, 503], [496, 490, 635, 506], [645, 490, 707, 503], [718, 491, 761, 503], [771, 490, 840, 503], [336, 510, 374, 521], [388, 510, 447, 522], [460, 510, 489, 521], [503, 510, 580, 522], [592, 509, 736, 525], [745, 509, 770, 522], [781, 509, 840, 522], [338, 528, 434, 541], [448, 528, 596, 541], [609, 527, 687, 540], [700, 528, 792, 541], [336, 546, 397, 559], [407, 546, 431, 559], [443, 546, 525, 560], [537, 546, 680, 562], [695, 546, 714, 559], [722, 546, 837, 562], [336, 565, 449, 581], [461, 565, 485, 577], [497, 565, 665, 581], [681, 565, 718, 577], [732, 565, 837, 580], [337, 584, 438, 597], [452, 583, 521, 596], [535, 584, 677, 599], [690, 583, 787, 596], [801, 583, 825, 596], [338, 602, 478, 615], [492, 602, 530, 614], [543, 602, 638, 615], [650, 602, 676, 614], [688, 602, 788, 615], [802, 602, 843, 614], [337, 621, 502, 633], [516, 621, 615, 637], [629, 621, 774, 636], [789, 621, 827, 633], [337, 639, 418, 652], [432, 640, 571, 653], [587, 639, 731, 655], [743, 639, 769, 652], [780, 639, 841, 652], [338, 658, 440, 673], [455, 658, 491, 670], [508, 658, 602, 671], [616, 658, 638, 670], [654, 658, 835, 674], [337, 677, 429, 689], [337, 714, 482, 726], [495, 714, 548, 726], [561, 714, 683, 726], [338, 770, 461, 782], [474, 769, 554, 785], [489, 788, 562, 803], 
[576, 788, 643, 801], [656, 787, 751, 804], [764, 788, 844, 801], [334, 825, 421, 838], [430, 824, 574, 838], [584, 824, 723, 841], [335, 844, 450, 857], [464, 843, 583, 860], [628, 862, 755, 875], [769, 861, 848, 878]]] # noqa: E231
# fmt: on
self.assertListEqual(encoding.words, expected_words)
diff --git a/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py b/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py
index 9b19c376d90b..8d4b64c2ccd4 100644
--- a/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py
+++ b/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py
@@ -111,9 +111,9 @@ def test_LayoutLMv3_integration_test(self):
self.assertEqual(len(encoding.words), len(encoding.boxes))
# fmt: off
- # the words and boxes were obtained with Tesseract 4.1.1
+ # the words and boxes were obtained with Tesseract 5.3.0
expected_words = [['11:14', 'to', '11:39', 'a.m', '11:39', 'to', '11:44', 'a.m.', '11:44', 'a.m.', 'to', '12:25', 'p.m.', '12:25', 'to', '12:58', 'p.m.', '12:58', 'to', '4:00', 'p.m.', '2:00', 'to', '5:00', 'p.m.', 'Coffee', 'Break', 'Coffee', 'will', 'be', 'served', 'for', 'men', 'and', 'women', 'in', 'the', 'lobby', 'adjacent', 'to', 'exhibit', 'area.', 'Please', 'move', 'into', 'exhibit', 'area.', '(Exhibits', 'Open)', 'TRRF', 'GENERAL', 'SESSION', '(PART', '|)', 'Presiding:', 'Lee', 'A.', 'Waller', 'TRRF', 'Vice', 'President', '“Introductory', 'Remarks”', 'Lee', 'A.', 'Waller,', 'TRRF', 'Vice', 'Presi-', 'dent', 'Individual', 'Interviews', 'with', 'TRRF', 'Public', 'Board', 'Members', 'and', 'Sci-', 'entific', 'Advisory', 'Council', 'Mem-', 'bers', 'Conducted', 'by', 'TRRF', 'Treasurer', 'Philip', 'G.', 'Kuehn', 'to', 'get', 'answers', 'which', 'the', 'public', 'refrigerated', 'warehousing', 'industry', 'is', 'looking', 'for.', 'Plus', 'questions', 'from', 'the', 'floor.', 'Dr.', 'Emil', 'M.', 'Mrak,', 'University', 'of', 'Cal-', 'ifornia,', 'Chairman,', 'TRRF', 'Board;', 'Sam', 'R.', 'Cecil,', 'University', 'of', 'Georgia', 'College', 'of', 'Agriculture;', 'Dr.', 'Stanley', 'Charm,', 'Tufts', 'University', 'School', 'of', 'Medicine;', 'Dr.', 'Robert', 'H.', 'Cotton,', 'ITT', 'Continental', 'Baking', 'Company;', 'Dr.', 'Owen', 'Fennema,', 'University', 'of', 'Wis-', 'consin;', 'Dr.', 'Robert', 'E.', 'Hardenburg,', 'USDA.', 'Questions', 'and', 'Answers', 'Exhibits', 'Open', 'Capt.', 'Jack', 'Stoney', 'Room', 'TRRF', 'Scientific', 'Advisory', 'Council', 'Meeting', 'Ballroom', 'Foyer']] # noqa: E231
- expected_boxes = [[[141, 57, 214, 69], [228, 58, 252, 69], [141, 75, 216, 88], [230, 79, 280, 88], [142, 260, 218, 273], [230, 261, 255, 273], [143, 279, 218, 290], [231, 282, 290, 291], [143, 342, 218, 354], [231, 345, 289, 355], [202, 362, 227, 373], [143, 379, 220, 392], [231, 382, 291, 394], [144, 714, 220, 726], [231, 715, 256, 726], [144, 732, 220, 745], [232, 736, 291, 747], [144, 769, 218, 782], [231, 770, 256, 782], [141, 788, 202, 801], [215, 791, 274, 804], [143, 826, 204, 838], [215, 826, 240, 838], [142, 844, 202, 857], [215, 847, 274, 859], [334, 57, 427, 69], [440, 57, 522, 69], [369, 75, 461, 88], [469, 75, 516, 88], [528, 76, 562, 88], [570, 76, 667, 88], [675, 75, 711, 87], [721, 79, 778, 88], [789, 75, 840, 88], [369, 97, 470, 107], [484, 94, 507, 106], [518, 94, 562, 107], [576, 94, 655, 110], [668, 94, 792, 109], [804, 95, 829, 107], [369, 113, 465, 125], [477, 116, 547, 125], [562, 113, 658, 125], [671, 116, 748, 125], [761, 113, 811, 125], [369, 131, 465, 143], [477, 133, 548, 143], [563, 130, 698, 145], [710, 130, 802, 146], [336, 171, 412, 183], [423, 171, 572, 183], [582, 170, 716, 184], [728, 171, 817, 187], [829, 171, 844, 186], [338, 197, 482, 212], [507, 196, 557, 209], [569, 196, 595, 208], [610, 196, 702, 209], [505, 214, 583, 226], [595, 214, 656, 227], [670, 215, 807, 227], [335, 259, 543, 274], [556, 259, 708, 272], [372, 279, 422, 291], [435, 279, 460, 291], [474, 279, 574, 292], [587, 278, 664, 291], [676, 278, 738, 291], [751, 279, 834, 291], [372, 298, 434, 310], [335, 341, 483, 354], [497, 341, 655, 354], [667, 341, 728, 354], [740, 341, 825, 354], [335, 360, 430, 372], [442, 360, 534, 372], [545, 359, 687, 372], [697, 360, 754, 372], [765, 360, 823, 373], [334, 378, 428, 391], [440, 378, 577, 394], [590, 378, 705, 391], [720, 378, 801, 391], [334, 397, 400, 409], [370, 416, 529, 429], [544, 416, 576, 432], [587, 416, 665, 428], [677, 416, 814, 429], [372, 435, 452, 450], [465, 434, 495, 447], [511, 434, 600, 447], [611, 436, 637, 447], [649, 436, 694, 451], [705, 438, 824, 447], [369, 453, 452, 466], [464, 454, 509, 466], [522, 453, 611, 469], [625, 453, 792, 469], [370, 472, 556, 488], [570, 472, 684, 487], [697, 472, 718, 485], [732, 472, 835, 488], [369, 490, 411, 503], [425, 490, 484, 503], [496, 490, 635, 506], [645, 490, 707, 503], [718, 491, 761, 503], [771, 490, 840, 503], [336, 510, 374, 521], [388, 510, 447, 522], [460, 510, 489, 521], [503, 510, 580, 522], [592, 509, 736, 525], [745, 509, 770, 522], [781, 509, 840, 522], [338, 528, 434, 541], [448, 528, 596, 541], [609, 527, 687, 540], [700, 528, 792, 541], [336, 546, 397, 559], [407, 546, 431, 559], [443, 546, 525, 560], [537, 546, 680, 562], [688, 546, 714, 559], [722, 546, 837, 562], [336, 565, 449, 581], [461, 565, 485, 577], [497, 565, 665, 581], [681, 565, 718, 577], [732, 565, 837, 580], [337, 584, 438, 597], [452, 583, 521, 596], [535, 584, 677, 599], [690, 583, 787, 596], [801, 583, 825, 596], [338, 602, 478, 615], [492, 602, 530, 614], [543, 602, 638, 615], [650, 602, 676, 614], [688, 602, 788, 615], [802, 602, 843, 614], [337, 621, 502, 633], [516, 621, 615, 637], [629, 621, 774, 636], [789, 621, 827, 633], [337, 639, 418, 652], [432, 640, 571, 653], [587, 639, 731, 655], [743, 639, 769, 652], [780, 639, 841, 652], [338, 658, 440, 673], [455, 658, 491, 670], [508, 658, 602, 671], [616, 658, 638, 670], [654, 658, 835, 674], [337, 677, 429, 689], [337, 714, 482, 726], [495, 714, 548, 726], [561, 714, 683, 726], [338, 770, 461, 782], [474, 769, 554, 785], [489, 788, 562, 803], 
[576, 788, 643, 801], [656, 787, 751, 804], [764, 788, 844, 801], [334, 825, 421, 838], [430, 824, 574, 838], [584, 824, 723, 841], [335, 844, 450, 857], [464, 843, 583, 860], [628, 862, 755, 875], [769, 861, 848, 878]]] # noqa: E231
+ expected_boxes = [[[141, 57, 210, 69], [228, 58, 252, 69], [141, 75, 216, 88], [230, 79, 280, 88], [142, 260, 218, 273], [230, 261, 255, 273], [143, 279, 218, 290], [231, 282, 290, 291], [143, 342, 218, 354], [231, 345, 289, 355], [202, 362, 227, 373], [143, 379, 220, 392], [231, 382, 291, 394], [144, 714, 220, 726], [231, 715, 256, 726], [144, 732, 220, 745], [232, 736, 291, 747], [144, 769, 218, 782], [231, 770, 256, 782], [141, 788, 202, 801], [215, 791, 274, 804], [143, 826, 204, 838], [215, 826, 240, 838], [142, 844, 202, 857], [215, 847, 274, 859], [334, 57, 427, 69], [440, 57, 522, 69], [369, 75, 461, 88], [469, 75, 516, 88], [528, 76, 562, 88], [570, 76, 667, 88], [675, 75, 711, 87], [721, 79, 778, 88], [789, 75, 840, 88], [369, 97, 470, 107], [484, 94, 507, 106], [518, 94, 562, 107], [576, 94, 655, 110], [668, 94, 792, 109], [804, 95, 829, 107], [369, 113, 465, 125], [477, 116, 547, 125], [562, 113, 658, 125], [671, 116, 748, 125], [761, 113, 811, 125], [369, 131, 465, 143], [477, 133, 548, 143], [563, 130, 698, 145], [710, 130, 802, 146], [336, 171, 412, 183], [423, 171, 572, 183], [582, 170, 716, 184], [728, 171, 817, 187], [829, 171, 844, 186], [338, 197, 482, 212], [507, 196, 557, 209], [569, 196, 595, 208], [610, 196, 702, 209], [505, 214, 583, 226], [595, 214, 656, 227], [670, 215, 807, 227], [335, 259, 543, 274], [556, 259, 708, 272], [372, 279, 422, 291], [435, 279, 460, 291], [474, 279, 574, 292], [587, 278, 664, 291], [676, 278, 738, 291], [751, 279, 834, 291], [372, 298, 434, 310], [335, 341, 483, 354], [497, 341, 655, 354], [667, 341, 728, 354], [740, 341, 825, 354], [335, 360, 430, 372], [442, 360, 534, 372], [545, 359, 687, 372], [697, 360, 754, 372], [765, 360, 823, 373], [334, 378, 428, 391], [440, 378, 577, 394], [590, 378, 705, 391], [720, 378, 801, 391], [334, 397, 400, 409], [370, 416, 529, 429], [544, 416, 576, 432], [587, 416, 665, 428], [677, 416, 814, 429], [372, 435, 452, 450], [465, 434, 495, 447], [511, 434, 600, 447], [611, 436, 637, 447], [649, 436, 694, 451], [705, 438, 824, 447], [369, 453, 452, 466], [464, 454, 509, 466], [522, 453, 611, 469], [625, 453, 792, 469], [370, 472, 556, 488], [570, 472, 684, 487], [697, 472, 718, 485], [732, 472, 835, 488], [369, 490, 411, 503], [425, 490, 484, 503], [496, 490, 635, 506], [645, 490, 707, 503], [718, 491, 761, 503], [771, 490, 840, 503], [336, 510, 374, 521], [388, 510, 447, 522], [460, 510, 489, 521], [503, 510, 580, 522], [592, 509, 736, 525], [745, 509, 770, 522], [781, 509, 840, 522], [338, 528, 434, 541], [448, 528, 596, 541], [609, 527, 687, 540], [700, 528, 792, 541], [336, 546, 397, 559], [407, 546, 431, 559], [443, 546, 525, 560], [537, 546, 680, 562], [695, 546, 714, 559], [722, 546, 837, 562], [336, 565, 449, 581], [461, 565, 485, 577], [497, 565, 665, 581], [681, 565, 718, 577], [732, 565, 837, 580], [337, 584, 438, 597], [452, 583, 521, 596], [535, 584, 677, 599], [690, 583, 787, 596], [801, 583, 825, 596], [338, 602, 478, 615], [492, 602, 530, 614], [543, 602, 638, 615], [650, 602, 676, 614], [688, 602, 788, 615], [802, 602, 843, 614], [337, 621, 502, 633], [516, 621, 615, 637], [629, 621, 774, 636], [789, 621, 827, 633], [337, 639, 418, 652], [432, 640, 571, 653], [587, 639, 731, 655], [743, 639, 769, 652], [780, 639, 841, 652], [338, 658, 440, 673], [455, 658, 491, 670], [508, 658, 602, 671], [616, 658, 638, 670], [654, 658, 835, 674], [337, 677, 429, 689], [337, 714, 482, 726], [495, 714, 548, 726], [561, 714, 683, 726], [338, 770, 461, 782], [474, 769, 554, 785], [489, 788, 562, 803], 
[576, 788, 643, 801], [656, 787, 751, 804], [764, 788, 844, 801], [334, 825, 421, 838], [430, 824, 574, 838], [584, 824, 723, 841], [335, 844, 450, 857], [464, 843, 583, 860], [628, 862, 755, 875], [769, 861, 848, 878]]] # noqa: E231
# fmt: on
self.assertListEqual(encoding.words, expected_words)
diff --git a/tests/models/led/test_modeling_led.py b/tests/models/led/test_modeling_led.py
index 120308db90d8..10d944c496fe 100644
--- a/tests/models/led/test_modeling_led.py
+++ b/tests/models/led/test_modeling_led.py
@@ -457,6 +457,20 @@ def test_attention_outputs(self):
],
)
+ def _check_encoder_attention_for_generate(self, attentions, batch_size, config, seq_length):
+ # overwrite because LED encoder attentions do not have the usual (bs, num_heads, seq_len, seq_len) shape
+ encoder_expected_shape = (
+ batch_size,
+ config.num_attention_heads,
+ seq_length,
+ self.model_tester.attention_window // 2 * 2 + 1,
+ )
+ self.assertIsInstance(attentions, tuple)
+ self.assertListEqual(
+ [layer_attentions.shape for layer_attentions in attentions],
+ [encoder_expected_shape] * len(attentions),
+ )
+
def assert_tensors_close(a, b, atol=1e-12, prefix=""):
"""If tensors have different shapes, different values or a and b are not both tensors, raise a nice Assertion error."""
diff --git a/tests/models/llama/test_modeling_llama.py b/tests/models/llama/test_modeling_llama.py
index dc24fd848c81..5d402bd85994 100644
--- a/tests/models/llama/test_modeling_llama.py
+++ b/tests/models/llama/test_modeling_llama.py
@@ -12,17 +12,18 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-""" Testing suite for the PyTorch LLaMA model. """
+"""Testing suite for the PyTorch LLaMA model."""
+import gc
import tempfile
import unittest
import pytest
+from packaging import version
from parameterized import parameterized
-from transformers import LlamaConfig, StaticCache, is_torch_available, logging, set_seed
+from transformers import LlamaConfig, StaticCache, is_torch_available, set_seed
from transformers.testing_utils import (
- CaptureLogger,
require_bitsandbytes,
require_flash_attn,
require_read_token,
@@ -591,11 +592,6 @@ def test_eager_matches_sdpa_generate(self):
msg=f"\n{tokenizer.batch_decode(res_eager)} \nvs\n{tokenizer.batch_decode(res_sdpa)}",
)
- @unittest.skip("TODO @gante fix this for Llama")
- @parameterized.expand([(1, False), (1, True), (4, False)])
- def test_new_cache_format(self, num_beams, do_sample):
- pass
-
@require_torch_gpu
class LlamaIntegrationTest(unittest.TestCase):
@@ -684,15 +680,28 @@ def test_model_13b_greedy_generation(self):
@require_torch_gpu
@require_read_token
def test_compile_static_cache(self):
+ # `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2
+ # work as intended. See https://github.com/pytorch/pytorch/issues/121943
+ if version.parse(torch.__version__) < version.parse("2.3.0"):
+ self.skipTest("This test requires torch >= 2.3 to run.")
+
NUM_TOKENS_TO_GENERATE = 40
+ # Note on `EXPECTED_TEXT_COMPLETION`'s diff: the current value matches what the original test would produce
+ # if its cache were shrunk to 53 tokens (as opposed to 4096), on Ampere GPUs.
EXPECTED_TEXT_COMPLETION = {
- 7: [
- "Simply put, the theory of relativity states that 1) the speed of light is constant, 2) the speed of light is the same for all observers, and 3) the laws of physics are the same for all observers.",
- "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs, my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p",
- ],
8: [
- "Simply put, the theory of relativity states that 1) the speed of light is the same for all observers, and 2) the laws of physics are the same for all observers.\nThe first part of the theory of relativity",
- "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs, my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p",
+ "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial "
+ "reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe "
+ "theory of relativ",
+ "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs, "
+ "my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p",
+ ],
+ 7: [
+ "Simply put, the theory of relativity states that 1. surely nothing is faster than light.\nThe theory "
+ "goes that nothing travels faster than light, but the faster you go, the slower everything else will "
+ "be.\nThe theory of relativity",
+ "My favorite all time favorite condiment is ketchup. I love it on hamburgers, hot dogs, fries, eggs, "
+ "and even on a good old fashioned cheeseburger. I love it on everything. I love it so",
],
}
@@ -706,38 +715,25 @@ def test_compile_static_cache(self):
)
inputs = tokenizer(prompts, return_tensors="pt", padding=True).to(model.device)
- def decode_one_tokens(model, cur_token, input_pos, cache_position):
- logits = model(
- cur_token, position_ids=input_pos, cache_position=cache_position, return_dict=False, use_cache=True
- )[0]
- new_token = torch.argmax(logits[:, -1], dim=-1)[:, None]
- return new_token
-
- batch_size, seq_length = inputs["input_ids"].shape
- with torch.no_grad():
- model._setup_cache(StaticCache, 2, max_cache_len=4096)
- cache_position = torch.arange(seq_length, device=torch_device)
- generated_ids = torch.zeros(
- batch_size, seq_length + NUM_TOKENS_TO_GENERATE + 1, dtype=torch.int, device=torch_device
- )
- generated_ids[:, cache_position] = inputs["input_ids"].to(torch_device).to(torch.int)
-
- logits = model(**inputs, cache_position=cache_position, return_dict=False, use_cache=True)[0]
- next_token = torch.argmax(logits[:, -1], dim=-1)[:, None]
- generated_ids[:, seq_length] = next_token[:, 0]
-
- decode_one_tokens = torch.compile(decode_one_tokens, mode="reduce-overhead", fullgraph=True)
- cache_position = torch.tensor([seq_length + 1], device=torch_device)
- for _ in range(1, NUM_TOKENS_TO_GENERATE):
- with torch.backends.cuda.sdp_kernel(enable_flash=False, enable_mem_efficient=False, enable_math=True):
- with CaptureLogger(logging.get_logger(__name__)) as cl:
- next_token = decode_one_tokens(model, next_token.clone(), None, cache_position)
- self.assertNotIn("skipping cudagraphs due to", cl.out)
- generated_ids[:, cache_position] = next_token.int()
- cache_position += 1
-
- text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
- self.assertEqual(EXPECTED_TEXT_COMPLETION[self.cuda_compute_capability_major_version], text)
+ # Dynamic Cache
+ generated_ids = model.generate(**inputs, max_new_tokens=NUM_TOKENS_TO_GENERATE, do_sample=False)
+ dynamic_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
+ self.assertEqual(EXPECTED_TEXT_COMPLETION[8], dynamic_text) # Both GPU architectures have the same output
+
+ # Static Cache
+ generated_ids = model.generate(
+ **inputs, max_new_tokens=NUM_TOKENS_TO_GENERATE, do_sample=False, cache_implementation="static"
+ )
+ static_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
+ self.assertEqual(EXPECTED_TEXT_COMPLETION[self.cuda_compute_capability_major_version], static_text)
+
+ # Static Cache + compile
+ model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)
+ generated_ids = model.generate(
+ **inputs, max_new_tokens=NUM_TOKENS_TO_GENERATE, do_sample=False, cache_implementation="static"
+ )
+ static_compiled_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
+ self.assertEqual(EXPECTED_TEXT_COMPLETION[self.cuda_compute_capability_major_version], static_compiled_text)
@require_torch
@@ -809,7 +805,7 @@ def test_model_7b_logits(self):
' \ndef main():\n factory = InterfaceManagerFactory(start=datetime.now())\n managers = []\n for i in range(10):\n managers.append(factory.build(id=i))\n class InterfaceManagerFactory(AbstractManagerFactory):\n def __init__(',
' = 0 :=\nbegin\nsplit,\n{ intros h f,\n rw pi_1_etalisation at h,\n simp [h],\n refl\n},\n{ intro h,\n have := @quasi_adjoint C D P,\n simp [←pi_1_etalisation, this, h],\n refl\n}\nend\n /-- A quasi-prefunctoid is 1-connected iff all its etalisations are 1-connected. -/\ntheorem connected_iff_etalisation [C D : precategoroid] (P : quasi_prefunctoid C D) :\nπ₁ P = 0 ↔ '
]
- EXPECTED_IDS = torch.tensor([[ 1, 32007, 822, 3349, 29918, 5464, 29918, 294, 18869, 29898,29879, 29901, 851, 29897, 1599, 851, 29901, 13, 1678, 9995, 29871, 32008, 13, 1678, 736, 1121, 13, 32009, 15941, 1661, 29899, 28599, 2687, 4890, 515, 263, 1347, 29889, 13, 13, 1678, 826, 3174, 29901, 13, 4706, 269, 29901, 450, 1347, 304, 3349, 1661, 29899, 28599, 2687, 4890, 515, 29889, 13, 13, 1678, 16969, 29901, 13, 4706, 450, 1347, 411, 1661, 29899, 28599, 2687, 4890, 6206, 29889, 13, 1678, 9995, 13, 1678, 1121, 353, 5124, 13, 1678, 363, 274, 297, 269, 29901, 13, 4706, 565, 4356, 29898, 29883, 29897, 529, 29871, 29896, 29906, 29947, 29901, 13, 9651, 1121, 4619, 274, 32010, 2]])
+ EXPECTED_IDS = torch.tensor([[1, 32007, 822, 3349, 29918, 5464, 29918, 294, 18869, 29898, 29879, 29901, 851, 29897, 1599, 851, 29901, 13, 1678, 9995, 29871, 32008, 13, 1678, 736, 1121, 13, 32009, 15941, 1661, 29899, 28599, 2687, 4890, 515, 263, 1347, 29889, 13, 13, 1678, 826, 3174, 29901, 13, 4706, 269, 29901, 450, 1347, 304, 3349, 1661, 29899, 28599, 2687, 4890, 515, 29889, 13, 13, 1678, 16969, 29901, 13, 4706, 450, 1347, 411, 1661, 29899, 28599, 2687, 4890, 6206, 29889, 13, 1678, 9995, 13, 1678, 1121, 353, 5124, 13, 1678, 363, 274, 297, 269, 29901, 13, 4706, 565, 4356, 29898, 29883, 29897, 529, 29871, 29896, 29906, 29947, 29901, 13, 9651, 1121, 4619, 274, 32010, 2]])
# fmt: on
self.assertEqual(processed_text_suffix_first, EXPECTED_TEXT)
input_ids = tokenizer(self.PROMPTS[0], return_tensors="pt")["input_ids"]
@@ -821,3 +817,253 @@ def test_model_7b_logits(self):
]
infilling = tokenizer.batch_decode(generated_ids)
self.assertEqual(infilling, EXPECTED_INFILLING)
+
+
+@slow
+@require_torch_gpu
+class Mask4DTestHard(unittest.TestCase):
+ def tearDown(self):
+ gc.collect()
+ torch.cuda.empty_cache()
+
+ def setUp(self):
+ model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+ self.model_dtype = torch.float32
+ self.tokenizer = LlamaTokenizer.from_pretrained(model_name)
+ self.model = LlamaForCausalLM.from_pretrained(model_name, torch_dtype=self.model_dtype).to(torch_device)
+
+ def get_test_data(self):
+ template = "my favorite {}"
+ items = ("pet is a", "artist plays a", "name is L") # same number of tokens in each item
+
+ batch_separate = [template.format(x) for x in items] # 3 separate lines
+ batch_shared_prefix = template.format(" ".join(items)) # 1 line with options concatenated
+
+ input_ids = self.tokenizer(batch_separate, return_tensors="pt").input_ids.to(torch_device)
+ input_ids_shared_prefix = self.tokenizer(batch_shared_prefix, return_tensors="pt").input_ids.to(torch_device)
+
+ mask_shared_prefix = torch.tensor(
+ [
+ [
+ [
+ [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
+ [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
+ [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
+ [1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0],
+ [1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0],
+ [1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0],
+ [1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0],
+ [1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0],
+ [1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1],
+ ]
+ ]
+ ],
+ device=torch_device,
+ )
+
+ position_ids = torch.arange(input_ids.shape[1]).tile(input_ids.shape[0], 1).to(torch_device)
+
+ # building custom positions ids based on custom mask
+ position_ids_shared_prefix = (mask_shared_prefix.sum(dim=-1) - 1).reshape(1, -1)
+ # effectively: position_ids_shared_prefix = torch.tensor([[0, 1, 2, 3, 4, 5, 3, 4, 5, 3, 4, 5]]).to(device)
+
+ # inverting the mask
+ min_dtype = torch.finfo(self.model_dtype).min
+ mask_shared_prefix = (mask_shared_prefix.eq(0.0)).to(dtype=self.model_dtype) * min_dtype
+
+ return input_ids, position_ids, input_ids_shared_prefix, mask_shared_prefix, position_ids_shared_prefix
+
+ def test_stacked_causal_mask(self):
+ (
+ input_ids,
+ position_ids,
+ input_ids_shared_prefix,
+ mask_shared_prefix,
+ position_ids_shared_prefix,
+ ) = self.get_test_data()
+
+ # regular batch
+ logits = self.model.forward(input_ids, position_ids=position_ids).logits
+ logits_last = logits[:, -1, :] # last tokens in each batch line
+ decoded = [self.tokenizer.decode(t) for t in logits_last.argmax(dim=-1)]
+
+ # single forward run with 4D custom mask
+ logits_shared_prefix = self.model.forward(
+ input_ids_shared_prefix, attention_mask=mask_shared_prefix, position_ids=position_ids_shared_prefix
+ ).logits
+ logits_shared_prefix_last = logits_shared_prefix[
+ 0, torch.where(position_ids_shared_prefix == position_ids_shared_prefix.max())[1], :
+ ] # last three tokens
+ decoded_shared_prefix = [self.tokenizer.decode(t) for t in logits_shared_prefix_last.argmax(dim=-1)]
+
+ self.assertEqual(decoded, decoded_shared_prefix)
+
+ def test_partial_stacked_causal_mask(self):
+ # Same as the test above, but the input is passed in two groups. It tests that we can pass partial 4D attention masks
+
+ (
+ input_ids,
+ position_ids,
+ input_ids_shared_prefix,
+ mask_shared_prefix,
+ position_ids_shared_prefix,
+ ) = self.get_test_data()
+
+ # regular batch
+ logits = self.model.forward(input_ids, position_ids=position_ids).logits
+ logits_last = logits[:, -1, :] # last tokens in each batch line
+ decoded = [self.tokenizer.decode(t) for t in logits_last.argmax(dim=-1)]
+
+ # 2 forward runs with custom 4D masks
+ part_a = 3 # split point
+
+ input_1a = input_ids_shared_prefix[:, :part_a]
+ position_ids_1a = position_ids_shared_prefix[:, :part_a]
+ mask_1a = mask_shared_prefix[:, :, :part_a, :part_a]
+
+ outs_1a = self.model.forward(input_1a, attention_mask=mask_1a, position_ids=position_ids_1a)
+ past_key_values_a = outs_1a["past_key_values"]
+
+ # Case 1: we pass a 4D attention mask regarding the current sequence length (i.e. [..., seq_len, full_len])
+ input_1b = input_ids_shared_prefix[:, part_a:]
+ position_ids_1b = position_ids_shared_prefix[:, part_a:]
+ mask_1b = mask_shared_prefix[:, :, part_a:, :]
+ outs_1b = self.model.forward(
+ input_1b,
+ attention_mask=mask_1b,
+ position_ids=position_ids_1b,
+ past_key_values=past_key_values_a,
+ )
+ decoded_1b = [
+ self.tokenizer.decode(t)
+ for t in outs_1b.logits.argmax(-1)[
+ 0, torch.where(position_ids_shared_prefix == position_ids_shared_prefix.max())[1] - part_a
+ ]
+ ]
+ self.assertEqual(decoded, decoded_1b)
+
+ def test_stacked_causal_mask_static_cache(self):
+ """same as above but with StaticCache"""
+ (
+ input_ids,
+ position_ids,
+ input_ids_shared_prefix,
+ mask_shared_prefix,
+ position_ids_shared_prefix,
+ ) = self.get_test_data()
+
+ # regular batch
+ logits = self.model.forward(input_ids, position_ids=position_ids).logits
+ logits_last = logits[:, -1, :] # last tokens in each batch line
+ decoded = [self.tokenizer.decode(t) for t in logits_last.argmax(dim=-1)]
+
+ # upgrade the model with StaticCache
+ max_cache_len = 16 # note that max_cache_len is greater than the attention_mask.shape[-1]
+ past_key_values = StaticCache(
+ config=self.model.config,
+ max_batch_size=1,
+ max_cache_len=max_cache_len,
+ device=torch_device,
+ dtype=self.model.dtype,
+ )
+
+ padded_attention_mask = torch.nn.functional.pad(
+ input=mask_shared_prefix,
+ pad=(0, max_cache_len - mask_shared_prefix.shape[-1]),
+ mode="constant",
+ value=torch.finfo(self.model_dtype).min,
+ )
+
+ # single forward run with 4D custom mask
+ logits_shared_prefix = self.model.forward(
+ input_ids_shared_prefix,
+ attention_mask=padded_attention_mask,
+ position_ids=position_ids_shared_prefix,
+ cache_position=torch.arange(input_ids_shared_prefix.shape[-1], device=torch_device),
+ past_key_values=past_key_values,
+ ).logits
+ logits_shared_prefix_last = logits_shared_prefix[
+ 0, torch.where(position_ids_shared_prefix == position_ids_shared_prefix.max())[1], :
+ ] # last three tokens
+ decoded_shared_prefix = [self.tokenizer.decode(t) for t in logits_shared_prefix_last.argmax(dim=-1)]
+
+ self.assertEqual(decoded, decoded_shared_prefix)
+
+ def test_partial_stacked_causal_mask_static_cache(self):
+ # Same as the test above, but the input is passed in two groups. It tests that we can pass partial 4D attention masks
+ # we pass a 4D attention mask shaped [..., seq_len, full_static_cache_len]
+ (
+ input_ids,
+ position_ids,
+ input_ids_shared_prefix,
+ mask_shared_prefix,
+ position_ids_shared_prefix,
+ ) = self.get_test_data()
+
+ # regular batch
+ logits = self.model.forward(input_ids, position_ids=position_ids).logits
+ logits_last = logits[:, -1, :] # last tokens in each batch line
+ decoded = [self.tokenizer.decode(t) for t in logits_last.argmax(dim=-1)]
+
+ # upgrade the model with StaticCache
+ max_cache_len = 16 # note that max_cache_len is greater than the attention_mask.shape[-1]
+ past_key_values = StaticCache(
+ config=self.model.config,
+ max_batch_size=1,
+ max_cache_len=max_cache_len,
+ device=torch_device,
+ dtype=self.model.dtype,
+ )
+
+ # forward run for the first part of input
+ part_a = 3 # split point
+
+ input_1a = input_ids_shared_prefix[:, :part_a]
+ position_ids_1a = position_ids_shared_prefix[:, :part_a]
+ mask_1a = mask_shared_prefix[:, :, :part_a, :part_a]
+
+ padded_mask_1a = torch.nn.functional.pad(
+ input=mask_1a,
+ pad=(0, max_cache_len - mask_1a.shape[-1]),
+ mode="constant",
+ value=torch.finfo(self.model_dtype).min,
+ )
+
+ _ = self.model.forward(
+ input_1a,
+ attention_mask=padded_mask_1a,
+ position_ids=position_ids_1a,
+ cache_position=torch.arange(part_a, device=torch_device),
+ past_key_values=past_key_values,
+ )
+
+ # forward run for the second part of input
+ input_1b = input_ids_shared_prefix[:, part_a:]
+ position_ids_1b = position_ids_shared_prefix[:, part_a:]
+ mask_1b = mask_shared_prefix[:, :, part_a:, :]
+
+ padded_mask_1b = torch.nn.functional.pad(
+ input=mask_1b, pad=(0, max_cache_len - mask_1b.shape[-1]), mode="constant", value=0
+ )
+
+ outs_1b = self.model.forward(
+ input_1b,
+ attention_mask=padded_mask_1b,
+ position_ids=position_ids_1b,
+ cache_position=torch.arange(
+ part_a,
+ input_ids_shared_prefix.shape[-1],
+ device=torch_device,
+ ),
+ past_key_values=past_key_values,
+ )
+ decoded_1b = [
+ self.tokenizer.decode(t)
+ for t in outs_1b.logits.argmax(-1)[
+ 0, torch.where(position_ids_shared_prefix == position_ids_shared_prefix.max())[1] - part_a
+ ]
+ ]
+ self.assertEqual(decoded, decoded_1b)
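
Two small transformations underpin the `Mask4DTestHard` class added above: per-token position ids are derived from the custom 4D mask (each token's position is the count of tokens it may attend to, minus one), and the boolean mask is converted into the additive float mask the model consumes (0 where attention is allowed, the dtype minimum where it is blocked). A self-contained sketch of both steps on a toy mask, assuming nothing beyond plain PyTorch:

```python
import torch

# Toy 4D mask (batch, heads, query_len, key_len): a 1-token shared prefix (token 0)
# followed by two independent 2-token continuations (tokens 1-2 and tokens 3-4).
mask = torch.tensor(
    [[[[1, 0, 0, 0, 0],
       [1, 1, 0, 0, 0],
       [1, 1, 1, 0, 0],
       [1, 0, 0, 1, 0],
       [1, 0, 0, 1, 1]]]]
)

# Position ids: the number of tokens each query row can attend to, minus one.
position_ids = (mask.sum(dim=-1) - 1).reshape(1, -1)
print(position_ids)  # tensor([[0, 1, 2, 1, 2]])

# Additive float mask: 0.0 where attention is allowed, dtype-min where it is blocked.
model_dtype = torch.float32
min_dtype = torch.finfo(model_dtype).min
additive_mask = mask.eq(0).to(dtype=model_dtype) * min_dtype
print(additive_mask[0, 0, 3])  # 0.0 at visible positions 0 and 3, -3.4028e+38 elsewhere
```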
diff --git a/tests/models/llama/test_tokenization_llama.py b/tests/models/llama/test_tokenization_llama.py
index 5a0bcea48af1..84bd6d7a9d9b 100644
--- a/tests/models/llama/test_tokenization_llama.py
+++ b/tests/models/llama/test_tokenization_llama.py
@@ -543,8 +543,15 @@ def test_integration_test_xnli(self):
def test_special_token_special_word(self):
# the word inform should be split as ['in', 'form']
- tokenizer = LlamaTokenizer.from_pretrained("huggyllama/llama-7b", legacy=False)
+ tokenizer = LlamaTokenizerFast.from_pretrained("huggyllama/llama-7b", legacy=False, from_slow=True)
tokenizer.add_tokens([AddedToken("", rstrip=True, lstrip=True)], special_tokens=False)
+
+ example_inputs = tokenizer.tokenize("inform. Hey. .")
+ self.assertEqual(example_inputs, ["", "in", "form", "", ".", "▁Hey", ".", "▁▁▁▁▁▁", "▁."])
+
+ # Make sure dummy space is added if it is indeed the first word
+ example_inputs = tokenizer.tokenize("inform. Hey. .")
+ self.assertEqual(example_inputs, ["▁inform", "", ".", "▁Hey", ".", "▁▁▁▁▁▁", "▁."])
out1 = tokenizer.decode(
tokenizer.encode("inform", add_special_tokens=False), spaces_between_special_tokens=False
)
@@ -553,12 +560,12 @@ def test_special_token_special_word(self):
tokenizer.encode("inform", add_special_tokens=False), spaces_between_special_tokens=True
)
# decoding strips the added prefix space.
- self.assertEqual(out2, " inform")
+ self.assertEqual(out2, "inform")
input_ids = tokenizer.encode("inform", add_special_tokens=False)
- self.assertEqual(input_ids, [29871, 32000, 262, 689]) # 29871 is the spiece underline, '▁' added as it should
+ self.assertEqual(input_ids, [32000, 262, 689]) # no spiece underline ('▁', id 29871) is prepended before the added special token anymore
out2 = tokenizer.decode(
- tokenizer.encode(" inform", add_special_tokens=False), spaces_between_special_tokens=False
+ tokenizer.encode(" inform", add_special_tokens=False), spaces_between_special_tokens=False
)
# TODO @ArthurZ currently we strip left and right, so this will not keep the spaces
self.assertEqual(out2, "inform")
@@ -575,11 +582,11 @@ def test_special_token_special_word(self):
# Let's make sure that if there are any spaces, we don't remove them!
input_ids = tokenizer.encode(" Hello how", add_special_tokens=False)
- self.assertEqual(input_ids, [259, 1, 15043, 1, 920])
+ self.assertEqual(input_ids, [29871, 1, 15043, 1, 920])
tokens = tokenizer.tokenize(" Hello how", add_special_tokens=False)
- self.assertEqual(tokens, ["▁▁", "", "▁Hello", "", "▁how"])
+ self.assertEqual(tokens, ["▁", "", "▁Hello", "", "▁how"])
decoded_tokens = tokenizer.decode(input_ids)
- self.assertEqual(decoded_tokens, " Hello how")
+ self.assertEqual(decoded_tokens, " Hello how")
# Let's make sure the space is preserved
input_ids = tokenizer.encode("hello", add_special_tokens=True)
@@ -594,6 +601,63 @@ def test_special_token_special_word(self):
decoded_tokens = tokenizer.decode(input_ids)
self.assertEqual(decoded_tokens, "hello")
+ def test_no_prefix_space(self):
+ tokenizer = LlamaTokenizerFast.from_pretrained(
+ "huggyllama/llama-7b", legacy=False, from_slow=True, add_prefix_space=False
+ )
+ tokenizer.add_tokens([AddedToken("", rstrip=True, lstrip=True)], special_tokens=False)
+
+ example_inputs = tokenizer.tokenize("inform. Hey. .")
+ self.assertEqual(example_inputs, ["", "in", "form", "", ".", "▁Hey", ".", "▁▁▁▁▁▁", "▁."])
+
+ # Make sure dummy space is added if it is indeed the first word
+ example_inputs = tokenizer.tokenize("inform. Hey. .")
+ self.assertEqual(example_inputs, ["in", "form", "", ".", "▁Hey", ".", "▁▁▁▁▁▁", "▁."])
+ out1 = tokenizer.decode(
+ tokenizer.encode("inform", add_special_tokens=False), spaces_between_special_tokens=False
+ )
+ self.assertEqual(out1, "inform")
+ out2 = tokenizer.decode(
+ tokenizer.encode("inform", add_special_tokens=False), spaces_between_special_tokens=True
+ )
+ # decoding strips the added prefix space.
+ self.assertEqual(out2, "inform")
+ input_ids = tokenizer.encode("inform", add_special_tokens=False)
+ self.assertEqual(input_ids, [32000, 262, 689]) # with add_prefix_space=False, no spiece underline ('▁', id 29871) is prepended
+
+ out2 = tokenizer.decode(
+ tokenizer.encode(" inform", add_special_tokens=False), spaces_between_special_tokens=False
+ )
+ self.assertEqual(out2, "inform")
+
+ input_ids = tokenizer.encode(" Hellohow", add_special_tokens=False)
+ self.assertEqual(input_ids, [1, 15043, 1, 3525])
+ tokens = tokenizer.tokenize(" Hellohow", add_special_tokens=False)
+ self.assertEqual(tokens, ["", "▁Hello", "", "how"])
+ decoded_tokens = tokenizer.decode(input_ids)
+ self.assertEqual(decoded_tokens, " Hellohow")
+
+ # Let's make sure that if there are any spaces, we don't remove them!
+ input_ids = tokenizer.encode(" Hello how", add_special_tokens=False)
+ self.assertEqual(input_ids, [29871, 1, 15043, 1, 920])
+ tokens = tokenizer.tokenize(" Hello how", add_special_tokens=False)
+ self.assertEqual(tokens, ["▁", "", "▁Hello", "", "▁how"])
+ decoded_tokens = tokenizer.decode(input_ids)
+ self.assertEqual(decoded_tokens, " Hello how")
+
+ # Let's make sure the space is preserved
+ input_ids = tokenizer.encode("hello", add_special_tokens=True)
+ self.assertEqual(input_ids, [1, 12199])
+ tokens = tokenizer.tokenize("hello")
+ self.assertEqual(tokens, ["hello"])
+ decoded_tokens = tokenizer.decode(input_ids)
+ self.assertEqual(decoded_tokens, "hello")
+
+ input_ids = tokenizer.encode("hello", add_special_tokens=False)
+ self.assertEqual(input_ids, [12199])
+ decoded_tokens = tokenizer.decode(input_ids)
+ self.assertEqual(decoded_tokens, "hello")
+
def test_some_edge_cases(self):
tokenizer = LlamaTokenizer.from_pretrained("huggyllama/llama-7b", legacy=False)
diff --git a/tests/models/llava/test_modeling_llava.py b/tests/models/llava/test_modeling_llava.py
index ce432e0599d7..aaf0284c0587 100644
--- a/tests/models/llava/test_modeling_llava.py
+++ b/tests/models/llava/test_modeling_llava.py
@@ -14,7 +14,6 @@
# limitations under the License.
"""Testing suite for the PyTorch Llava model."""
-import copy
import gc
import unittest
@@ -158,6 +157,19 @@ def prepare_config_and_inputs_for_common(self):
}
return config, inputs_dict
+ def create_and_check_llava_model_fp16_forward(self, config, input_ids, pixel_values, attention_mask):
+ model = LlavaForConditionalGeneration(config=config)
+ model.to(torch_device)
+ model.eval()
+ with torch.autocast(device_type="cuda", dtype=torch.float16):
+ logits = model(
+ input_ids=input_ids,
+ attention_mask=attention_mask,
+ pixel_values=pixel_values.to(torch.bfloat16),
+ return_dict=True,
+ )["logits"]
+ self.parent.assertFalse(torch.isnan(logits).any().item())
+
@require_torch
class LlavaForConditionalGenerationModelTest(ModelTesterMixin, unittest.TestCase):
@@ -192,171 +204,6 @@ def test_training_gradient_checkpointing_use_reentrant(self):
def test_training_gradient_checkpointing_use_reentrant_false(self):
pass
- # Copied from tests.test_modeling_common.ModelTesterMixin.test_resize_tokens_embeddings with config.vocab_size->config.text_config.vocab_size
- def test_resize_tokens_embeddings(self):
- (
- original_config,
- inputs_dict,
- ) = self.model_tester.prepare_config_and_inputs_for_common()
- if not self.test_resize_embeddings:
- return
-
- for model_class in self.all_model_classes:
- config = copy.deepcopy(original_config)
- model = model_class(config)
- model.to(torch_device)
-
- if self.model_tester.is_training is False:
- model.eval()
-
- model_vocab_size = config.text_config.vocab_size
- # Retrieve the embeddings and clone theme
- model_embed = model.resize_token_embeddings(model_vocab_size)
- cloned_embeddings = model_embed.weight.clone()
-
- # Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
- model_embed = model.resize_token_embeddings(model_vocab_size + 10)
- self.assertEqual(model.config.text_config.vocab_size, model_vocab_size + 10)
- # Check that it actually resizes the embeddings matrix
- self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] + 10)
- # Check that the model can still do a forward pass successfully (every parameter should be resized)
- model(**self._prepare_for_class(inputs_dict, model_class))
-
- # Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
- model_embed = model.resize_token_embeddings(model_vocab_size - 15)
- self.assertEqual(model.config.text_config.vocab_size, model_vocab_size - 15)
- # Check that it actually resizes the embeddings matrix
- self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] - 15)
-
- # Check that the model can still do a forward pass successfully (every parameter should be resized)
- # Input ids should be clamped to the maximum size of the vocabulary
- inputs_dict["input_ids"].clamp_(max=model_vocab_size - 15 - 1)
-
- # make sure that decoder_input_ids are resized as well
- if "decoder_input_ids" in inputs_dict:
- inputs_dict["decoder_input_ids"].clamp_(max=model_vocab_size - 15 - 1)
- model(**self._prepare_for_class(inputs_dict, model_class))
-
- # Check that adding and removing tokens has not modified the first part of the embedding matrix.
- models_equal = True
- for p1, p2 in zip(cloned_embeddings, model_embed.weight):
- if p1.data.ne(p2.data).sum() > 0:
- models_equal = False
-
- self.assertTrue(models_equal)
-
- config = copy.deepcopy(original_config)
- model = model_class(config)
- model.to(torch_device)
-
- model_vocab_size = config.text_config.vocab_size
- model.resize_token_embeddings(model_vocab_size + 10, pad_to_multiple_of=1)
- self.assertTrue(model.config.text_config.vocab_size + 10, model_vocab_size)
-
- model_embed = model.resize_token_embeddings(model_vocab_size, pad_to_multiple_of=64)
- self.assertTrue(model_embed.weight.shape[0] // 64, 0)
-
- self.assertTrue(model_embed.weight.shape[0], model.config.text_config.vocab_size)
- self.assertTrue(model.config.text_config.vocab_size, model.vocab_size)
-
- model_embed = model.resize_token_embeddings(model_vocab_size + 13, pad_to_multiple_of=64)
- self.assertTrue(model_embed.weight.shape[0] // 64, 0)
-
- # Check that resizing a model to a multiple of pad_to_multiple leads to a model of exactly that size
- target_dimension = 128
- model_embed = model.resize_token_embeddings(target_dimension, pad_to_multiple_of=64)
- self.assertTrue(model_embed.weight.shape[0], target_dimension)
-
- with self.assertRaisesRegex(
- ValueError,
- "Asking to pad the embedding matrix to a multiple of `1.3`, which is not and integer. Please make sure to pass an integer",
- ):
- model.resize_token_embeddings(model_vocab_size, pad_to_multiple_of=1.3)
-
- # Copied from tests.test_modeling_common.ModelTesterMixin.test_resize_embeddings_untied with config.vocab_size->config.text_config.vocab_size
- def test_resize_embeddings_untied(self):
- (
- original_config,
- inputs_dict,
- ) = self.model_tester.prepare_config_and_inputs_for_common()
- if not self.test_resize_embeddings:
- return
-
- original_config.tie_word_embeddings = False
-
- # if model cannot untied embeddings -> leave test
- if original_config.tie_word_embeddings:
- return
-
- for model_class in self.all_model_classes:
- config = copy.deepcopy(original_config)
- model = model_class(config).to(torch_device)
-
- # if no output embeddings -> leave test
- if model.get_output_embeddings() is None:
- continue
-
- # Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
- model_vocab_size = config.text_config.vocab_size
- model.resize_token_embeddings(model_vocab_size + 10)
- self.assertEqual(model.config.text_config.vocab_size, model_vocab_size + 10)
- output_embeds = model.get_output_embeddings()
- self.assertEqual(output_embeds.weight.shape[0], model_vocab_size + 10)
- # Check bias if present
- if output_embeds.bias is not None:
- self.assertEqual(output_embeds.bias.shape[0], model_vocab_size + 10)
- # Check that the model can still do a forward pass successfully (every parameter should be resized)
- model(**self._prepare_for_class(inputs_dict, model_class))
-
- # Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
- model.resize_token_embeddings(model_vocab_size - 15)
- self.assertEqual(model.config.text_config.vocab_size, model_vocab_size - 15)
- # Check that it actually resizes the embeddings matrix
- output_embeds = model.get_output_embeddings()
- self.assertEqual(output_embeds.weight.shape[0], model_vocab_size - 15)
- # Check bias if present
- if output_embeds.bias is not None:
- self.assertEqual(output_embeds.bias.shape[0], model_vocab_size - 15)
- # Check that the model can still do a forward pass successfully (every parameter should be resized)
- # Input ids should be clamped to the maximum size of the vocabulary
- inputs_dict["input_ids"].clamp_(max=model_vocab_size - 15 - 1)
- if "decoder_input_ids" in inputs_dict:
- inputs_dict["decoder_input_ids"].clamp_(max=model_vocab_size - 15 - 1)
- # Check that the model can still do a forward pass successfully (every parameter should be resized)
- model(**self._prepare_for_class(inputs_dict, model_class))
-
- # Copied from tests.test_modeling_common.ModelTesterMixin.test_tie_model_weights with config.vocab_size->config.text_config.vocab_size
- def test_tie_model_weights(self):
- if not self.test_torchscript:
- return
-
- config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
- def check_same_values(layer_1, layer_2):
- equal = True
- for p1, p2 in zip(layer_1.weight, layer_2.weight):
- if p1.data.ne(p2.data).sum() > 0:
- equal = False
- return equal
-
- for model_class in self.all_model_classes:
- config.torchscript = True
- model_not_tied = model_class(config)
- if model_not_tied.get_output_embeddings() is None:
- continue
-
- config_tied = copy.deepcopy(config)
- config_tied.torchscript = False
- model_tied = model_class(config_tied)
- params_tied = list(model_tied.parameters())
- # Check that the embedding layer and decoding layer are the same in size and in value
- # self.assertTrue(check_same_values(embeddings, decoding))
-
- # Check that after resize they remain tied.
- model_tied.resize_token_embeddings(config.text_config.vocab_size + 10)
- params_tied_2 = list(model_tied.parameters())
- self.assertEqual(len(params_tied_2), len(params_tied))
-
@require_torch
class LlavaForConditionalGenerationIntegrationTest(unittest.TestCase):
@@ -391,7 +238,7 @@ def test_small_model_integration_test(self):
@slow
@require_bitsandbytes
- def test_small_model_integration_test_llama(self):
+ def test_small_model_integration_test_llama_single(self):
# Let' s make sure we test the preprocessing to replace what is used
model_id = "llava-hf/llava-1.5-7b-hf"
@@ -404,7 +251,7 @@ def test_small_model_integration_test_llama(self):
inputs = processor(prompt, raw_image, return_tensors="pt").to(torch_device, torch.float16)
output = model.generate(**inputs, max_new_tokens=900, do_sample=False)
- EXPECTED_DECODED_TEXT = "USER: \nWhat are the things I should be cautious about when I visit this place? ASSISTANT: When visiting this place, which is a pier or dock extending over a body of water, there are a few things to be cautious about. First, be aware of the weather conditions, as sudden changes in weather can make the pier unsafe to walk on. Second, be mindful of the water depth and any potential hazards, such as submerged rocks or debris, that could cause accidents or injuries. Additionally, be cautious of the tides and currents, as they can change rapidly and pose a risk to swimmers or those who venture too close to the edge of the pier. Finally, be respectful of the environment and other visitors, and follow any posted rules or guidelines for the area." # fmt: skip
+ EXPECTED_DECODED_TEXT = "USER: \nWhat are the things I should be cautious about when I visit this place? ASSISTANT: When visiting this place, which is a pier or dock extending over a body of water, there are a few things to be cautious about. First, be aware of the weather conditions, as sudden changes in weather can make the pier unsafe to walk on. Second, be mindful of the water depth and any potential hazards, such as submerged rocks or debris, that could cause accidents or injuries. Additionally, be cautious of the tides and currents, as they can change rapidly and pose a risk to swimmers or those who venture too close to the edge of the pier. Lastly, be respectful of the environment and other visitors, as the pier is a shared space where people can enjoy the view, relax, or engage in recreational activities." # fmt: skip
self.assertEqual(
processor.decode(output[0], skip_special_tokens=True),
@@ -433,7 +280,10 @@ def test_small_model_integration_test_llama_batched(self):
EXPECTED_DECODED_TEXT = ['USER: \nWhat are the things I should be cautious about when I visit this place? What should I bring with me? ASSISTANT: When visiting this place, which is a pier or dock extending over a body of water, you', 'USER: \nWhat is this? ASSISTANT: The image features two cats lying down on a pink couch. One cat is located on'] # fmt: skip
- self.assertEqual(processor.batch_decode(output, skip_special_tokens=True), EXPECTED_DECODED_TEXT)
+ self.assertEqual(
+ processor.batch_decode(output, skip_special_tokens=True),
+ EXPECTED_DECODED_TEXT,
+ )
@slow
@require_bitsandbytes
@@ -453,7 +303,10 @@ def test_small_model_integration_test_batch(self):
output = model.generate(**inputs, max_new_tokens=20)
EXPECTED_DECODED_TEXT = ['USER: \nWhat are the things I should be cautious about when I visit this place? What should I bring with me?\nASSISTANT: When visiting this place, there are a few things to be cautious about and items to bring along', 'USER: \nWhat is this?\nASSISTANT: Cats'] # fmt: skip
- self.assertEqual(self.processor.batch_decode(output, skip_special_tokens=True), EXPECTED_DECODED_TEXT)
+ self.assertEqual(
+ self.processor.batch_decode(output, skip_special_tokens=True),
+ EXPECTED_DECODED_TEXT,
+ )
@slow
@require_bitsandbytes
@@ -480,7 +333,10 @@ def test_small_model_integration_test_llama_batched_regression(self):
EXPECTED_DECODED_TEXT = ['USER: \nWhat are the things I should be cautious about when I visit this place? What should I bring with me?\nASSISTANT: When visiting this place, which appears to be a dock or pier extending over a body of water', 'USER: \nWhat is this?\nASSISTANT: Two cats lying on a bed!\nUSER: \nAnd this?\nASSISTANT: A cat sleeping on a bed.'] # fmt: skip
- self.assertEqual(processor.batch_decode(output, skip_special_tokens=True), EXPECTED_DECODED_TEXT)
+ self.assertEqual(
+ processor.batch_decode(output, skip_special_tokens=True),
+ EXPECTED_DECODED_TEXT,
+ )
@slow
@require_torch
@@ -508,7 +364,7 @@ def test_batched_generation(self):
model = model.eval()
EXPECTED_OUTPUT = [
- "\n \nUSER: What's the the difference of two images?\nASSISTANT: In the two images, the primary difference is the presence of a small dog holding a flower in one",
+ "\n \nUSER: What's the the difference of two images?\nASSISTANT: In the two images, the primary difference is the presence of a small dog in one and a ll",
"\nUSER: Describe the image.\nASSISTANT: The image features a small, fluffy dog sitting on a sidewalk. The dog is holding",
"\nUSER: Describe the image.\nASSISTANT: The image features a lone, adult llama standing on a grassy hill. The llama",
]
@@ -591,14 +447,6 @@ def test_tokenizer_integration(self):
fast_tokenizer.add_tokens("<image>", True)
prompt = "<|im_start|>system\nAnswer the questions.<|im_end|><|im_start|>user\n<image>\nWhat is shown in this image?<|im_end|><|im_start|>assistant\n"
- # If the token is added as special, it's not normalized, and the only diff is the extra space after special tokens.
- # https://github.com/huggingface/transformers/pull/28881 is the fix for this.
- self.assertEqual(
- slow_tokenizer.tokenize(prompt),
- ['<|im_start|>', 'system', '\n', 'Answer', '▁the', '▁questions', '.', '<|im_end|>', '<|im_start|>', 'user', '\n', '<image>', '\n', 'What', '▁is', '▁shown', '▁in', '▁this', '▁image', '?', '<|im_end|>', '<|im_start|>', 'ass', 'istant', '\n']
- ) # fmt: skip
-
- self.assertEqual(
- fast_tokenizer.tokenize(prompt),
- ['<|im_start|>', '▁system', '\n', 'Answer', '▁the', '▁questions', '.', '<|im_end|>', '<|im_start|>', '▁user', '\n', '<image>', '▁', '\n', 'What', '▁is', '▁shown', '▁in', '▁this', '▁image', '?', '<|im_end|>', '<|im_start|>', '▁assistant', '\n']
- ) # fmt: skip
+ EXPECTED_OUTPUT = ['<|im_start|>', 'system', '\n', 'Answer', '▁the', '▁questions', '.', '<|im_end|>', '<|im_start|>', 'user', '\n', '<image>', '\n', 'What', '▁is', '▁shown', '▁in', '▁this', '▁image', '?', '<|im_end|>', '<|im_start|>', 'ass', 'istant', '\n'] # fmt: skip
+ self.assertEqual(slow_tokenizer.tokenize(prompt), EXPECTED_OUTPUT)
+ self.assertEqual(fast_tokenizer.tokenize(prompt), EXPECTED_OUTPUT)
diff --git a/tests/models/llava_next/test_modeling_llava_next.py b/tests/models/llava_next/test_modeling_llava_next.py
index 1c7e32009043..0eb0611ace40 100644
--- a/tests/models/llava_next/test_modeling_llava_next.py
+++ b/tests/models/llava_next/test_modeling_llava_next.py
@@ -14,7 +14,6 @@
# limitations under the License.
""" Testing suite for the PyTorch Llava-NeXT model. """
-import copy
import gc
import unittest
@@ -28,11 +27,21 @@
is_torch_available,
is_vision_available,
)
-from transformers.testing_utils import require_bitsandbytes, require_torch, slow, torch_device
+from transformers.testing_utils import (
+ require_bitsandbytes,
+ require_torch,
+ slow,
+ torch_device,
+)
from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
-from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_tensor, ids_tensor
+from ...test_modeling_common import (
+ ModelTesterMixin,
+ _config_zero_init,
+ floats_tensor,
+ ids_tensor,
+)
if is_torch_available():
@@ -158,6 +167,39 @@ def prepare_config_and_inputs_for_common(self):
}
return config, inputs_dict
+ def create_and_check_llava_next_model_fp16_forward(
+ self, config, input_ids, pixel_values, attention_mask, image_sizes
+ ):
+ model = LlavaNextForConditionalGeneration(config=config)
+ model.to(torch_device)
+ model.half()
+ model.eval()
+ logits = model(
+ input_ids=input_ids,
+ attention_mask=attention_mask,
+ image_sizes=image_sizes,
+ pixel_values=pixel_values.to(torch.bfloat16),
+ return_dict=True,
+ )["logits"]
+ self.parent.assertFalse(torch.isnan(logits).any().item())
+
+ def create_and_check_llava_next_model_fp16_autocast_forward(
+ self, config, input_ids, pixel_values, attention_mask, image_sizes
+ ):
+ config.torch_dtype = torch.float16
+ model = LlavaNextForConditionalGeneration(config=config)
+ model.to(torch_device)
+ model.eval()
+ with torch.autocast(device_type="cuda", dtype=torch.float16):
+ logits = model(
+ input_ids=input_ids,
+ attention_mask=attention_mask,
+ image_sizes=image_sizes,
+ pixel_values=pixel_values.to(torch.bfloat16),
+ return_dict=True,
+ )["logits"]
+ self.parent.assertFalse(torch.isnan(logits).any().item())
+
@require_torch
class LlavaNextForConditionalGenerationModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
@@ -215,171 +257,6 @@ def test_feed_forward_chunking(self):
def test_cpu_offload(self):
pass
- # Copied from tests.test_modeling_common.ModelTesterMixin.test_resize_tokens_embeddings with config.vocab_size->config.text_config.vocab_size
- def test_resize_tokens_embeddings(self):
- (
- original_config,
- inputs_dict,
- ) = self.model_tester.prepare_config_and_inputs_for_common()
- if not self.test_resize_embeddings:
- return
-
- for model_class in self.all_model_classes:
- config = copy.deepcopy(original_config)
- model = model_class(config)
- model.to(torch_device)
-
- if self.model_tester.is_training is False:
- model.eval()
-
- model_vocab_size = config.text_config.vocab_size
- # Retrieve the embeddings and clone theme
- model_embed = model.resize_token_embeddings(model_vocab_size)
- cloned_embeddings = model_embed.weight.clone()
-
- # Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
- model_embed = model.resize_token_embeddings(model_vocab_size + 10)
- self.assertEqual(model.config.text_config.vocab_size, model_vocab_size + 10)
- # Check that it actually resizes the embeddings matrix
- self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] + 10)
- # Check that the model can still do a forward pass successfully (every parameter should be resized)
- model(**self._prepare_for_class(inputs_dict, model_class))
-
- # Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
- model_embed = model.resize_token_embeddings(model_vocab_size - 15)
- self.assertEqual(model.config.text_config.vocab_size, model_vocab_size - 15)
- # Check that it actually resizes the embeddings matrix
- self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] - 15)
-
- # Check that the model can still do a forward pass successfully (every parameter should be resized)
- # Input ids should be clamped to the maximum size of the vocabulary
- inputs_dict["input_ids"].clamp_(max=model_vocab_size - 15 - 1)
-
- # make sure that decoder_input_ids are resized as well
- if "decoder_input_ids" in inputs_dict:
- inputs_dict["decoder_input_ids"].clamp_(max=model_vocab_size - 15 - 1)
- model(**self._prepare_for_class(inputs_dict, model_class))
-
- # Check that adding and removing tokens has not modified the first part of the embedding matrix.
- models_equal = True
- for p1, p2 in zip(cloned_embeddings, model_embed.weight):
- if p1.data.ne(p2.data).sum() > 0:
- models_equal = False
-
- self.assertTrue(models_equal)
-
- config = copy.deepcopy(original_config)
- model = model_class(config)
- model.to(torch_device)
-
- model_vocab_size = config.text_config.vocab_size
- model.resize_token_embeddings(model_vocab_size + 10, pad_to_multiple_of=1)
- self.assertTrue(model.config.text_config.vocab_size + 10, model_vocab_size)
-
- model_embed = model.resize_token_embeddings(model_vocab_size, pad_to_multiple_of=64)
- self.assertTrue(model_embed.weight.shape[0] // 64, 0)
-
- self.assertTrue(model_embed.weight.shape[0], model.config.text_config.vocab_size)
- self.assertTrue(model.config.text_config.vocab_size, model.vocab_size)
-
- model_embed = model.resize_token_embeddings(model_vocab_size + 13, pad_to_multiple_of=64)
- self.assertTrue(model_embed.weight.shape[0] // 64, 0)
-
- # Check that resizing a model to a multiple of pad_to_multiple leads to a model of exactly that size
- target_dimension = 128
- model_embed = model.resize_token_embeddings(target_dimension, pad_to_multiple_of=64)
- self.assertTrue(model_embed.weight.shape[0], target_dimension)
-
- with self.assertRaisesRegex(
- ValueError,
- "Asking to pad the embedding matrix to a multiple of `1.3`, which is not and integer. Please make sure to pass an integer",
- ):
- model.resize_token_embeddings(model_vocab_size, pad_to_multiple_of=1.3)
-
- # Copied from tests.test_modeling_common.ModelTesterMixin.test_resize_embeddings_untied with config.vocab_size->config.text_config.vocab_size
- def test_resize_embeddings_untied(self):
- (
- original_config,
- inputs_dict,
- ) = self.model_tester.prepare_config_and_inputs_for_common()
- if not self.test_resize_embeddings:
- return
-
- original_config.tie_word_embeddings = False
-
- # if model cannot untied embeddings -> leave test
- if original_config.tie_word_embeddings:
- return
-
- for model_class in self.all_model_classes:
- config = copy.deepcopy(original_config)
- model = model_class(config).to(torch_device)
-
- # if no output embeddings -> leave test
- if model.get_output_embeddings() is None:
- continue
-
- # Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
- model_vocab_size = config.text_config.vocab_size
- model.resize_token_embeddings(model_vocab_size + 10)
- self.assertEqual(model.config.text_config.vocab_size, model_vocab_size + 10)
- output_embeds = model.get_output_embeddings()
- self.assertEqual(output_embeds.weight.shape[0], model_vocab_size + 10)
- # Check bias if present
- if output_embeds.bias is not None:
- self.assertEqual(output_embeds.bias.shape[0], model_vocab_size + 10)
- # Check that the model can still do a forward pass successfully (every parameter should be resized)
- model(**self._prepare_for_class(inputs_dict, model_class))
-
- # Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
- model.resize_token_embeddings(model_vocab_size - 15)
- self.assertEqual(model.config.text_config.vocab_size, model_vocab_size - 15)
- # Check that it actually resizes the embeddings matrix
- output_embeds = model.get_output_embeddings()
- self.assertEqual(output_embeds.weight.shape[0], model_vocab_size - 15)
- # Check bias if present
- if output_embeds.bias is not None:
- self.assertEqual(output_embeds.bias.shape[0], model_vocab_size - 15)
- # Check that the model can still do a forward pass successfully (every parameter should be resized)
- # Input ids should be clamped to the maximum size of the vocabulary
- inputs_dict["input_ids"].clamp_(max=model_vocab_size - 15 - 1)
- if "decoder_input_ids" in inputs_dict:
- inputs_dict["decoder_input_ids"].clamp_(max=model_vocab_size - 15 - 1)
- # Check that the model can still do a forward pass successfully (every parameter should be resized)
- model(**self._prepare_for_class(inputs_dict, model_class))
-
- # Copied from tests.test_modeling_common.ModelTesterMixin.test_tie_model_weights with config.vocab_size->config.text_config.vocab_size
- def test_tie_model_weights(self):
- if not self.test_torchscript:
- return
-
- config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
- def check_same_values(layer_1, layer_2):
- equal = True
- for p1, p2 in zip(layer_1.weight, layer_2.weight):
- if p1.data.ne(p2.data).sum() > 0:
- equal = False
- return equal
-
- for model_class in self.all_model_classes:
- config.torchscript = True
- model_not_tied = model_class(config)
- if model_not_tied.get_output_embeddings() is None:
- continue
-
- config_tied = copy.deepcopy(config)
- config_tied.torchscript = False
- model_tied = model_class(config_tied)
- params_tied = list(model_tied.parameters())
- # Check that the embedding layer and decoding layer are the same in size and in value
- # self.assertTrue(check_same_values(embeddings, decoding))
-
- # Check that after resize they remain tied.
- model_tied.resize_token_embeddings(config.text_config.vocab_size + 10)
- params_tied_2 = list(model_tied.parameters())
- self.assertEqual(len(params_tied_2), len(params_tied))
-
@require_torch
class LlavaNextForConditionalGenerationIntegrationTest(unittest.TestCase):
@@ -405,14 +282,20 @@ def test_small_model_integration_test(self):
inputs = self.processor(self.prompt, self.image, return_tensors="pt")
# verify inputs against original implementation
- filepath = hf_hub_download(repo_id="nielsr/test-image", filename="llava_1_6_input_ids.pt", repo_type="dataset")
+ filepath = hf_hub_download(
+ repo_id="nielsr/test-image",
+ filename="llava_1_6_input_ids.pt",
+ repo_type="dataset",
+ )
original_input_ids = torch.load(filepath, map_location="cpu")
# replace -200 by image_token_index (since we use token ID = 32000 for the image token)
original_input_ids[original_input_ids == -200] = model.config.image_token_index
assert original_input_ids[0].tolist() == inputs.input_ids[0].tolist()
filepath = hf_hub_download(
- repo_id="nielsr/test-image", filename="llava_1_6_pixel_values.pt", repo_type="dataset"
+ repo_id="nielsr/test-image",
+ filename="llava_1_6_pixel_values.pt",
+ repo_type="dataset",
)
original_pixel_values = torch.load(filepath, map_location="cpu")
assert torch.allclose(original_pixel_values, inputs.pixel_values.half())
@@ -423,7 +306,11 @@ def test_small_model_integration_test(self):
output = model(**inputs)
expected_slice = torch.tensor(
- [[-4.7695, -4.5664, -0.2786], [-10.6250, -10.8906, -2.5254], [-6.7383, -7.2461, -0.6787]],
+ [
+ [-4.7695, -4.5664, -0.2786],
+ [-10.6250, -10.8906, -2.5254],
+ [-6.7383, -7.2461, -0.6787],
+ ],
dtype=torch.float32,
device=torch_device,
)
@@ -448,7 +335,10 @@ def test_small_model_integration_test_batch(self):
cats_image = Image.open(requests.get(url, stream=True).raw)
inputs = self.processor(
- [self.prompt, self.prompt], images=[self.image, cats_image], return_tensors="pt", padding=True
+ [self.prompt, self.prompt],
+ images=[self.image, cats_image],
+ return_tensors="pt",
+ padding=True,
).to(torch_device)
# make sure image_sizes are the same
@@ -458,4 +348,33 @@ def test_small_model_integration_test_batch(self):
output = model.generate(**inputs, max_new_tokens=20)
EXPECTED_DECODED_TEXT = ['[INST] \nWhat is shown in this image? [/INST] The image appears to be a radar chart, which is a type of multi-dimensional plot that displays', '[INST] \nWhat is shown in this image? [/INST] The image shows two cats lying on a pink surface, which appears to be a couch or a cush'] # fmt: skip
- self.assertEqual(self.processor.batch_decode(output, skip_special_tokens=True), EXPECTED_DECODED_TEXT)
+ self.assertEqual(
+ self.processor.batch_decode(output, skip_special_tokens=True),
+ EXPECTED_DECODED_TEXT,
+ )
+
+ @slow
+ @require_bitsandbytes
+ def test_small_model_integration_test_unk_token(self):
+ # related to (#29835)
+ model = LlavaNextForConditionalGeneration.from_pretrained(
+ "llava-hf/llava-v1.6-mistral-7b-hf",
+ load_in_4bit=True,
+ )
+
+ prompt_with_unk = "[INST] <image>\nWhat is shown in this image?<unk> [/INST]"
+ inputs = self.processor(prompt_with_unk, self.image, return_tensors="pt")
+
+ # verify single forward pass
+ inputs = inputs.to(torch_device)
+ with torch.no_grad():
+ output = model(**inputs)
+
+ # verify generation
+ output = model.generate(**inputs, max_new_tokens=40)
+ EXPECTED_DECODED_TEXT = '[INST] \nWhat is shown in this image? [/INST] The image appears to be a radar chart, which is a type of multi-dimensional plot that displays values for multiple quantitative variables represented on axes starting from the same point. This particular radar chart' # fmt: skip
+
+ self.assertEqual(
+ self.processor.decode(output[0], skip_special_tokens=True),
+ EXPECTED_DECODED_TEXT,
+ )
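
As an aside on the input verification in the llava_next hunks above: the integration test remaps the original implementation's image-slot sentinel to the HF image token id before comparing tensors. Below is a minimal standalone sketch of that remapping (assuming plain PyTorch; -200 and 32000 come from the test's own comment, while the surrounding token ids are made up for illustration):

    import torch

    # -200 marks the image slot in the reference input_ids; HF uses image_token_index (32000 here)
    original_input_ids = torch.tensor([[1, -200, 1724, 338, 4318]])
    image_token_index = 32000

    # in-place remap of every sentinel position, the same masked-assignment idiom the test uses
    original_input_ids[original_input_ids == -200] = image_token_index

    assert original_input_ids.tolist() == [[1, 32000, 1724, 338, 4318]]
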
diff --git a/tests/models/longt5/test_modeling_longt5.py b/tests/models/longt5/test_modeling_longt5.py
index c65af001e103..42efd5f01e65 100644
--- a/tests/models/longt5/test_modeling_longt5.py
+++ b/tests/models/longt5/test_modeling_longt5.py
@@ -752,7 +752,7 @@ def test_attention_outputs(self):
def _check_encoder_attention_for_generate(self, attentions, batch_size, config, seq_length):
block_len = getattr(self.model_tester, "block_len", None)
- encoder_expected_shape = (batch_size, 1, config.num_attention_heads, block_len, 3 * block_len)
+ encoder_expected_shape = (batch_size, 2, config.num_attention_heads, block_len, 3 * block_len)
self.assertIsInstance(attentions, tuple)
self.assertListEqual(
[layer_attentions.shape for layer_attentions in attentions],
@@ -885,7 +885,7 @@ def _check_encoder_attention_for_generate(self, attentions, batch_size, config,
global_seq_length = seq_length // global_block_size
encoder_expected_shape = (
batch_size,
- 1,
+ 2,
config.num_attention_heads,
block_len,
3 * block_len + global_seq_length,
diff --git a/tests/models/lxmert/test_modeling_lxmert.py b/tests/models/lxmert/test_modeling_lxmert.py
index 723fef6061b3..a98643b33cd7 100644
--- a/tests/models/lxmert/test_modeling_lxmert.py
+++ b/tests/models/lxmert/test_modeling_lxmert.py
@@ -766,6 +766,18 @@ def prepare_tf_inputs_from_pt_inputs(self, pt_inputs_dict):
return tf_inputs_dict
+ @unittest.skip("No support for low_cpu_mem_usage=True.")
+ def test_save_load_low_cpu_mem_usage(self):
+ pass
+
+ @unittest.skip("No support for low_cpu_mem_usage=True.")
+ def test_save_load_low_cpu_mem_usage_checkpoints(self):
+ pass
+
+ @unittest.skip("No support for low_cpu_mem_usage=True.")
+ def test_save_load_low_cpu_mem_usage_no_safetensors(self):
+ pass
+
@require_torch
class LxmertModelIntegrationTest(unittest.TestCase):
diff --git a/tests/models/marian/test_modeling_marian.py b/tests/models/marian/test_modeling_marian.py
index 593ef8e3405e..3144dd48dab2 100644
--- a/tests/models/marian/test_modeling_marian.py
+++ b/tests/models/marian/test_modeling_marian.py
@@ -372,6 +372,18 @@ def test_training_gradient_checkpointing_use_reentrant(self):
def test_training_gradient_checkpointing_use_reentrant_false(self):
pass
+ @unittest.skip("No support for low_cpu_mem_usage=True.")
+ def test_save_load_low_cpu_mem_usage(self):
+ pass
+
+ @unittest.skip("No support for low_cpu_mem_usage=True.")
+ def test_save_load_low_cpu_mem_usage_checkpoints(self):
+ pass
+
+ @unittest.skip("No support for low_cpu_mem_usage=True.")
+ def test_save_load_low_cpu_mem_usage_no_safetensors(self):
+ pass
+
def assert_tensors_close(a, b, atol=1e-12, prefix=""):
"""If tensors have different shapes, different values or a and b are not both tensors, raise a nice Assertion error."""
diff --git a/tests/models/mistral/test_modeling_mistral.py b/tests/models/mistral/test_modeling_mistral.py
index 3500024b3ea1..bbc36c050e23 100644
--- a/tests/models/mistral/test_modeling_mistral.py
+++ b/tests/models/mistral/test_modeling_mistral.py
@@ -627,3 +627,127 @@ def test_speculative_generation(self):
del model
backend_empty_cache(torch_device)
gc.collect()
+
+
+@slow
+@require_torch_gpu
+class Mask4DTestHard(unittest.TestCase):
+ def tearDown(self):
+ gc.collect()
+ torch.cuda.empty_cache()
+
+ def setUp(self):
+ model_name = "mistralai/Mistral-7B-v0.1"
+ self.model_dtype = torch.float32
+ self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
+ self.model = MistralForCausalLM.from_pretrained(model_name, torch_dtype=self.model_dtype).to(torch_device)
+
+ def get_test_data(self):
+ template = "my favorite {}"
+ items = ("pet is a", "artist plays a", "name is L") # same number of tokens in each item
+
+ batch_separate = [template.format(x) for x in items] # 3 separate lines
+ batch_shared_prefix = template.format(" ".join(items)) # 1 line with options concatenated
+
+ input_ids = self.tokenizer(batch_separate, return_tensors="pt").input_ids.to(torch_device)
+ input_ids_shared_prefix = self.tokenizer(batch_shared_prefix, return_tensors="pt").input_ids.to(torch_device)
+
+ mask_shared_prefix = torch.tensor(
+ [
+ [
+ [
+ [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
+ [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
+ [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
+ [1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0],
+ [1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0],
+ [1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0],
+ [1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0],
+ [1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0],
+ [1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1],
+ ]
+ ]
+ ],
+ device=torch_device,
+ )
+
+ position_ids = torch.arange(input_ids.shape[1]).tile(input_ids.shape[0], 1).to(torch_device)
+
+ # building custom position ids based on the custom mask
+ position_ids_shared_prefix = (mask_shared_prefix.sum(dim=-1) - 1).reshape(1, -1)
+ # effectively: position_ids_shared_prefix = torch.tensor([[0, 1, 2, 3, 4, 5, 3, 4, 5, 3, 4, 5]]).to(device)
+
+ # inverting the mask
+ min_dtype = torch.finfo(self.model_dtype).min
+ mask_shared_prefix = (mask_shared_prefix.eq(0.0)).to(dtype=self.model_dtype) * min_dtype
+
+ return input_ids, position_ids, input_ids_shared_prefix, mask_shared_prefix, position_ids_shared_prefix
+
+ def test_stacked_causal_mask(self):
+ (
+ input_ids,
+ position_ids,
+ input_ids_shared_prefix,
+ mask_shared_prefix,
+ position_ids_shared_prefix,
+ ) = self.get_test_data()
+
+ # regular batch
+ logits = self.model.forward(input_ids, position_ids=position_ids).logits
+ logits_last = logits[:, -1, :] # last tokens in each batch line
+ decoded = [self.tokenizer.decode(t) for t in logits_last.argmax(dim=-1)]
+
+ # single forward run with 4D custom mask
+ logits_shared_prefix = self.model.forward(
+ input_ids_shared_prefix, attention_mask=mask_shared_prefix, position_ids=position_ids_shared_prefix
+ ).logits
+ logits_shared_prefix_last = logits_shared_prefix[
+ 0, torch.where(position_ids_shared_prefix == position_ids_shared_prefix.max())[1], :
+ ] # last three tokens
+ decoded_shared_prefix = [self.tokenizer.decode(t) for t in logits_shared_prefix_last.argmax(dim=-1)]
+
+ self.assertEqual(decoded, decoded_shared_prefix)
+
+ def test_partial_stacked_causal_mask(self):
+ # Same as the test above, but the input is passed in two groups. It tests that we can pass partial 4D attention masks
+
+ (
+ input_ids,
+ position_ids,
+ input_ids_shared_prefix,
+ mask_shared_prefix,
+ position_ids_shared_prefix,
+ ) = self.get_test_data()
+
+ # regular batch
+ logits = self.model.forward(input_ids, position_ids=position_ids).logits
+ logits_last = logits[:, -1, :] # last tokens in each batch line
+ decoded = [self.tokenizer.decode(t) for t in logits_last.argmax(dim=-1)]
+
+ # 2 forward runs with custom 4D masks
+ part_a = 3 # split point
+
+ input_1a = input_ids_shared_prefix[:, :part_a]
+ position_ids_1a = position_ids_shared_prefix[:, :part_a]
+ mask_1a = mask_shared_prefix[:, :, :part_a, :part_a]
+
+ outs_1a = self.model.forward(input_1a, attention_mask=mask_1a, position_ids=position_ids_1a)
+ past_key_values_a = outs_1a["past_key_values"]
+
+ # Case 1: we pass a 4D attention mask regarding the current sequence length (i.e. [..., seq_len, full_len])
+ input_1b = input_ids_shared_prefix[:, part_a:]
+ position_ids_1b = position_ids_shared_prefix[:, part_a:]
+ mask_1b = mask_shared_prefix[:, :, part_a:, :]
+ outs_1b = self.model.forward(
+ input_1b, attention_mask=mask_1b, position_ids=position_ids_1b, past_key_values=past_key_values_a
+ )
+ decoded_1b = [
+ self.tokenizer.decode(t)
+ for t in outs_1b.logits.argmax(-1)[
+ 0, torch.where(position_ids_shared_prefix == position_ids_shared_prefix.max())[1] - part_a
+ ]
+ ]
+ self.assertEqual(decoded, decoded_1b)
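
The new Mask4DTestHard class above packs a shared prompt prefix and three continuations into a single row, then reproduces what three separate rows would compute by combining a custom 4D attention mask, restarted position ids, and an additive float mask. The following is a minimal standalone sketch of that construction (assuming plain PyTorch only; the 3-token prefix and three 3-token continuations mirror the tensors spelled out in get_test_data, and nothing here calls the model):

    import torch

    prefix_len, cont_len, n_cont = 3, 3, 3
    total_len = prefix_len + n_cont * cont_len  # 12 packed tokens

    # boolean 4D mask of shape (batch=1, heads=1, q_len, kv_len)
    mask = torch.zeros(1, 1, total_len, total_len, dtype=torch.bool)
    mask[0, 0, :prefix_len, :prefix_len] = torch.ones(prefix_len, prefix_len).tril().bool()
    for i in range(n_cont):
        start = prefix_len + i * cont_len
        # each continuation attends to the shared prefix ...
        mask[0, 0, start : start + cont_len, :prefix_len] = True
        # ... and causally to its own tokens, but never to the other continuations
        mask[0, 0, start : start + cont_len, start : start + cont_len] = torch.ones(cont_len, cont_len).tril().bool()

    # position ids restart after the prefix: [0, 1, 2, 3, 4, 5, 3, 4, 5, 3, 4, 5]
    position_ids = (mask[0, 0].sum(dim=-1) - 1).reshape(1, -1)

    # additive form the model expects: 0.0 where attending, dtype-min where masked
    min_dtype = torch.finfo(torch.float32).min
    additive_mask = (~mask).to(torch.float32) * min_dtype
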
diff --git a/tests/models/musicgen/test_modeling_musicgen.py b/tests/models/musicgen/test_modeling_musicgen.py
index df1df64c9cf3..8482072d73cf 100644
--- a/tests/models/musicgen/test_modeling_musicgen.py
+++ b/tests/models/musicgen/test_modeling_musicgen.py
@@ -35,6 +35,7 @@
is_torch_available,
require_flash_attn,
require_torch,
+ require_torch_accelerator,
require_torch_fp16,
require_torch_gpu,
require_torch_sdpa,
@@ -109,8 +110,7 @@ def __init__(
parent,
batch_size=4, # need batch_size != num_hidden_layers
seq_length=7,
- is_training=False,
- use_labels=False,
+ is_training=True,
vocab_size=99,
hidden_size=16,
num_hidden_layers=2,
@@ -128,7 +128,6 @@ def __init__(
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
- self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
@@ -148,7 +147,9 @@ def prepare_config_and_inputs(self):
config = self.get_config()
inputs_dict = prepare_musicgen_decoder_inputs_dict(
- config, input_ids, encoder_hidden_states=encoder_hidden_states
+ config,
+ input_ids,
+ encoder_hidden_states=encoder_hidden_states,
)
return config, inputs_dict
@@ -189,6 +190,45 @@ def setUp(self):
def test_config(self):
self.config_tester.run_common_tests()
+ # special case for labels
+ def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
+ inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
+
+ if return_labels:
+ inputs_dict["labels"] = torch.zeros(
+ (self.model_tester.batch_size, self.model_tester.seq_length, self.model_tester.num_codebooks),
+ dtype=torch.long,
+ device=torch_device,
+ )
+ return inputs_dict
+
+ def check_training_gradient_checkpointing(self, gradient_checkpointing_kwargs=None):
+ if not self.model_tester.is_training:
+ return
+
+ config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+ config.use_cache = False
+ config.return_dict = True
+ model = MusicgenForCausalLM(config)
+
+ model.to(torch_device)
+ model.gradient_checkpointing_enable(gradient_checkpointing_kwargs=gradient_checkpointing_kwargs)
+ model.train()
+
+ # Contrary to the initial method, we don't unfreeze frozen parameters.
+ # Indeed, sinusoidal position embeddings have frozen weights that should stay frozen.
+
+ optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
+
+ inputs = self._prepare_for_class(inputs_dict, MusicgenForCausalLM, return_labels=True)
+ loss = model(**inputs).loss
+ loss.backward()
+ optimizer.step()
+
+ for k, v in model.named_parameters():
+ if v.requires_grad:
+ self.assertTrue(v.grad is not None, f"{k} in {MusicgenForCausalLM.__name__} has no gradient!")
+
# override since we have to compute the input embeddings over codebooks
def test_inputs_embeds(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -225,6 +265,10 @@ def test_model_common_attributes(self):
lm_heads = model.get_output_embeddings()
self.assertTrue(lm_heads is None or isinstance(lm_heads[0], torch.nn.Linear))
+ @unittest.skip(reason="MusicGen does not use inputs_embeds")
+ def test_inputs_embeds_matches_input_ids(self):
+ pass
+
# skip as this model doesn't support all arguments tested
def test_model_outputs_equivalence(self):
pass
@@ -245,34 +289,28 @@ def _get_input_ids_and_config(self, batch_size=2):
sequence_length = input_ids.shape[-1]
input_ids = input_ids[: batch_size * config.num_codebooks, :]
- # generate max 3 tokens
- max_length = input_ids.shape[-1] + 3
attention_mask = torch.ones((batch_size, sequence_length), dtype=torch.long)
- return config, input_ids, attention_mask, max_length
+ return config, input_ids, attention_mask
@staticmethod
def _get_logits_processor_and_warper_kwargs(
input_length,
forced_bos_token_id=None,
forced_eos_token_id=None,
- max_length=None,
):
- process_kwargs = {
- "min_length": input_length + 1 if max_length is None else max_length - 1,
- }
+ process_kwargs = {}
warper_kwargs = {}
return process_kwargs, warper_kwargs
def test_greedy_generate_stereo_outputs(self):
for model_class in self.greedy_sample_model_classes:
- config, input_ids, attention_mask, max_length = self._get_input_ids_and_config()
+ config, input_ids, attention_mask = self._get_input_ids_and_config()
config.audio_channels = 2
model = model_class(config).to(torch_device).eval()
output_generate = self._greedy_generate(
model=model,
input_ids=input_ids.to(torch_device),
attention_mask=attention_mask.to(torch_device),
- max_length=max_length,
output_scores=True,
output_hidden_states=True,
output_attentions=True,
@@ -902,6 +940,7 @@ def prepare_musicgen_inputs_dict(
head_mask=None,
decoder_head_mask=None,
cross_attn_head_mask=None,
+ labels=None,
):
if decoder_attention_mask is None:
decoder_attention_mask = decoder_input_ids.reshape(
@@ -928,6 +967,7 @@ def prepare_musicgen_inputs_dict(
"head_mask": head_mask,
"decoder_head_mask": decoder_head_mask,
"cross_attn_head_mask": cross_attn_head_mask,
+ "labels": labels,
}
@@ -937,8 +977,7 @@ def __init__(
parent,
batch_size=4, # need batch_size != num_hidden_layers
seq_length=7,
- is_training=False,
- use_labels=False,
+ is_training=True,
vocab_size=99,
hidden_size=16,
num_hidden_layers=2,
@@ -958,7 +997,6 @@ def __init__(
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
- self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
@@ -1032,6 +1070,47 @@ class MusicgenTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
def setUp(self):
self.model_tester = MusicgenTester(self)
+ # special case for labels
+ def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
+ inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
+
+ if return_labels:
+ inputs_dict["labels"] = torch.zeros(
+ (self.model_tester.batch_size, self.model_tester.seq_length, self.model_tester.num_codebooks),
+ dtype=torch.long,
+ device=torch_device,
+ )
+ return inputs_dict
+
+ def check_training_gradient_checkpointing(self, gradient_checkpointing_kwargs=None):
+ if not self.model_tester.is_training:
+ return
+
+ for model_class in self.all_model_classes:
+ config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+ config.use_cache = False
+ config.return_dict = True
+ model = model_class(config)
+
+ model.to(torch_device)
+ model.gradient_checkpointing_enable(gradient_checkpointing_kwargs=gradient_checkpointing_kwargs)
+ model.train()
+
+ # The audio encoder weights are not used during the forward pass (only during the generate pass)
+ # So we need to freeze it to be able to train.
+ model.freeze_audio_encoder()
+
+ optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
+
+ inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
+ loss = model(**inputs).loss
+ loss.backward()
+ optimizer.step()
+
+ for k, v in model.named_parameters():
+ if v.requires_grad:
+ self.assertTrue(v.grad is not None, f"{k} in {model_class.__name__} has no gradient!")
+
def _check_output_with_attentions(self, outputs, config, input_ids, decoder_input_ids):
text_encoder_config = config.text_encoder
decoder_config = config.decoder
@@ -1194,6 +1273,18 @@ def test_tied_model_weights_key_ignore(self):
def test_tied_weights_keys(self):
pass
+ @unittest.skip("No support for low_cpu_mem_usage=True.")
+ def test_save_load_low_cpu_mem_usage(self):
+ pass
+
+ @unittest.skip("No support for low_cpu_mem_usage=True.")
+ def test_save_load_low_cpu_mem_usage_checkpoints(self):
+ pass
+
+ @unittest.skip("No support for low_cpu_mem_usage=True.")
+ def test_save_load_low_cpu_mem_usage_no_safetensors(self):
+ pass
+
# override since changing `output_hidden_states` / `output_attentions` from the top-level model config won't work
def test_retain_grad_hidden_states_attentions(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -1327,9 +1418,7 @@ def _get_input_ids_and_config(self, batch_size=2):
input_ids = input_ids[:batch_size, :]
attention_mask = torch.ones((batch_size, sequence_length), dtype=torch.long)
- # generate max 3 tokens
- max_length = 3
- return config, input_ids, attention_mask, max_length
+ return config, input_ids, attention_mask
# override since the `input_ids` cannot be used as the `decoder_input_ids` for musicgen (input / outputs are
# different modalities -> different shapes)
@@ -1338,29 +1427,22 @@ def _greedy_generate(
model,
input_ids,
attention_mask,
- max_length,
output_scores=False,
output_attentions=False,
output_hidden_states=False,
return_dict_in_generate=False,
):
- logits_process_kwargs, _ = self._get_logits_processor_and_warper_kwargs(
- input_ids.shape[-1],
- max_length=max_length,
- )
-
model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {}
output_generate = model.generate(
input_ids,
do_sample=False,
num_beams=1,
- max_length=max_length,
+ max_new_tokens=self.max_new_tokens,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
output_scores=output_scores,
return_dict_in_generate=return_dict_in_generate,
remove_invalid_values=True,
- **logits_process_kwargs,
**model_kwargs,
)
@@ -1373,10 +1455,7 @@ def _sample_generate(
model,
input_ids,
attention_mask,
- max_length,
num_return_sequences,
- logits_warper_kwargs,
- process_kwargs,
output_scores=False,
output_attentions=False,
output_hidden_states=False,
@@ -1388,15 +1467,13 @@ def _sample_generate(
input_ids,
do_sample=True,
num_beams=1,
- max_length=max_length,
+ max_new_tokens=self.max_new_tokens,
num_return_sequences=num_return_sequences,
output_scores=output_scores,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict_in_generate=return_dict_in_generate,
remove_invalid_values=True,
- **logits_warper_kwargs,
- **process_kwargs,
**model_kwargs,
)
@@ -1407,25 +1484,21 @@ def _get_logits_processor_and_warper_kwargs(
input_length,
forced_bos_token_id=None,
forced_eos_token_id=None,
- max_length=None,
):
- process_kwargs = {
- "min_length": input_length + 1 if max_length is None else max_length - 1,
- }
+ process_kwargs = {}
warper_kwargs = {}
return process_kwargs, warper_kwargs
def test_greedy_generate_dict_outputs(self):
for model_class in self.greedy_sample_model_classes:
# disable cache
- config, input_ids, attention_mask, max_length = self._get_input_ids_and_config()
+ config, input_ids, attention_mask = self._get_input_ids_and_config()
config.use_cache = False
model = model_class(config).to(torch_device).eval()
output_generate = self._greedy_generate(
model=model,
input_ids=input_ids.to(torch_device),
attention_mask=attention_mask.to(torch_device),
- max_length=max_length,
output_scores=True,
output_hidden_states=True,
output_attentions=True,
@@ -1439,7 +1512,7 @@ def test_greedy_generate_dict_outputs(self):
def test_greedy_generate_dict_outputs_use_cache(self):
for model_class in self.greedy_sample_model_classes:
# enable cache
- config, input_ids, attention_mask, max_length = self._get_input_ids_and_config()
+ config, input_ids, attention_mask = self._get_input_ids_and_config()
config.use_cache = True
config.is_decoder = True
@@ -1448,7 +1521,6 @@ def test_greedy_generate_dict_outputs_use_cache(self):
model=model,
input_ids=input_ids.to(torch_device),
attention_mask=attention_mask.to(torch_device),
- max_length=max_length,
output_scores=True,
output_hidden_states=True,
output_attentions=True,
@@ -1459,46 +1531,30 @@ def test_greedy_generate_dict_outputs_use_cache(self):
def test_sample_generate(self):
for model_class in self.greedy_sample_model_classes:
- config, input_ids, attention_mask, max_length = self._get_input_ids_and_config()
+ config, input_ids, attention_mask = self._get_input_ids_and_config()
model = model_class(config).to(torch_device).eval()
- process_kwargs, logits_warper_kwargs = self._get_logits_processor_and_warper_kwargs(
- input_ids.shape[-1],
- max_length=max_length,
- )
-
# check `generate()` and `sample()` are equal
output_generate = self._sample_generate(
model=model,
input_ids=input_ids.to(torch_device),
attention_mask=attention_mask.to(torch_device),
- max_length=max_length,
num_return_sequences=1,
- logits_warper_kwargs=logits_warper_kwargs,
- process_kwargs=process_kwargs,
)
self.assertIsInstance(output_generate, torch.Tensor)
def test_sample_generate_dict_output(self):
for model_class in self.greedy_sample_model_classes:
# disable cache
- config, input_ids, attention_mask, max_length = self._get_input_ids_and_config()
+ config, input_ids, attention_mask = self._get_input_ids_and_config()
config.use_cache = False
model = model_class(config).to(torch_device).eval()
- process_kwargs, logits_warper_kwargs = self._get_logits_processor_and_warper_kwargs(
- input_ids.shape[-1],
- max_length=max_length,
- )
-
output_generate = self._sample_generate(
model=model,
input_ids=input_ids.to(torch_device),
attention_mask=attention_mask.to(torch_device),
- max_length=max_length,
num_return_sequences=3,
- logits_warper_kwargs=logits_warper_kwargs,
- process_kwargs=process_kwargs,
output_scores=True,
output_hidden_states=True,
output_attentions=True,
@@ -1508,7 +1564,7 @@ def test_sample_generate_dict_output(self):
self.assertIsInstance(output_generate, GenerateEncoderDecoderOutput)
def test_generate_without_input_ids(self):
- config, _, _, max_length = self._get_input_ids_and_config()
+ config, _, _ = self._get_input_ids_and_config()
# if no bos token id => cannot generate from None
if config.bos_token_id is None:
@@ -1518,10 +1574,13 @@ def test_generate_without_input_ids(self):
model = model_class(config).to(torch_device)
model.eval()
- output_ids_generate = model.generate(do_sample=False, max_length=max_length, remove_invalid_values=True)
+ output_ids_generate = model.generate(
+ do_sample=False, max_new_tokens=self.max_new_tokens, remove_invalid_values=True
+ )
self.assertIsNotNone(output_ids_generate)
@require_torch_fp16
+ @require_torch_accelerator # not all operations are supported in fp16 on CPU
def test_generate_fp16(self):
config, input_dict = self.model_tester.prepare_config_and_inputs()
@@ -1537,7 +1596,7 @@ def test_generate_fp16(self):
def test_greedy_generate_stereo_outputs(self):
for model_class in self.greedy_sample_model_classes:
- config, input_ids, attention_mask, max_length = self._get_input_ids_and_config()
+ config, input_ids, attention_mask = self._get_input_ids_and_config()
config.audio_channels = 2
model = model_class(config).to(torch_device).eval()
@@ -1545,7 +1604,6 @@ def test_greedy_generate_stereo_outputs(self):
model=model,
input_ids=input_ids.to(torch_device),
attention_mask=attention_mask.to(torch_device),
- max_length=max_length,
output_scores=True,
output_hidden_states=True,
output_attentions=True,
@@ -1556,6 +1614,10 @@ def test_greedy_generate_stereo_outputs(self):
self.assertNotIn(config.pad_token_id, output_generate)
+ @unittest.skip("MusicgenModel is actually not the base of MusicgenForCausalLM as the latter is a composit model")
+ def test_save_load_fast_init_from_base(self):
+ pass
+
@require_flash_attn
@require_torch_gpu
@mark.flash_attn_test
@@ -2189,6 +2251,27 @@ def test_eager_matches_sdpa_generate(self):
self.assertTrue(torch.allclose(res_eager, res_sdpa))
+ def test_requires_grad_with_frozen_encoders(self):
+ config = self.model_tester.get_config()
+ for model_class in self.all_model_classes:
+ model = model_class(config)
+ model.freeze_audio_encoder()
+
+ audio_encoder_grads = [param.requires_grad for param in model.audio_encoder.parameters()]
+ text_encoder_grads = [param.requires_grad for param in model.text_encoder.parameters()]
+
+ self.assertFalse(all(audio_encoder_grads))
+ self.assertTrue(all(text_encoder_grads))
+
+ model = model_class(config)
+ model.freeze_text_encoder()
+
+ audio_encoder_grads = [param.requires_grad for param in model.audio_encoder.parameters()]
+ text_encoder_grads = [param.requires_grad for param in model.text_encoder.parameters()]
+
+ self.assertTrue(all(audio_encoder_grads))
+ self.assertFalse(all(text_encoder_grads))
+
def get_bip_bip(bip_duration=0.125, duration=0.5, sample_rate=32000):
"""Produces a series of 'bip bip' sounds at a given frequency."""
diff --git a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py
index 667958a2513b..b32b50825846 100644
--- a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py
+++ b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py
@@ -35,6 +35,7 @@
is_torchaudio_available,
require_flash_attn,
require_torch,
+ require_torch_accelerator,
require_torch_fp16,
require_torch_gpu,
require_torch_sdpa,
@@ -108,8 +109,7 @@ def __init__(
parent,
batch_size=3, # need batch_size != num_hidden_layers because of #29297
seq_length=7,
- is_training=False,
- use_labels=False,
+ is_training=True,
vocab_size=99,
hidden_size=16,
num_hidden_layers=2,
@@ -128,7 +128,6 @@ def __init__(
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
- self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
@@ -150,7 +149,9 @@ def prepare_config_and_inputs(self):
config = self.get_config()
inputs_dict = prepare_musicgen_melody_decoder_inputs_dict(
- config, input_ids, encoder_hidden_states=encoder_hidden_states
+ config,
+ input_ids,
+ encoder_hidden_states=encoder_hidden_states,
)
return config, inputs_dict
@@ -190,6 +191,47 @@ def setUp(self):
def test_config(self):
self.config_tester.run_common_tests()
+ # special case for labels
+ # Copied from tests.models.musicgen.test_modeling_musicgen.MusicgenDecoderTest._prepare_for_class
+ def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
+ inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
+
+ if return_labels:
+ inputs_dict["labels"] = torch.zeros(
+ (self.model_tester.batch_size, self.model_tester.seq_length, self.model_tester.num_codebooks),
+ dtype=torch.long,
+ device=torch_device,
+ )
+ return inputs_dict
+
+ # Copied from tests.models.musicgen.test_modeling_musicgen.MusicgenDecoderTest.check_training_gradient_checkpointing with Musicgen->MusicgenMelody
+ def check_training_gradient_checkpointing(self, gradient_checkpointing_kwargs=None):
+ if not self.model_tester.is_training:
+ return
+
+ config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+ config.use_cache = False
+ config.return_dict = True
+ model = MusicgenMelodyForCausalLM(config)
+
+ model.to(torch_device)
+ model.gradient_checkpointing_enable(gradient_checkpointing_kwargs=gradient_checkpointing_kwargs)
+ model.train()
+
+ # Contrary to the initial method, we don't unfreeze frozen parameters.
+ # Indeed, sinusoidal position embeddings have frozen weights that should stay frozen.
+
+ optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
+
+ inputs = self._prepare_for_class(inputs_dict, MusicgenMelodyForCausalLM, return_labels=True)
+ loss = model(**inputs).loss
+ loss.backward()
+ optimizer.step()
+
+ for k, v in model.named_parameters():
+ if v.requires_grad:
+ self.assertTrue(v.grad is not None, f"{k} in {MusicgenMelodyForCausalLM.__name__} has no gradient!")
+
# override since we have to compute the input embeddings over codebooks
def test_inputs_embeds(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -226,6 +268,10 @@ def test_model_common_attributes(self):
lm_heads = model.get_output_embeddings()
self.assertTrue(lm_heads is None or isinstance(lm_heads[0], torch.nn.Linear))
+ @unittest.skip(reason="MusicGen melody does not use inputs_embeds")
+ def test_inputs_embeds_matches_input_ids(self):
+ pass
+
@unittest.skip("this model doesn't support all arguments tested")
def test_model_outputs_equivalence(self):
pass
@@ -246,34 +292,28 @@ def _get_input_ids_and_config(self, batch_size=2):
sequence_length = input_ids.shape[-1]
input_ids = input_ids[: batch_size * config.num_codebooks, :]
- # generate max 3 tokens
- max_length = input_ids.shape[-1] + 3
attention_mask = torch.ones((batch_size, sequence_length), dtype=torch.long)
- return config, input_ids, attention_mask, max_length
+ return config, input_ids, attention_mask
@staticmethod
def _get_logits_processor_and_warper_kwargs(
input_length,
forced_bos_token_id=None,
forced_eos_token_id=None,
- max_length=None,
):
- process_kwargs = {
- "min_length": input_length + 1 if max_length is None else max_length - 1,
- }
+ process_kwargs = {}
warper_kwargs = {}
return process_kwargs, warper_kwargs
def test_greedy_generate_stereo_outputs(self):
for model_class in self.greedy_sample_model_classes:
- config, input_ids, attention_mask, max_length = self._get_input_ids_and_config()
+ config, input_ids, attention_mask = self._get_input_ids_and_config()
config.audio_channels = 2
model = model_class(config).to(torch_device).eval()
output_generate = self._greedy_generate(
model=model,
input_ids=input_ids.to(torch_device),
attention_mask=attention_mask.to(torch_device),
- max_length=max_length,
output_scores=True,
output_hidden_states=True,
output_attentions=True,
@@ -901,6 +941,7 @@ def prepare_musicgen_melody_inputs_dict(
decoder_attention_mask=None,
head_mask=None,
decoder_head_mask=None,
+ labels=None,
):
if decoder_attention_mask is None:
decoder_attention_mask = decoder_input_ids.reshape(
@@ -922,6 +963,7 @@ def prepare_musicgen_melody_inputs_dict(
"decoder_attention_mask": decoder_attention_mask,
"head_mask": head_mask,
"decoder_head_mask": decoder_head_mask,
+ "labels": labels,
}
@@ -931,8 +973,7 @@ def __init__(
parent,
batch_size=3, # need batch_size != num_hidden_layers because of #29297
seq_length=7,
- is_training=False,
- use_labels=False,
+ is_training=True,
vocab_size=99,
hidden_size=16,
num_hidden_layers=2,
@@ -954,7 +995,6 @@ def __init__(
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
- self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
@@ -1034,6 +1074,47 @@ class MusicgenMelodyTest(ModelTesterMixin, GenerationTesterMixin, PipelineTester
def setUp(self):
self.model_tester = MusicgenMelodyTester(self)
+ # special case for labels
+ def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
+ inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
+
+ if return_labels:
+ inputs_dict["labels"] = torch.zeros(
+ (self.model_tester.batch_size, self.model_tester.seq_length, self.model_tester.num_codebooks),
+ dtype=torch.long,
+ device=torch_device,
+ )
+ return inputs_dict
+
+ def check_training_gradient_checkpointing(self, gradient_checkpointing_kwargs=None):
+ if not self.model_tester.is_training:
+ return
+
+ for model_class in self.all_model_classes:
+ config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+ config.use_cache = False
+ config.return_dict = True
+ model = model_class(config)
+
+ model.to(torch_device)
+ model.gradient_checkpointing_enable(gradient_checkpointing_kwargs=gradient_checkpointing_kwargs)
+ model.train()
+
+ # The audio encoder weights are not used during the forward pass (only during the generate pass)
+ # So we need to freeze it to be able to train.
+ model.freeze_audio_encoder()
+
+ optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
+
+ inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
+ loss = model(**inputs).loss
+ loss.backward()
+ optimizer.step()
+
+ for k, v in model.named_parameters():
+ if v.requires_grad:
+ self.assertTrue(v.grad is not None, f"{k} in {model_class.__name__} has no gradient!")
+
# Ignore copy
def _check_output_with_attentions(self, outputs, config, input_ids, decoder_input_ids):
decoder_config = config.decoder
@@ -1177,6 +1258,18 @@ def test_tied_model_weights_key_ignore(self):
def test_tied_weights_keys(self):
pass
+ @unittest.skip("No support for low_cpu_mem_usage=True.")
+ def test_save_load_low_cpu_mem_usage(self):
+ pass
+
+ @unittest.skip("No support for low_cpu_mem_usage=True.")
+ def test_save_load_low_cpu_mem_usage_checkpoints(self):
+ pass
+
+ @unittest.skip("No support for low_cpu_mem_usage=True.")
+ def test_save_load_low_cpu_mem_usage_no_safetensors(self):
+ pass
+
# override since changing `output_hidden_states` / `output_attentions` from the top-level model config won't work
# Ignore copy
def test_retain_grad_hidden_states_attentions(self):
@@ -1309,9 +1402,7 @@ def _get_input_ids_and_config(self, batch_size=2):
input_ids = input_ids[:batch_size, :]
attention_mask = torch.ones((batch_size, sequence_length), dtype=torch.long)
- # generate max 3 tokens
- max_length = 3
- return config, input_ids, attention_mask, max_length
+ return config, input_ids, attention_mask
# override since the `input_ids` cannot be used as the `decoder_input_ids` for musicgen_melody (input / outputs are
# different modalities -> different shapes)
@@ -1320,29 +1411,22 @@ def _greedy_generate(
model,
input_ids,
attention_mask,
- max_length,
output_scores=False,
output_attentions=False,
output_hidden_states=False,
return_dict_in_generate=False,
):
- logits_process_kwargs, _ = self._get_logits_processor_and_warper_kwargs(
- input_ids.shape[-1],
- max_length=max_length,
- )
-
model_kwargs = {"attention_mask": attention_mask} if attention_mask is not None else {}
output_generate = model.generate(
input_ids,
do_sample=False,
num_beams=1,
- max_length=max_length,
+ max_new_tokens=self.max_new_tokens,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
output_scores=output_scores,
return_dict_in_generate=return_dict_in_generate,
remove_invalid_values=True,
- **logits_process_kwargs,
**model_kwargs,
)
@@ -1355,10 +1439,7 @@ def _sample_generate(
model,
input_ids,
attention_mask,
- max_length,
num_return_sequences,
- logits_warper_kwargs,
- process_kwargs,
output_scores=False,
output_attentions=False,
output_hidden_states=False,
@@ -1370,15 +1451,13 @@ def _sample_generate(
input_ids,
do_sample=True,
num_beams=1,
- max_length=max_length,
+ max_new_tokens=self.max_new_tokens,
num_return_sequences=num_return_sequences,
output_scores=output_scores,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict_in_generate=return_dict_in_generate,
remove_invalid_values=True,
- **logits_warper_kwargs,
- **process_kwargs,
**model_kwargs,
)
@@ -1389,25 +1468,21 @@ def _get_logits_processor_and_warper_kwargs(
input_length,
forced_bos_token_id=None,
forced_eos_token_id=None,
- max_length=None,
):
- process_kwargs = {
- "min_length": input_length + 1 if max_length is None else max_length - 1,
- }
+ process_kwargs = {}
warper_kwargs = {}
return process_kwargs, warper_kwargs
def test_greedy_generate_dict_outputs(self):
for model_class in self.greedy_sample_model_classes:
# disable cache
- config, input_ids, attention_mask, max_length = self._get_input_ids_and_config()
+ config, input_ids, attention_mask = self._get_input_ids_and_config()
config.use_cache = False
model = model_class(config).to(torch_device).eval()
output_generate = self._greedy_generate(
model=model,
input_ids=input_ids.to(torch_device),
attention_mask=attention_mask.to(torch_device),
- max_length=max_length,
output_scores=True,
output_hidden_states=True,
output_attentions=True,
@@ -1421,7 +1496,7 @@ def test_greedy_generate_dict_outputs(self):
def test_greedy_generate_dict_outputs_use_cache(self):
for model_class in self.greedy_sample_model_classes:
# enable cache
- config, input_ids, attention_mask, max_length = self._get_input_ids_and_config()
+ config, input_ids, attention_mask = self._get_input_ids_and_config()
config.use_cache = True
config.is_decoder = True
@@ -1430,7 +1505,6 @@ def test_greedy_generate_dict_outputs_use_cache(self):
model=model,
input_ids=input_ids.to(torch_device),
attention_mask=attention_mask.to(torch_device),
- max_length=max_length,
output_scores=True,
output_hidden_states=True,
output_attentions=True,
@@ -1441,46 +1515,30 @@ def test_greedy_generate_dict_outputs_use_cache(self):
def test_sample_generate(self):
for model_class in self.greedy_sample_model_classes:
- config, input_ids, attention_mask, max_length = self._get_input_ids_and_config()
+ config, input_ids, attention_mask = self._get_input_ids_and_config()
model = model_class(config).to(torch_device).eval()
- process_kwargs, logits_warper_kwargs = self._get_logits_processor_and_warper_kwargs(
- input_ids.shape[-1],
- max_length=max_length,
- )
-
# check `generate()` and `sample()` are equal
output_generate = self._sample_generate(
model=model,
input_ids=input_ids.to(torch_device),
attention_mask=attention_mask.to(torch_device),
- max_length=max_length,
num_return_sequences=1,
- logits_warper_kwargs=logits_warper_kwargs,
- process_kwargs=process_kwargs,
)
self.assertIsInstance(output_generate, torch.Tensor)
def test_sample_generate_dict_output(self):
for model_class in self.greedy_sample_model_classes:
# disable cache
- config, input_ids, attention_mask, max_length = self._get_input_ids_and_config()
+ config, input_ids, attention_mask = self._get_input_ids_and_config()
config.use_cache = False
model = model_class(config).to(torch_device).eval()
- process_kwargs, logits_warper_kwargs = self._get_logits_processor_and_warper_kwargs(
- input_ids.shape[-1],
- max_length=max_length,
- )
-
output_generate = self._sample_generate(
model=model,
input_ids=input_ids.to(torch_device),
attention_mask=attention_mask.to(torch_device),
- max_length=max_length,
num_return_sequences=3,
- logits_warper_kwargs=logits_warper_kwargs,
- process_kwargs=process_kwargs,
output_scores=True,
output_hidden_states=True,
output_attentions=True,
@@ -1490,7 +1548,7 @@ def test_sample_generate_dict_output(self):
self.assertIsInstance(output_generate, GenerateDecoderOnlyOutput)
def test_generate_without_input_ids(self):
- config, _, _, max_length = self._get_input_ids_and_config()
+ config, _, _ = self._get_input_ids_and_config()
# if no bos token id => cannot generate from None
if config.bos_token_id is None:
@@ -1500,10 +1558,13 @@ def test_generate_without_input_ids(self):
model = model_class(config).to(torch_device)
model.eval()
- output_ids_generate = model.generate(do_sample=False, max_length=max_length, remove_invalid_values=True)
+ output_ids_generate = model.generate(
+ do_sample=False, max_new_tokens=self.max_new_tokens, remove_invalid_values=True
+ )
self.assertIsNotNone(output_ids_generate)
@require_torch_fp16
+ @require_torch_accelerator # not all operations are supported in fp16 on CPU
def test_generate_fp16(self):
config, input_dict = self.model_tester.prepare_config_and_inputs()
@@ -1519,7 +1580,7 @@ def test_generate_fp16(self):
def test_greedy_generate_stereo_outputs(self):
for model_class in self.greedy_sample_model_classes:
- config, input_ids, attention_mask, max_length = self._get_input_ids_and_config()
+ config, input_ids, attention_mask = self._get_input_ids_and_config()
config.audio_channels = 2
model = model_class(config).to(torch_device).eval()
@@ -1527,7 +1588,6 @@ def test_greedy_generate_stereo_outputs(self):
model=model,
input_ids=input_ids.to(torch_device),
attention_mask=attention_mask.to(torch_device),
- max_length=max_length,
output_scores=True,
output_hidden_states=True,
output_attentions=True,
@@ -1538,6 +1598,12 @@ def test_greedy_generate_stereo_outputs(self):
self.assertNotIn(config.pad_token_id, output_generate)
+ @unittest.skip(
+ "MusicgenMelodyModel is actually not the base of MusicgenMelodyForCausalLM as the latter is a composit model"
+ )
+ def test_save_load_fast_init_from_base(self):
+ pass
+
@require_flash_attn
@require_torch_gpu
@mark.flash_attn_test
@@ -2171,6 +2237,27 @@ def test_eager_matches_sdpa_generate(self):
self.assertTrue(torch.allclose(res_eager, res_sdpa))
+ def test_requires_grad_with_frozen_encoders(self):
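+        # freezing the audio (resp. text) encoder should disable requires_grad only for that encoder's parameters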
+ config = self.model_tester.get_config()
+ for model_class in self.all_model_classes:
+ model = model_class(config)
+ model.freeze_audio_encoder()
+
+ audio_encoder_grads = [param.requires_grad for param in model.audio_encoder.parameters()]
+ text_encoder_grads = [param.requires_grad for param in model.text_encoder.parameters()]
+
+ self.assertFalse(all(audio_encoder_grads))
+ self.assertTrue(all(text_encoder_grads))
+
+ model = model_class(config)
+ model.freeze_text_encoder()
+
+ audio_encoder_grads = [param.requires_grad for param in model.audio_encoder.parameters()]
+ text_encoder_grads = [param.requires_grad for param in model.text_encoder.parameters()]
+
+ self.assertTrue(all(audio_encoder_grads))
+ self.assertFalse(all(text_encoder_grads))
+
# Copied from tests.models.musicgen.test_modeling_musicgen.get_bip_bip
def get_bip_bip(bip_duration=0.125, duration=0.5, sample_rate=32000):
diff --git a/tests/models/olmo/test_modeling_olmo.py b/tests/models/olmo/test_modeling_olmo.py
index ce354db52b29..906bd73a70d2 100644
--- a/tests/models/olmo/test_modeling_olmo.py
+++ b/tests/models/olmo/test_modeling_olmo.py
@@ -353,11 +353,6 @@ def test_model_rope_scaling(self, scaling_type):
# The output should be different for long inputs
self.assertFalse(torch.allclose(original_long_output, scaled_long_output, atol=1e-5))
- @unittest.skip("TODO @gante fix this for OLMo")
- @parameterized.expand([(1, False), (1, True), (4, False)])
- def test_new_cache_format(self, num_beams, do_sample):
- pass
-
@require_torch
class OlmoIntegrationTest(unittest.TestCase):
diff --git a/tests/models/owlv2/test_image_processor_owlv2.py b/tests/models/owlv2/test_image_processor_owlv2.py
index 62fd25329055..16b6b24df3bf 100644
--- a/tests/models/owlv2/test_image_processor_owlv2.py
+++ b/tests/models/owlv2/test_image_processor_owlv2.py
@@ -17,7 +17,7 @@
import unittest
from transformers.testing_utils import require_torch, require_vision, slow
-from transformers.utils import is_vision_available
+from transformers.utils import is_torch_available, is_vision_available
from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs
@@ -25,7 +25,10 @@
if is_vision_available():
from PIL import Image
- from transformers import Owlv2ImageProcessor
+ from transformers import AutoProcessor, Owlv2ForObjectDetection, Owlv2ImageProcessor
+
+if is_torch_available():
+ import torch
class Owlv2ImageProcessingTester(unittest.TestCase):
@@ -120,6 +123,25 @@ def test_image_processor_integration_test(self):
mean_value = round(pixel_values.mean().item(), 4)
self.assertEqual(mean_value, 0.2353)
+ @slow
+ def test_image_processor_integration_test_resize(self):
+ checkpoint = "google/owlv2-base-patch16-ensemble"
+ processor = AutoProcessor.from_pretrained(checkpoint)
+ model = Owlv2ForObjectDetection.from_pretrained(checkpoint)
+
+ image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
+ inputs = processor(text=["cat"], images=image, return_tensors="pt")
+
+ with torch.no_grad():
+ outputs = model(**inputs)
+
+ target_sizes = torch.tensor([image.size[::-1]])
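+        # convert the raw outputs into absolute (x0, y0, x1, y1) boxes at the original image resolution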
+ results = processor.post_process_object_detection(outputs, threshold=0.2, target_sizes=target_sizes)[0]
+
+ boxes = results["boxes"].tolist()
+ self.assertEqual(boxes[0], [341.66656494140625, 23.38756561279297, 642.321044921875, 371.3482971191406])
+ self.assertEqual(boxes[1], [6.753320693969727, 51.96149826049805, 326.61810302734375, 473.12982177734375])
+
@unittest.skip("OWLv2 doesn't treat 4 channel PIL and numpy consistently yet") # FIXME Amy
def test_call_numpy_4_channels(self):
pass
diff --git a/tests/models/phi3/__init__.py b/tests/models/phi3/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tests/models/phi3/test_modeling_phi3.py b/tests/models/phi3/test_modeling_phi3.py
new file mode 100644
index 000000000000..cc0c00d4e1ea
--- /dev/null
+++ b/tests/models/phi3/test_modeling_phi3.py
@@ -0,0 +1,474 @@
+# coding=utf-8
+# Copyright 2024 Microsoft and the HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+""" Testing suite for the PyTorch Phi-3 model. """
+
+
+import unittest
+
+from parameterized import parameterized
+
+from transformers import Phi3Config, is_torch_available, set_seed
+from transformers.testing_utils import (
+ require_torch,
+ slow,
+ torch_device,
+)
+
+from ...generation.test_utils import GenerationTesterMixin
+from ...test_configuration_common import ConfigTester
+from ...test_modeling_common import ModelTesterMixin, ids_tensor
+from ...test_pipeline_mixin import PipelineTesterMixin
+
+
+if is_torch_available():
+ import torch
+
+ from transformers import (
+ AutoTokenizer,
+ Phi3ForCausalLM,
+ Phi3ForSequenceClassification,
+ Phi3ForTokenClassification,
+ Phi3Model,
+ )
+
+
+class Phi3ModelTester:
+ def __init__(
+ self,
+ parent,
+ batch_size=13,
+ seq_length=7,
+ is_training=True,
+ use_input_mask=True,
+ use_token_type_ids=False,
+ use_labels=True,
+ vocab_size=99,
+ hidden_size=32,
+ num_hidden_layers=2,
+ num_attention_heads=4,
+ intermediate_size=37,
+ hidden_act="gelu",
+ hidden_dropout_prob=0.1,
+ attention_probs_dropout_prob=0.1,
+ max_position_embeddings=512,
+ type_vocab_size=16,
+ type_sequence_label_size=2,
+ initializer_range=0.02,
+ num_labels=3,
+ num_choices=4,
+ pad_token_id=0,
+ scope=None,
+ ):
+ self.parent = parent
+ self.batch_size = batch_size
+ self.seq_length = seq_length
+ self.is_training = is_training
+ self.use_input_mask = use_input_mask
+ self.use_token_type_ids = use_token_type_ids
+ self.use_labels = use_labels
+ self.vocab_size = vocab_size
+ self.hidden_size = hidden_size
+ self.num_hidden_layers = num_hidden_layers
+ self.num_attention_heads = num_attention_heads
+ self.intermediate_size = intermediate_size
+ self.hidden_act = hidden_act
+ self.hidden_dropout_prob = hidden_dropout_prob
+ self.attention_probs_dropout_prob = attention_probs_dropout_prob
+ self.max_position_embeddings = max_position_embeddings
+ self.type_vocab_size = type_vocab_size
+ self.type_sequence_label_size = type_sequence_label_size
+ self.initializer_range = initializer_range
+ self.num_labels = num_labels
+ self.num_choices = num_choices
+ self.pad_token_id = pad_token_id
+ self.scope = scope
+
+ # Copied from tests.models.llama.test_modeling_llama.LlamaModelTester.prepare_config_and_inputs
+ def prepare_config_and_inputs(self):
+ input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
+
+ input_mask = None
+ if self.use_input_mask:
+ input_mask = torch.tril(torch.ones(self.batch_size, self.seq_length)).to(torch_device)
+
+ token_type_ids = None
+ if self.use_token_type_ids:
+ token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
+
+ sequence_labels = None
+ token_labels = None
+ choice_labels = None
+ if self.use_labels:
+ sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
+ token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
+ choice_labels = ids_tensor([self.batch_size], self.num_choices)
+
+ config = self.get_config()
+
+ return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
+
+ def get_config(self):
+ return Phi3Config(
+ vocab_size=self.vocab_size,
+ hidden_size=self.hidden_size,
+ num_hidden_layers=self.num_hidden_layers,
+ num_attention_heads=self.num_attention_heads,
+ intermediate_size=self.intermediate_size,
+ hidden_act=self.hidden_act,
+ hidden_dropout_prob=self.hidden_dropout_prob,
+ attention_probs_dropout_prob=self.attention_probs_dropout_prob,
+ max_position_embeddings=self.max_position_embeddings,
+ type_vocab_size=self.type_vocab_size,
+ is_decoder=False,
+ initializer_range=self.initializer_range,
+ pad_token_id=self.pad_token_id,
+ )
+
+ # Copied from tests.models.llama.test_modeling_llama.LlamaModelTester.create_and_check_model with Llama->Phi3
+ def create_and_check_model(
+ self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
+ ):
+ model = Phi3Model(config=config)
+ model.to(torch_device)
+ model.eval()
+ result = model(input_ids, attention_mask=input_mask)
+ result = model(input_ids)
+ self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
+
+ # Copied from tests.models.llama.test_modeling_llama.LlamaModelTester.create_and_check_model_as_decoder with Llama->Phi3
+ def create_and_check_model_as_decoder(
+ self,
+ config,
+ input_ids,
+ token_type_ids,
+ input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
+ encoder_hidden_states,
+ encoder_attention_mask,
+ ):
+ config.add_cross_attention = True
+ model = Phi3Model(config)
+ model.to(torch_device)
+ model.eval()
+ result = model(
+ input_ids,
+ attention_mask=input_mask,
+ encoder_hidden_states=encoder_hidden_states,
+ encoder_attention_mask=encoder_attention_mask,
+ )
+ result = model(
+ input_ids,
+ attention_mask=input_mask,
+ encoder_hidden_states=encoder_hidden_states,
+ )
+ result = model(input_ids, attention_mask=input_mask)
+ self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
+
+ # Copied from tests.models.llama.test_modeling_llama.LlamaModelTester.create_and_check_for_causal_lm with Llama->Phi3
+ def create_and_check_for_causal_lm(
+ self,
+ config,
+ input_ids,
+ token_type_ids,
+ input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
+ encoder_hidden_states,
+ encoder_attention_mask,
+ ):
+ model = Phi3ForCausalLM(config=config)
+ model.to(torch_device)
+ model.eval()
+ result = model(input_ids, attention_mask=input_mask, labels=token_labels)
+ self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
+
+ # Copied from tests.models.llama.test_modeling_llama.LlamaModelTester.create_and_check_decoder_model_past_large_inputs with Llama->Phi3
+ def create_and_check_decoder_model_past_large_inputs(
+ self,
+ config,
+ input_ids,
+ token_type_ids,
+ input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
+ encoder_hidden_states,
+ encoder_attention_mask,
+ ):
+ config.is_decoder = True
+ config.add_cross_attention = True
+ model = Phi3ForCausalLM(config=config)
+ model.to(torch_device)
+ model.eval()
+
+ # first forward pass
+ outputs = model(
+ input_ids,
+ attention_mask=input_mask,
+ encoder_hidden_states=encoder_hidden_states,
+ encoder_attention_mask=encoder_attention_mask,
+ use_cache=True,
+ )
+ past_key_values = outputs.past_key_values
+
+        # create hypothetical multiple next tokens and extend next_input_ids
+ next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size)
+ next_mask = ids_tensor((self.batch_size, 3), vocab_size=2)
+
+        # append to next input_ids and attention_mask
+ next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
+ next_attention_mask = torch.cat([input_mask, next_mask], dim=-1)
+
+ output_from_no_past = model(
+ next_input_ids,
+ attention_mask=next_attention_mask,
+ encoder_hidden_states=encoder_hidden_states,
+ encoder_attention_mask=encoder_attention_mask,
+ output_hidden_states=True,
+ )["hidden_states"][0]
+ output_from_past = model(
+ next_tokens,
+ attention_mask=next_attention_mask,
+ encoder_hidden_states=encoder_hidden_states,
+ encoder_attention_mask=encoder_attention_mask,
+ past_key_values=past_key_values,
+ output_hidden_states=True,
+ )["hidden_states"][0]
+
+ # select random slice
+ random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
+ output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx].detach()
+ output_from_past_slice = output_from_past[:, :, random_slice_idx].detach()
+
+ self.parent.assertTrue(output_from_past_slice.shape[1] == next_tokens.shape[1])
+
+ # test that outputs are equal for slice
+ self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3))
+
+ # Copied from tests.models.llama.test_modeling_llama.LlamaModelTester.prepare_config_and_inputs_for_common
+ def prepare_config_and_inputs_for_common(self):
+ config_and_inputs = self.prepare_config_and_inputs()
+ (
+ config,
+ input_ids,
+ token_type_ids,
+ input_mask,
+ sequence_labels,
+ token_labels,
+ choice_labels,
+ ) = config_and_inputs
+ inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask}
+ return config, inputs_dict
+
+
+@require_torch
+class Phi3ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
+ all_model_classes = (
+ (Phi3Model, Phi3ForCausalLM, Phi3ForSequenceClassification, Phi3ForTokenClassification)
+ if is_torch_available()
+ else ()
+ )
+ all_generative_model_classes = (Phi3ForCausalLM,) if is_torch_available() else ()
+ pipeline_model_mapping = (
+ {
+ "feature-extraction": Phi3Model,
+ "text-classification": Phi3ForSequenceClassification,
+ "text-generation": Phi3ForCausalLM,
+ "token-classification": Phi3ForTokenClassification,
+ "zero-shot": Phi3ForSequenceClassification,
+ }
+ if is_torch_available()
+ else {}
+ )
+
+ test_headmasking = False
+ test_pruning = False
+
+ # TODO (ydshieh): Check this. See https://app.circleci.com/pipelines/github/huggingface/transformers/79292/workflows/fa2ba644-8953-44a6-8f67-ccd69ca6a476/jobs/1012905
+ def is_pipeline_test_to_skip(
+ self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
+ ):
+ return True
+
+ # Copied from tests.models.llama.test_modeling_llama.LlamaModelTest.setUp with Llama->Phi3
+ def setUp(self):
+ self.model_tester = Phi3ModelTester(self)
+ self.config_tester = ConfigTester(self, config_class=Phi3Config, hidden_size=37)
+
+ # Copied from tests.models.llama.test_modeling_llama.LlamaModelTest.test_config
+ def test_config(self):
+ self.config_tester.run_common_tests()
+
+ # Copied from tests.models.llama.test_modeling_llama.LlamaModelTest.test_model
+ def test_model(self):
+ config_and_inputs = self.model_tester.prepare_config_and_inputs()
+ self.model_tester.create_and_check_model(*config_and_inputs)
+
+ # Copied from tests.models.llama.test_modeling_llama.LlamaModelTest.test_llama_sequence_classification_model with Llama->Phi3,llama->phi3
+ def test_phi3_sequence_classification_model(self):
+ config, input_dict = self.model_tester.prepare_config_and_inputs_for_common()
+ config.num_labels = 3
+ input_ids = input_dict["input_ids"]
+ attention_mask = input_ids.ne(1).to(torch_device)
+ sequence_labels = ids_tensor([self.model_tester.batch_size], self.model_tester.type_sequence_label_size)
+ model = Phi3ForSequenceClassification(config)
+ model.to(torch_device)
+ model.eval()
+ result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels)
+ self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels))
+
+ # Copied from tests.models.llama.test_modeling_llama.LlamaModelTest.test_llama_sequence_classification_model_for_single_label with Llama->Phi3,llama->phi3
+ def test_phi3_sequence_classification_model_for_single_label(self):
+ config, input_dict = self.model_tester.prepare_config_and_inputs_for_common()
+ config.num_labels = 3
+ config.problem_type = "single_label_classification"
+ input_ids = input_dict["input_ids"]
+ attention_mask = input_ids.ne(1).to(torch_device)
+ sequence_labels = ids_tensor([self.model_tester.batch_size], self.model_tester.type_sequence_label_size)
+ model = Phi3ForSequenceClassification(config)
+ model.to(torch_device)
+ model.eval()
+ result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels)
+ self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels))
+
+ # Copied from tests.models.llama.test_modeling_llama.LlamaModelTest.test_llama_sequence_classification_model_for_multi_label with Llama->Phi3,llama->phi3
+ def test_phi3_sequence_classification_model_for_multi_label(self):
+ config, input_dict = self.model_tester.prepare_config_and_inputs_for_common()
+ config.num_labels = 3
+ config.problem_type = "multi_label_classification"
+ input_ids = input_dict["input_ids"]
+ attention_mask = input_ids.ne(1).to(torch_device)
+ sequence_labels = ids_tensor(
+ [self.model_tester.batch_size, config.num_labels], self.model_tester.type_sequence_label_size
+ ).to(torch.float)
+ model = Phi3ForSequenceClassification(config)
+ model.to(torch_device)
+ model.eval()
+ result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels)
+ self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels))
+
+ @parameterized.expand([("su",), ("yarn",)])
+ def test_model_rope_scaling_from_config(self, scaling_type):
+ config, _ = self.model_tester.prepare_config_and_inputs_for_common()
+ short_input = ids_tensor([1, 10], config.vocab_size)
+ long_input = ids_tensor([1, int(config.max_position_embeddings * 1.5)], config.vocab_size)
+
+ set_seed(42) # Fixed seed at init time so the two models get the same random weights
+ original_model = Phi3Model(config)
+ original_model.to(torch_device)
+ original_model.eval()
+ original_short_output = original_model(short_input).last_hidden_state
+ original_long_output = original_model(long_input).last_hidden_state
+
+ set_seed(42) # Fixed seed at init time so the two models get the same random weights
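+        # the scaling config expects one factor per rotary frequency pair, i.e. head_dim // 2 entries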
+ n_factors = config.hidden_size // config.num_attention_heads // 2
+ config.rope_scaling = {
+ "type": scaling_type,
+ "short_factor": [5.0 for _ in range(n_factors)],
+ "long_factor": [5.0 for _ in range(n_factors)],
+ }
+ scaled_model = Phi3Model(config)
+ scaled_model.to(torch_device)
+ scaled_model.eval()
+ scaled_short_output = scaled_model(short_input).last_hidden_state
+ scaled_long_output = scaled_model(long_input).last_hidden_state
+
+ # Scaling changes the RoPE embeddings, both for the short and long outputs
+ self.assertFalse(torch.allclose(original_short_output, scaled_short_output, atol=1e-5))
+ self.assertFalse(torch.allclose(original_long_output, scaled_long_output, atol=1e-5))
+
+
+@slow
+@require_torch
+class Phi3IntegrationTest(unittest.TestCase):
+ def test_model_phi3_mini_4k_instruct_logits(self):
+ input_ids = {
+ "input_ids": torch.tensor(
+ [[1212, 318, 281, 1672, 2643, 290, 428, 318, 257, 1332]], dtype=torch.long, device=torch_device
+ )
+ }
+
+ model = Phi3ForCausalLM.from_pretrained("microsoft/phi-3-mini-4k-instruct").to(torch_device)
+ model.eval()
+
+ output = model(**input_ids).logits
+
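+        # expected logits for the first two positions over the first 30 vocab entries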
+ EXPECTED_OUTPUT = torch.tensor([[ 0.9979, -1.9449, -2.5613, -2.2110, -0.9323, -2.2726, -3.2468, -2.0122,-1.0021, -1.2764, -1.0876, -1.2358, 3.9385, 6.2152, -0.3695, -2.3285,-1.2907, -1.8238, -1.9941, -2.2098, -0.6923, -1.6793, -1.1660, -2.0469,-0.7369, -1.4101, -1.4091, -3.1694, -1.8383, -1.1952],[ 3.0525, 1.9178, 3.7016, 0.9263, 0.3397, 1.9584, 2.1347, 0.3482, 1.3773, 0.2153, 0.2798, 0.8360, 9.0936, 11.4944, -0.3575, -0.9442,-0.1246, 1.3869, 0.9846, 1.7243, 0.9150, 1.0823, 0.4313, 1.5742, 0.2566, -0.1401, -1.3019, 0.4967, 0.6941, 0.7214]]).to(torch_device) # fmt: skip
+
+ self.assertTrue(torch.allclose(EXPECTED_OUTPUT, output[0, :2, :30], atol=1e-4, rtol=1e-4))
+
+ def test_phi3_mini_4k_instruct_generation(self):
+ model = Phi3ForCausalLM.from_pretrained("microsoft/phi-3-mini-4k-instruct")
+ tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-3-mini-4k-instruct")
+
+ messages = [
+ {
+ "role": "system",
+ "content": "You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user.",
+ },
+ {"role": "user", "content": "Can you provide ways to eat combinations of bananas and dragonfruits?"},
+ ]
+ inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
+
+ outputs = model.generate(inputs, max_new_tokens=32)
+ output_text = tokenizer.batch_decode(outputs)
+
+ EXPECTED_OUTPUT = [
+ "<|system|> You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user.<|end|><|user|> Can you provide ways to eat combinations of bananas and dragonfruits?<|end|><|assistant|> Absolutely! Bananas and dragonfruits are both delicious fruits that can be combined in various ways to create tasty and nutrit"
+ ]
+
+ self.assertListEqual(output_text, EXPECTED_OUTPUT)
+
+ def test_model_phi3_mini_128k_instruct_logits(self):
+ input_ids = {
+ "input_ids": torch.tensor(
+ [[1212, 318, 281, 1672, 2643, 290, 428, 318, 257, 1332]], dtype=torch.long, device=torch_device
+ )
+ }
+
+ model = Phi3ForCausalLM.from_pretrained("microsoft/phi-3-mini-128k-instruct").to(torch_device)
+ model.eval()
+
+ output = model(**input_ids).logits
+
+ EXPECTED_OUTPUT = torch.tensor([[ 1.8478, -0.5709, -1.6792, -1.2133, -0.7809, -0.8817, -2.0969, -1.1191,-0.7731, -1.0483, -0.5961, -1.3067, 3.1325, 6.9442, -0.4803, -0.9154,-1.3085, -1.0822, -1.1433, -0.7660, -0.8531, -0.9150, -0.6179, -1.6153,-0.2239, -1.3207, -1.1187, -2.4795, -1.4733, -0.4931],[ 3.5839, 2.4722, 3.7130, 1.2032, 0.7356, 2.7777, 2.5256, 0.9157, 1.6431, 0.3533, 0.5100, 1.3512, 8.9873, 10.9815, 0.3530, 0.1473, 0.2051, 1.8553, 1.5988, 2.2268, 1.1897, 1.2829, 0.7894, 1.8895, 0.7666, 0.4122, -0.9316, 0.9936, 1.2722, 0.8263]]).to(torch_device) # fmt: skip
+
+ self.assertTrue(torch.allclose(EXPECTED_OUTPUT, output[0, :2, :30], atol=1e-4, rtol=1e-4))
+
+ def test_phi3_mini_128k_instruct_generation(self):
+ model = Phi3ForCausalLM.from_pretrained("microsoft/phi-3-mini-128k-instruct")
+ tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-3-mini-128k-instruct")
+
+ messages = [
+ {
+ "role": "system",
+ "content": "You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user.",
+ },
+ {"role": "user", "content": "Can you provide ways to eat combinations of bananas and dragonfruits?"},
+ ]
+ inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
+
+ outputs = model.generate(inputs, max_new_tokens=32)
+ output_text = tokenizer.batch_decode(outputs)
+
+ EXPECTED_OUTPUT = [
+ "<|system|> You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user.<|end|><|user|> Can you provide ways to eat combinations of bananas and dragonfruits?<|end|><|assistant|> Certainly! Bananas and dragonfruits can be combined in various delicious and healthy ways. Here are some ideas:\n\n1."
+ ]
+
+ self.assertListEqual(output_text, EXPECTED_OUTPUT)
diff --git a/tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py b/tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py
index c46718b68038..599161e8d405 100644
--- a/tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py
+++ b/tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py
@@ -15,8 +15,6 @@
""" Testing suite for the PyTorch RecurrentGemma model. """
import unittest
-from parameterized import parameterized
-
from transformers import AutoModelForCausalLM, AutoTokenizer, RecurrentGemmaConfig, is_torch_available, set_seed
from transformers.testing_utils import (
require_bitsandbytes,
@@ -330,11 +328,6 @@ def test_model_various_embeddings(self):
config_and_inputs[0].position_embedding_type = type
self.model_tester.create_and_check_model(*config_and_inputs)
- @unittest.skip("Recurrent gemma does not use legacy cache")
- @parameterized.expand([(1, False), (1, True), (4, False)])
- def test_new_cache_format(self, num_beams, do_sample):
- pass
-
def test_save_load_fast_init_from_base(self):
pass
diff --git a/tests/models/reformer/test_modeling_reformer.py b/tests/models/reformer/test_modeling_reformer.py
index d3996a31c6a9..3a33a682d186 100644
--- a/tests/models/reformer/test_modeling_reformer.py
+++ b/tests/models/reformer/test_modeling_reformer.py
@@ -686,6 +686,18 @@ def _check_hidden_states_for_generate(
def test_left_padding_compatibility(self):
pass
+ def _get_input_ids_and_config(self, batch_size=2):
+        # override because otherwise we hit the model's max possible seq length (4*8=32)
+        # decreasing seq_length in the tester causes errors for the training tests, which need exactly the max seq length
+        # NOTE: seq_length has to be a multiple of 4, otherwise it fails for other tests
+ config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+ input_ids = inputs_dict[self.input_name]
+ input_ids = input_ids[:batch_size, :16]
+ attention_mask = torch.ones_like(input_ids, dtype=torch.long)[:batch_size, :16]
+ config.eos_token_id = None
+ config.forced_eos_token_id = None
+ return config, input_ids, attention_mask
+
@require_torch
class ReformerLSHAttnModelTest(
diff --git a/tests/models/seamless_m4t/test_modeling_seamless_m4t.py b/tests/models/seamless_m4t/test_modeling_seamless_m4t.py
index c08e55905702..925d7342931b 100644
--- a/tests/models/seamless_m4t/test_modeling_seamless_m4t.py
+++ b/tests/models/seamless_m4t/test_modeling_seamless_m4t.py
@@ -414,9 +414,11 @@ def _get_encoder_outputs(
encoder_outputs["last_hidden_state"] = encoder_outputs.last_hidden_state.repeat_interleave(
num_interleave, dim=0
)
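+            # prepare special tokens on a copy of the generation config to resolve decoder_start_token_id, as generate() does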
+ generation_config = copy.deepcopy(model.generation_config)
+ model._prepare_special_tokens(generation_config)
input_ids = (
torch.zeros(input_ids.shape[:2], dtype=torch.int64, layout=input_ids.layout, device=input_ids.device)
- + model._get_decoder_start_token_id()
+ + generation_config.decoder_start_token_id
)
attention_mask = None
return encoder_outputs, input_ids, attention_mask
@@ -463,6 +465,10 @@ def test_initialization(self):
def test_inputs_embeds(self):
pass
+ @unittest.skip(reason="SeamlessM4TSpeechEncoder doesn't have an embedding layer")
+ def test_inputs_embeds_matches_input_ids(self):
+ pass
+
@unittest.skip(
reason="Expected missing keys serve when using SeamlessM4TForXXX.from_pretrained from a checkpoint saved by SeamlessM4TModel.save_pretrained."
)
diff --git a/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py b/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py
index 699641fcfd75..b36e29d79260 100644
--- a/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py
+++ b/tests/models/seamless_m4t_v2/test_modeling_seamless_m4t_v2.py
@@ -430,9 +430,11 @@ def _get_encoder_outputs(
encoder_outputs["last_hidden_state"] = encoder_outputs.last_hidden_state.repeat_interleave(
num_interleave, dim=0
)
+ generation_config = copy.deepcopy(model.generation_config)
+ model._prepare_special_tokens(generation_config)
input_ids = (
torch.zeros(input_ids.shape[:2], dtype=torch.int64, layout=input_ids.layout, device=input_ids.device)
- + model._get_decoder_start_token_id()
+ + generation_config.decoder_start_token_id
)
attention_mask = None
return encoder_outputs, input_ids, attention_mask
@@ -479,6 +481,10 @@ def test_initialization(self):
def test_inputs_embeds(self):
pass
+ @unittest.skip(reason="SeamlessM4TSpeechEncoder doesn't have an embedding layer")
+ def test_inputs_embeds_matches_input_ids(self):
+ pass
+
@unittest.skip(
reason="Expected missing keys serve when using SeamlessM4Tv2ForXXX.from_pretrained from a checkpoint saved by SeamlessM4Tv2Model.save_pretrained."
)
diff --git a/tests/models/seggpt/test_image_processing_seggpt.py b/tests/models/seggpt/test_image_processing_seggpt.py
index 46694d6636ea..04cefb70d0ef 100644
--- a/tests/models/seggpt/test_image_processing_seggpt.py
+++ b/tests/models/seggpt/test_image_processing_seggpt.py
@@ -30,6 +30,8 @@
from transformers.models.seggpt.modeling_seggpt import SegGptImageSegmentationOutput
if is_vision_available():
+ from PIL import Image
+
from transformers import SegGptImageProcessor
@@ -147,7 +149,7 @@ def test_mask_equivalence(self):
mask_rgb = mask_binary.convert("RGB")
inputs_binary = image_processor(images=None, prompt_masks=mask_binary, return_tensors="pt")
- inputs_rgb = image_processor(images=None, prompt_masks=mask_rgb, return_tensors="pt")
+ inputs_rgb = image_processor(images=None, prompt_masks=mask_rgb, return_tensors="pt", do_convert_rgb=False)
self.assertTrue((inputs_binary["prompt_masks"] == inputs_rgb["prompt_masks"]).all().item())
@@ -196,7 +198,11 @@ def test_pixel_values(self):
image_processor = SegGptImageProcessor.from_pretrained("BAAI/seggpt-vit-large")
inputs = image_processor(
- images=input_image, prompt_images=prompt_image, prompt_masks=prompt_mask, return_tensors="pt"
+ images=input_image,
+ prompt_images=prompt_image,
+ prompt_masks=prompt_mask,
+ return_tensors="pt",
+ do_convert_rgb=False,
)
# Verify pixel values
@@ -229,3 +235,76 @@ def test_pixel_values(self):
torch.allclose(inputs.prompt_pixel_values[0, :, :3, :3], expected_prompt_pixel_values, atol=1e-4)
)
self.assertTrue(torch.allclose(inputs.prompt_masks[0, :, :3, :3], expected_prompt_masks, atol=1e-4))
+
+ def test_prompt_mask_equivalence(self):
+ image_processor = self.image_processing_class(**self.image_processor_dict)
+ image_size = self.image_processor_tester.image_size
+
+ # Single Mask Examples
+ expected_single_shape = [1, 3, image_size, image_size]
+
+ # Single Semantic Map (2D)
+ image_np_2d = np.ones((image_size, image_size))
+ image_pt_2d = torch.ones((image_size, image_size))
+ image_pil_2d = Image.fromarray(image_np_2d)
+
+ inputs_np_2d = image_processor(images=None, prompt_masks=image_np_2d, return_tensors="pt")
+ inputs_pt_2d = image_processor(images=None, prompt_masks=image_pt_2d, return_tensors="pt")
+ inputs_pil_2d = image_processor(images=None, prompt_masks=image_pil_2d, return_tensors="pt")
+
+ self.assertTrue((inputs_np_2d["prompt_masks"] == inputs_pt_2d["prompt_masks"]).all().item())
+ self.assertTrue((inputs_np_2d["prompt_masks"] == inputs_pil_2d["prompt_masks"]).all().item())
+ self.assertEqual(list(inputs_np_2d["prompt_masks"].shape), expected_single_shape)
+
+ # Single RGB Images (3D)
+ image_np_3d = np.ones((3, image_size, image_size))
+ image_pt_3d = torch.ones((3, image_size, image_size))
+ image_pil_3d = Image.fromarray(image_np_3d.transpose(1, 2, 0).astype(np.uint8))
+
+ inputs_np_3d = image_processor(
+ images=None, prompt_masks=image_np_3d, return_tensors="pt", do_convert_rgb=False
+ )
+ inputs_pt_3d = image_processor(
+ images=None, prompt_masks=image_pt_3d, return_tensors="pt", do_convert_rgb=False
+ )
+ inputs_pil_3d = image_processor(
+ images=None, prompt_masks=image_pil_3d, return_tensors="pt", do_convert_rgb=False
+ )
+
+ self.assertTrue((inputs_np_3d["prompt_masks"] == inputs_pt_3d["prompt_masks"]).all().item())
+ self.assertTrue((inputs_np_3d["prompt_masks"] == inputs_pil_3d["prompt_masks"]).all().item())
+ self.assertEqual(list(inputs_np_3d["prompt_masks"].shape), expected_single_shape)
+
+ # Batched Examples
+ expected_batched_shape = [2, 3, image_size, image_size]
+
+ # Batched Semantic Maps (3D)
+ image_np_2d_batched = np.ones((2, image_size, image_size))
+ image_pt_2d_batched = torch.ones((2, image_size, image_size))
+
+ inputs_np_2d_batched = image_processor(images=None, prompt_masks=image_np_2d_batched, return_tensors="pt")
+ inputs_pt_2d_batched = image_processor(images=None, prompt_masks=image_pt_2d_batched, return_tensors="pt")
+
+ self.assertTrue((inputs_np_2d_batched["prompt_masks"] == inputs_pt_2d_batched["prompt_masks"]).all().item())
+ self.assertEqual(list(inputs_np_2d_batched["prompt_masks"].shape), expected_batched_shape)
+
+ # Batched RGB images
+ image_np_4d = np.ones((2, 3, image_size, image_size))
+ image_pt_4d = torch.ones((2, 3, image_size, image_size))
+
+ inputs_np_4d = image_processor(
+ images=None, prompt_masks=image_np_4d, return_tensors="pt", do_convert_rgb=False
+ )
+ inputs_pt_4d = image_processor(
+ images=None, prompt_masks=image_pt_4d, return_tensors="pt", do_convert_rgb=False
+ )
+
+ self.assertTrue((inputs_np_4d["prompt_masks"] == inputs_pt_4d["prompt_masks"]).all().item())
+ self.assertEqual(list(inputs_np_4d["prompt_masks"].shape), expected_batched_shape)
+
+ # Comparing Single and Batched Examples
+ self.assertTrue((inputs_np_2d["prompt_masks"][0] == inputs_np_3d["prompt_masks"][0]).all().item())
+ self.assertTrue((inputs_np_2d_batched["prompt_masks"][0] == inputs_np_2d["prompt_masks"][0]).all().item())
+ self.assertTrue((inputs_np_2d_batched["prompt_masks"][0] == inputs_np_3d["prompt_masks"][0]).all().item())
+ self.assertTrue((inputs_np_2d_batched["prompt_masks"][0] == inputs_np_4d["prompt_masks"][0]).all().item())
diff --git a/tests/models/seggpt/test_modeling_seggpt.py b/tests/models/seggpt/test_modeling_seggpt.py
index d4a8a46f0378..efa0231c1e81 100644
--- a/tests/models/seggpt/test_modeling_seggpt.py
+++ b/tests/models/seggpt/test_modeling_seggpt.py
@@ -16,6 +16,7 @@
import inspect
+import math
import unittest
from datasets import load_dataset
@@ -39,6 +40,7 @@
from torch import nn
from transformers import SegGptForImageSegmentation, SegGptModel
+ from transformers.models.seggpt.modeling_seggpt import SegGptLoss
if is_vision_available():
@@ -298,6 +300,22 @@ def recursive_check(batched_object, single_row_object, model_name, key):
model_row_output[key] = model_row_output[key][1:]
recursive_check(model_batched_output[key], model_row_output[key], model_name, key)
+ def test_seggpt_loss(self):
+ torch.manual_seed(100)
+ config = self.model_tester.get_config()
+
+ prompt_masks = torch.rand(1, config.num_channels, config.image_size, config.image_size)
+ label = torch.rand(1, config.num_channels, config.image_size, config.image_size)
+ pred_masks = torch.rand(1, config.num_channels, config.image_size * 2, config.image_size)
+        # seq_len x 2 because the loss concatenates prompt_masks and labels to match the doubled height of pred_masks
+ bool_masked_pos = torch.rand(1, self.model_tester.seq_length * 2) > 0.5
+
+ loss = SegGptLoss(config)
+ loss_value = loss(prompt_masks, pred_masks, label, bool_masked_pos)
+ expected_loss_value = torch.tensor(0.3340)
+
+ self.assertTrue(torch.allclose(loss_value, expected_loss_value, atol=1e-4))
+
@slow
def test_model_from_pretrained(self):
model_name = "BAAI/seggpt-vit-large"
@@ -312,6 +330,20 @@ def prepare_img():
return images, masks
+def prepare_bool_masked_pos(config: SegGptConfig):
+ num_patches = math.prod([i // config.patch_size for i in config.image_size])
+ mask_ratio = 0.75
+ torch.manual_seed(2)
+ num_masked_patches = int(num_patches * mask_ratio)
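+    # scatter the masked patches randomly across the image, reproducible thanks to the fixed seed above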
+ shuffle_idx = torch.randperm(num_patches)
+ bool_masked_pos = torch.FloatTensor([0] * (num_patches - num_masked_patches) + [1] * num_masked_patches)[
+ shuffle_idx
+ ]
+ bool_masked_pos = bool_masked_pos.unsqueeze(0).bool()
+
+ return bool_masked_pos
+
+
@require_torch
@require_vision
class SegGptModelIntegrationTest(unittest.TestCase):
@@ -331,7 +363,11 @@ def test_one_shot_inference(self):
prompt_mask = masks[0]
inputs = image_processor(
- images=input_image, prompt_images=prompt_image, prompt_masks=prompt_mask, return_tensors="pt"
+ images=input_image,
+ prompt_images=prompt_image,
+ prompt_masks=prompt_mask,
+ return_tensors="pt",
+ do_convert_rgb=False,
)
inputs = inputs.to(torch_device)
@@ -372,7 +408,11 @@ def test_few_shot_inference(self):
prompt_masks = [masks[0], masks[2]]
inputs = image_processor(
- images=input_images, prompt_images=prompt_images, prompt_masks=prompt_masks, return_tensors="pt"
+ images=input_images,
+ prompt_images=prompt_images,
+ prompt_masks=prompt_masks,
+ return_tensors="pt",
+ do_convert_rgb=False,
)
inputs = {k: v.to(torch_device) for k, v in inputs.items()}
@@ -390,3 +430,36 @@ def test_few_shot_inference(self):
self.assertEqual(outputs.pred_masks.shape, expected_shape)
self.assertTrue(torch.allclose(outputs.pred_masks[0, :, 448:451, :3], expected_slice, atol=4e-4))
+
+ @slow
+ def test_one_shot_with_label(self):
+ model = SegGptForImageSegmentation.from_pretrained("BAAI/seggpt-vit-large").to(torch_device)
+
+ image_processor = self.default_image_processor
+
+ images, masks = prepare_img()
+
+ input_image = images[1]
+ label = masks[1]
+ prompt_image = images[0]
+ prompt_mask = masks[0]
+
+ inputs = image_processor(
+ images=input_image,
+ prompt_masks=prompt_mask,
+ prompt_images=prompt_image,
+ return_tensors="pt",
+ do_convert_rgb=False,
+ ).to(torch_device)
+
+ labels = image_processor(images=None, prompt_masks=label, return_tensors="pt", do_convert_rgb=False)[
+ "prompt_masks"
+ ].to(torch_device)
+
+ bool_masked_pos = prepare_bool_masked_pos(model.config).to(torch_device)
+
+ with torch.no_grad():
+ outputs = model(**inputs, labels=labels, bool_masked_pos=bool_masked_pos)
+
+ expected_loss = torch.tensor(0.0074).to(torch_device)
+ self.assertTrue(torch.allclose(outputs.loss, expected_loss, atol=1e-4))
diff --git a/tests/models/sew/test_modeling_sew.py b/tests/models/sew/test_modeling_sew.py
index 528d5f84185e..5342df9e0880 100644
--- a/tests/models/sew/test_modeling_sew.py
+++ b/tests/models/sew/test_modeling_sew.py
@@ -356,6 +356,18 @@ def test_resize_tokens_embeddings(self):
def test_model_common_attributes(self):
pass
+ @unittest.skip("No support for low_cpu_mem_usage=True.")
+ def test_save_load_low_cpu_mem_usage(self):
+ pass
+
+ @unittest.skip("No support for low_cpu_mem_usage=True.")
+ def test_save_load_low_cpu_mem_usage_checkpoints(self):
+ pass
+
+ @unittest.skip("No support for low_cpu_mem_usage=True.")
+ def test_save_load_low_cpu_mem_usage_no_safetensors(self):
+ pass
+
def test_retain_grad_hidden_states_attentions(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.output_hidden_states = True
diff --git a/tests/models/sew_d/test_modeling_sew_d.py b/tests/models/sew_d/test_modeling_sew_d.py
index 6fda7963a800..1980bd3ab121 100644
--- a/tests/models/sew_d/test_modeling_sew_d.py
+++ b/tests/models/sew_d/test_modeling_sew_d.py
@@ -460,6 +460,18 @@ def _mock_init_weights(self, module):
def test_feed_forward_chunking(self):
pass
+ @unittest.skip("No support for low_cpu_mem_usage=True.")
+ def test_save_load_low_cpu_mem_usage(self):
+ pass
+
+ @unittest.skip("No support for low_cpu_mem_usage=True.")
+ def test_save_load_low_cpu_mem_usage_checkpoints(self):
+ pass
+
+ @unittest.skip("No support for low_cpu_mem_usage=True.")
+ def test_save_load_low_cpu_mem_usage_no_safetensors(self):
+ pass
+
@slow
def test_model_from_pretrained(self):
model = SEWDModel.from_pretrained("asapp/sew-d-tiny-100k")
diff --git a/tests/models/siglip/test_modeling_siglip.py b/tests/models/siglip/test_modeling_siglip.py
index 8880168484ec..e0a4825d0ede 100644
--- a/tests/models/siglip/test_modeling_siglip.py
+++ b/tests/models/siglip/test_modeling_siglip.py
@@ -687,3 +687,25 @@ def test_inference(self):
probs = torch.sigmoid(logits_per_image) # these are the probabilities
expected_probs = torch.tensor([[3.1937e-01, 3.2463e-05]], device=torch_device)
self.assertTrue(torch.allclose(probs, expected_probs, atol=1e-3))
+
+ @slow
+ def test_inference_interpolate_pos_encoding(self):
+ model_name = "google/siglip-base-patch16-224"
+ model = SiglipModel.from_pretrained(model_name).to(torch_device)
+
+ # 640 x 480 image
+ image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
+ processor = SiglipProcessor.from_pretrained(model_name, do_resize=False, size={"height": 480, "width": 640})
+
+ inputs = processor(text="what's in the image", images=image, return_tensors="pt").to(torch_device)
+
+ # forward pass
+ with torch.no_grad():
+ outputs = model(**inputs, interpolate_pos_encoding=True)
+
+ # verify the shape
+ # patch size = 16
+ # batch size 1, (640/16) * (480/16) = 1200 patches, 768 hidden size
+ expected_shape = torch.Size((1, 1200, 768))
+
+ self.assertEqual(outputs.vision_model_output.last_hidden_state.shape, expected_shape)
diff --git a/tests/models/speech_to_text/test_modeling_speech_to_text.py b/tests/models/speech_to_text/test_modeling_speech_to_text.py
index 36a973d99dad..5d0e8f3a07af 100644
--- a/tests/models/speech_to_text/test_modeling_speech_to_text.py
+++ b/tests/models/speech_to_text/test_modeling_speech_to_text.py
@@ -285,7 +285,7 @@ class Speech2TextModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTest
input_name = "input_features"
def _get_input_ids_and_config(self, batch_size=2):
- config, input_ids, attention_mask, max_length = GenerationTesterMixin._get_input_ids_and_config(self)
+ config, input_ids, attention_mask = GenerationTesterMixin._get_input_ids_and_config(self)
# `input_ids` is actually `input_features` which is a 3D tensor.
# We must overwrite the mask to make it 2D since the original `_get_input_ids_and_config` creates an
@@ -294,7 +294,7 @@ def _get_input_ids_and_config(self, batch_size=2):
sequence_length = input_ids.shape[1]
attention_mask = torch.ones((batch_size, sequence_length), dtype=torch.long, device=attention_mask.device)
- return config, input_ids, attention_mask, max_length
+ return config, input_ids, attention_mask
def setUp(self):
self.model_tester = Speech2TextModelTester(self)
@@ -645,7 +645,9 @@ def _get_encoder_outputs(
num_interleave, dim=0
)
input_ids = input_ids[:, :, 0]
- input_ids = torch.zeros_like(input_ids[:, :1], dtype=torch.long) + model._get_decoder_start_token_id()
+ generation_config = copy.deepcopy(model.generation_config)
+ model._prepare_special_tokens(generation_config)
+ input_ids = torch.zeros_like(input_ids[:, :1]) + generation_config.decoder_start_token_id
attention_mask = None
return encoder_outputs, input_ids, attention_mask
diff --git a/tests/models/superpoint/test_modeling_superpoint.py b/tests/models/superpoint/test_modeling_superpoint.py
index 080eda385b9e..6e10a8a21dd0 100644
--- a/tests/models/superpoint/test_modeling_superpoint.py
+++ b/tests/models/superpoint/test_modeling_superpoint.py
@@ -27,7 +27,6 @@
import torch
from transformers import (
- SUPERPOINT_PRETRAINED_MODEL_ARCHIVE_LIST,
SuperPointForKeypointDetection,
)
@@ -121,6 +120,7 @@ class SuperPointModelTest(ModelTesterMixin, unittest.TestCase):
test_resize_embeddings = False
test_head_masking = False
has_attentions = False
+ from_pretrained_id = "magic-leap-community/superpoint"
def setUp(self):
self.model_tester = SuperPointModelTester(self)
@@ -222,9 +222,8 @@ def check_hidden_states_output(inputs_dict, config, model_class):
@slow
def test_model_from_pretrained(self):
- for model_name in SUPERPOINT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
- model = SuperPointForKeypointDetection.from_pretrained(model_name)
- self.assertIsNotNone(model)
+ model = SuperPointForKeypointDetection.from_pretrained(self.from_pretrained_id)
+ self.assertIsNotNone(model)
def test_forward_labels_should_be_none(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
diff --git a/tests/models/swiftformer/test_modeling_tf_swiftformer.py b/tests/models/swiftformer/test_modeling_tf_swiftformer.py
new file mode 100644
index 000000000000..e73d38605d60
--- /dev/null
+++ b/tests/models/swiftformer/test_modeling_tf_swiftformer.py
@@ -0,0 +1,272 @@
+# coding=utf-8
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" Testing suite for the TensorFlow SwiftFormer model. """
+
+
+import inspect
+import unittest
+
+from transformers import SwiftFormerConfig
+from transformers.testing_utils import (
+ require_tf,
+ require_vision,
+ slow,
+)
+from transformers.utils import cached_property, is_tf_available, is_vision_available
+
+from ...test_configuration_common import ConfigTester
+from ...test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor
+from ...test_pipeline_mixin import PipelineTesterMixin
+
+
+if is_tf_available():
+ import tensorflow as tf
+
+ from transformers import TFSwiftFormerForImageClassification, TFSwiftFormerModel
+ from transformers.modeling_tf_utils import keras
+
+
+if is_vision_available():
+ from PIL import Image
+
+ from transformers import ViTImageProcessor
+
+
+class TFSwiftFormerModelTester:
+ def __init__(
+ self,
+ parent,
+ batch_size=1,
+ num_channels=3,
+ is_training=True,
+ use_labels=True,
+ hidden_dropout_prob=0.1,
+ attention_probs_dropout_prob=0.1,
+ image_size=224,
+ num_labels=2,
+ layer_depths=[3, 3, 6, 4],
+ embed_dims=[48, 56, 112, 220],
+ ):
+ self.parent = parent
+ self.batch_size = batch_size
+ self.num_channels = num_channels
+ self.is_training = is_training
+ self.use_labels = use_labels
+ self.hidden_dropout_prob = hidden_dropout_prob
+ self.attention_probs_dropout_prob = attention_probs_dropout_prob
+ self.num_labels = num_labels
+ self.image_size = image_size
+ self.layer_depths = layer_depths
+ self.embed_dims = embed_dims
+
+ def prepare_config_and_inputs(self):
+ pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
+
+ labels = None
+ if self.use_labels:
+ labels = ids_tensor([self.batch_size], self.num_labels)
+
+ config = self.get_config()
+
+ return config, pixel_values, labels
+
+ def get_config(self):
+ return SwiftFormerConfig(
+ depths=self.layer_depths,
+ embed_dims=self.embed_dims,
+ mlp_ratio=4,
+ downsamples=[True, True, True, True],
+ hidden_act="gelu",
+ num_labels=self.num_labels,
+ down_patch_size=3,
+ down_stride=2,
+ down_pad=1,
+ drop_rate=0.0,
+ drop_path_rate=0.0,
+ use_layer_scale=True,
+ layer_scale_init_value=1e-5,
+ )
+
+ def create_and_check_model(self, config, pixel_values, labels):
+ model = TFSwiftFormerModel(config=config)
+ result = model(pixel_values)
+ self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.embed_dims[-1], 7, 7))
+
+ def create_and_check_for_image_classification(self, config, pixel_values, labels):
+ config.num_labels = self.num_labels
+ model = TFSwiftFormerForImageClassification(config)
+ result = model(pixel_values, labels=labels)
+ self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))
+
+ model = TFSwiftFormerForImageClassification(config)
+
+ pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
+ result = model(pixel_values)
+ self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))
+
+ def prepare_config_and_inputs_for_common(self):
+ (config, pixel_values, labels) = self.prepare_config_and_inputs()
+ inputs_dict = {"pixel_values": pixel_values}
+ return config, inputs_dict
+
+
+@require_tf
+class TFSwiftFormerModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
+ """
+    Here we also override some of the tests of test_modeling_tf_common.py, as SwiftFormer does not use input_ids, inputs_embeds,
+ attention_mask and seq_length.
+ """
+
+ all_model_classes = (TFSwiftFormerModel, TFSwiftFormerForImageClassification) if is_tf_available() else ()
+
+ pipeline_model_mapping = (
+ {"feature-extraction": TFSwiftFormerModel, "image-classification": TFSwiftFormerForImageClassification}
+ if is_tf_available()
+ else {}
+ )
+
+ fx_compatible = False
+ test_pruning = False
+ test_resize_embeddings = False
+ test_head_masking = False
+ has_attentions = False
+ test_onnx = False
+ from_pretrained_id = "MBZUAI/swiftformer-xs"
+
+ def setUp(self):
+ self.model_tester = TFSwiftFormerModelTester(self)
+ self.config_tester = ConfigTester(
+ self,
+ config_class=SwiftFormerConfig,
+ has_text_modality=False,
+ hidden_size=37,
+ num_attention_heads=12,
+ num_hidden_layers=12,
+ )
+
+ def test_config(self):
+ self.config_tester.run_common_tests()
+
+ @unittest.skip(reason="TFSwiftFormer does not use inputs_embeds")
+ def test_inputs_embeds(self):
+ pass
+
+ def test_model_common_attributes(self):
+ config, _ = self.model_tester.prepare_config_and_inputs_for_common()
+
+ for model_class in self.all_model_classes:
+ model = model_class(config)
+ x = model.get_output_embeddings()
+ self.assertTrue(x is None or isinstance(x, keras.layers.Dense))
+
+ # Copied from transformers.tests.models.deit.test_modeling_tf_deit.py
+ def test_forward_signature(self):
+ config, _ = self.model_tester.prepare_config_and_inputs_for_common()
+
+ for model_class in self.all_model_classes:
+ model = model_class(config)
+ signature = inspect.signature(model.call)
+ # signature.parameters is an OrderedDict => so arg_names order is deterministic
+ arg_names = [*signature.parameters.keys()]
+
+ expected_arg_names = ["pixel_values"]
+ self.assertListEqual(arg_names[:1], expected_arg_names)
+
+ def test_model(self):
+ config_and_inputs = self.model_tester.prepare_config_and_inputs()
+ self.model_tester.create_and_check_model(*config_and_inputs)
+
+ def test_for_image_classification(self):
+ config_and_inputs = self.model_tester.prepare_config_and_inputs()
+ self.model_tester.create_and_check_for_image_classification(*config_and_inputs)
+
+ @slow
+ def test_model_from_pretrained(self):
+ model = TFSwiftFormerModel.from_pretrained(self.from_pretrained_id)
+ self.assertIsNotNone(model)
+
+ @unittest.skip(reason="TFSwiftFormer does not output attentions")
+ def test_attention_outputs(self):
+ pass
+
+ def test_hidden_states_output(self):
+ def check_hidden_states_output(inputs_dict, config, model_class):
+ model = model_class(config)
+
+ outputs = model(**self._prepare_for_class(inputs_dict, model_class))
+
+ hidden_states = outputs.hidden_states
+
+ expected_num_stages = 8
+ self.assertEqual(len(hidden_states), expected_num_stages)
+
+ # SwiftFormer's feature maps are of shape (batch_size, embed_dims, height, width)
+ # with the width and height being successively divided by 2, after every 2 blocks
+ for i in range(len(hidden_states)):
+ self.assertEqual(
+ hidden_states[i].shape,
+ tf.TensorShape(
+ [
+ self.model_tester.batch_size,
+ self.model_tester.embed_dims[i // 2],
+ (self.model_tester.image_size // 4) // 2 ** (i // 2),
+ (self.model_tester.image_size // 4) // 2 ** (i // 2),
+ ]
+ ),
+ )
+
+ config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+
+ for model_class in self.all_model_classes:
+ inputs_dict["output_hidden_states"] = True
+ check_hidden_states_output(inputs_dict, config, model_class)
+
+ # check that output_hidden_states also work using config
+ del inputs_dict["output_hidden_states"]
+ config.output_hidden_states = True
+
+ check_hidden_states_output(inputs_dict, config, model_class)
+
+
+# We will verify our results on an image of cute cats
+def prepare_img():
+ image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
+ return image
+
+
+@require_tf
+@require_vision
+class TFSwiftFormerModelIntegrationTest(unittest.TestCase):
+ @cached_property
+ def default_image_processor(self):
+ return ViTImageProcessor.from_pretrained("MBZUAI/swiftformer-xs") if is_vision_available() else None
+
+ @slow
+ def test_inference_image_classification_head(self):
+ model = TFSwiftFormerForImageClassification.from_pretrained("MBZUAI/swiftformer-xs")
+
+        image_processor = self.default_image_processor
+        image = prepare_img()
+        inputs = image_processor(images=image, return_tensors="tf")
+
+ # forward pass
+ outputs = model(**inputs)
+
+ # verify the logits
+ expected_shape = tf.TensorShape((1, 1000))
+ self.assertEqual(outputs.logits.shape, expected_shape)
+
+ expected_slice = tf.constant([[-2.1703e00, 2.1107e00, -2.0811e00]])
+ tf.debugging.assert_near(outputs.logits[0, :3], expected_slice, atol=1e-4)
diff --git a/tests/models/table_transformer/test_modeling_table_transformer.py b/tests/models/table_transformer/test_modeling_table_transformer.py
index 79da1d191063..989517eb8c09 100644
--- a/tests/models/table_transformer/test_modeling_table_transformer.py
+++ b/tests/models/table_transformer/test_modeling_table_transformer.py
@@ -261,6 +261,10 @@ def test_table_transformer_no_timm_backbone(self):
def test_inputs_embeds(self):
pass
+ @unittest.skip(reason="Table Transformer does not use inputs_embeds")
+ def test_inputs_embeds_matches_input_ids(self):
+ pass
+
@unittest.skip(reason="Table Transformer does not have a get_input_embeddings method")
def test_model_common_attributes(self):
pass
@@ -456,6 +460,9 @@ def test_different_timm_backbone(self):
# let's pick a random timm backbone
config.backbone = "tf_mobilenetv3_small_075"
+ config.backbone_config = None
+ config.use_timm_backbone = True
+ config.backbone_kwargs = {"out_indices": [2, 3, 4]}
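+        # out_indices selects which timm backbone stages are returned; propagation is verified further down via intermediate_channel_sizes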
for model_class in self.all_model_classes:
model = model_class(config)
@@ -471,6 +478,11 @@ def test_different_timm_backbone(self):
self.model_tester.num_labels + 1,
)
self.assertEqual(outputs.logits.shape, expected_shape)
+            # Confirm out_indices was propagated to backbone
+ self.assertEqual(len(model.model.backbone.conv_encoder.intermediate_channel_sizes), 3)
+ else:
+            # Confirm out_indices was propagated to backbone
+ self.assertEqual(len(model.backbone.conv_encoder.intermediate_channel_sizes), 3)
self.assertTrue(outputs)
diff --git a/tests/models/tapas/test_modeling_tapas.py b/tests/models/tapas/test_modeling_tapas.py
index 6a482d03bed9..7918cad2b98c 100644
--- a/tests/models/tapas/test_modeling_tapas.py
+++ b/tests/models/tapas/test_modeling_tapas.py
@@ -520,8 +520,13 @@ def test_for_sequence_classification(self):
self.model_tester.create_and_check_for_sequence_classification(*config_and_inputs)
@require_tensorflow_probability
+ @unittest.skip("tfp is not defined even if installed. FIXME @Arthur in a followup PR!")
def test_pt_tf_model_equivalence(self):
- super().test_pt_tf_model_equivalence()
+ pass
+
+ @unittest.skip("tfp is not defined even if installed. FIXME @Arthur in a followup PR!")
+ def test_tf_from_pt_safetensors(self):
+ pass
def prepare_tapas_single_inputs_for_inference():
diff --git a/tests/models/tapas/test_modeling_tf_tapas.py b/tests/models/tapas/test_modeling_tf_tapas.py
index 7687144eaf2f..0da8b2879e8c 100644
--- a/tests/models/tapas/test_modeling_tf_tapas.py
+++ b/tests/models/tapas/test_modeling_tf_tapas.py
@@ -528,6 +528,10 @@ def test_keras_fit(self):
def test_loss_computation(self):
pass
+ @unittest.skip("tfp is not defined even if installed. FIXME @Arthur in a followup PR!")
+ def test_pt_tf_model_equivalence(self):
+ pass
+
def prepare_tapas_single_inputs_for_inference():
# Here we prepare a single table-question pair to test TAPAS inference on:
diff --git a/tests/models/timm_backbone/test_modeling_timm_backbone.py b/tests/models/timm_backbone/test_modeling_timm_backbone.py
index 60ab9e2a217e..1cd04cd48439 100644
--- a/tests/models/timm_backbone/test_modeling_timm_backbone.py
+++ b/tests/models/timm_backbone/test_modeling_timm_backbone.py
@@ -169,6 +169,18 @@ def test_from_pretrained_no_checkpoint(self):
def test_save_load(self):
pass
+ @unittest.skip("No support for low_cpu_mem_usage=True.")
+ def test_save_load_low_cpu_mem_usage(self):
+ pass
+
+ @unittest.skip("No support for low_cpu_mem_usage=True.")
+ def test_save_load_low_cpu_mem_usage_checkpoints(self):
+ pass
+
+ @unittest.skip("No support for low_cpu_mem_usage=True.")
+ def test_save_load_low_cpu_mem_usage_no_safetensors(self):
+ pass
+
@unittest.skip("model weights aren't tied in TimmBackbone.")
def test_tie_model_weights(self):
pass
diff --git a/tests/models/vilt/test_modeling_vilt.py b/tests/models/vilt/test_modeling_vilt.py
index 4c877c2e1852..3e25fc3bbac7 100644
--- a/tests/models/vilt/test_modeling_vilt.py
+++ b/tests/models/vilt/test_modeling_vilt.py
@@ -357,6 +357,13 @@ def test_batching_equivalence(self):
def test_model_outputs_equivalence(self):
pass
+ @unittest.skip(
+ reason="""VilT samples image tokens from a multinomial distribution, resulting in not deterministic
+ hidden states. Cannot test equivalence on logit level"""
+ )
+ def test_inputs_embeds_matches_input_ids(self):
+ pass
+
def test_attention_outputs(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.return_dict = True
diff --git a/tests/models/vipllava/test_modeling_vipllava.py b/tests/models/vipllava/test_modeling_vipllava.py
index e783b3470070..ff84f717847b 100644
--- a/tests/models/vipllava/test_modeling_vipllava.py
+++ b/tests/models/vipllava/test_modeling_vipllava.py
@@ -14,7 +14,6 @@
# limitations under the License.
""" Testing suite for the PyTorch VipLlava model. """
-import copy
import gc
import unittest
@@ -186,171 +185,6 @@ def test_training_gradient_checkpointing_use_reentrant(self):
def test_training_gradient_checkpointing_use_reentrant_false(self):
pass
- # Copied from tests.test_modeling_common.ModelTesterMixin.test_resize_tokens_embeddings with config.vocab_size->config.text_config.vocab_size
- def test_resize_tokens_embeddings(self):
- (
- original_config,
- inputs_dict,
- ) = self.model_tester.prepare_config_and_inputs_for_common()
- if not self.test_resize_embeddings:
- return
-
- for model_class in self.all_model_classes:
- config = copy.deepcopy(original_config)
- model = model_class(config)
- model.to(torch_device)
-
- if self.model_tester.is_training is False:
- model.eval()
-
- model_vocab_size = config.text_config.vocab_size
- # Retrieve the embeddings and clone theme
- model_embed = model.resize_token_embeddings(model_vocab_size)
- cloned_embeddings = model_embed.weight.clone()
-
- # Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
- model_embed = model.resize_token_embeddings(model_vocab_size + 10)
- self.assertEqual(model.config.text_config.vocab_size, model_vocab_size + 10)
- # Check that it actually resizes the embeddings matrix
- self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] + 10)
- # Check that the model can still do a forward pass successfully (every parameter should be resized)
- model(**self._prepare_for_class(inputs_dict, model_class))
-
- # Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
- model_embed = model.resize_token_embeddings(model_vocab_size - 15)
- self.assertEqual(model.config.text_config.vocab_size, model_vocab_size - 15)
- # Check that it actually resizes the embeddings matrix
- self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] - 15)
-
- # Check that the model can still do a forward pass successfully (every parameter should be resized)
- # Input ids should be clamped to the maximum size of the vocabulary
- inputs_dict["input_ids"].clamp_(max=model_vocab_size - 15 - 1)
-
- # make sure that decoder_input_ids are resized as well
- if "decoder_input_ids" in inputs_dict:
- inputs_dict["decoder_input_ids"].clamp_(max=model_vocab_size - 15 - 1)
- model(**self._prepare_for_class(inputs_dict, model_class))
-
- # Check that adding and removing tokens has not modified the first part of the embedding matrix.
- models_equal = True
- for p1, p2 in zip(cloned_embeddings, model_embed.weight):
- if p1.data.ne(p2.data).sum() > 0:
- models_equal = False
-
- self.assertTrue(models_equal)
-
- config = copy.deepcopy(original_config)
- model = model_class(config)
- model.to(torch_device)
-
- model_vocab_size = config.text_config.vocab_size
- model.resize_token_embeddings(model_vocab_size + 10, pad_to_multiple_of=1)
- self.assertTrue(model.config.text_config.vocab_size + 10, model_vocab_size)
-
- model_embed = model.resize_token_embeddings(model_vocab_size, pad_to_multiple_of=64)
- self.assertTrue(model_embed.weight.shape[0] // 64, 0)
-
- self.assertTrue(model_embed.weight.shape[0], model.config.text_config.vocab_size)
- self.assertTrue(model.config.text_config.vocab_size, model.vocab_size)
-
- model_embed = model.resize_token_embeddings(model_vocab_size + 13, pad_to_multiple_of=64)
- self.assertTrue(model_embed.weight.shape[0] // 64, 0)
-
- # Check that resizing a model to a multiple of pad_to_multiple leads to a model of exactly that size
- target_dimension = 128
- model_embed = model.resize_token_embeddings(target_dimension, pad_to_multiple_of=64)
- self.assertTrue(model_embed.weight.shape[0], target_dimension)
-
- with self.assertRaisesRegex(
- ValueError,
- "Asking to pad the embedding matrix to a multiple of `1.3`, which is not and integer. Please make sure to pass an integer",
- ):
- model.resize_token_embeddings(model_vocab_size, pad_to_multiple_of=1.3)
-
- # Copied from tests.test_modeling_common.ModelTesterMixin.test_resize_embeddings_untied with config.vocab_size->config.text_config.vocab_size
- def test_resize_embeddings_untied(self):
- (
- original_config,
- inputs_dict,
- ) = self.model_tester.prepare_config_and_inputs_for_common()
- if not self.test_resize_embeddings:
- return
-
- original_config.tie_word_embeddings = False
-
- # if model cannot untied embeddings -> leave test
- if original_config.tie_word_embeddings:
- return
-
- for model_class in self.all_model_classes:
- config = copy.deepcopy(original_config)
- model = model_class(config).to(torch_device)
-
- # if no output embeddings -> leave test
- if model.get_output_embeddings() is None:
- continue
-
- # Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
- model_vocab_size = config.text_config.vocab_size
- model.resize_token_embeddings(model_vocab_size + 10)
- self.assertEqual(model.config.text_config.vocab_size, model_vocab_size + 10)
- output_embeds = model.get_output_embeddings()
- self.assertEqual(output_embeds.weight.shape[0], model_vocab_size + 10)
- # Check bias if present
- if output_embeds.bias is not None:
- self.assertEqual(output_embeds.bias.shape[0], model_vocab_size + 10)
- # Check that the model can still do a forward pass successfully (every parameter should be resized)
- model(**self._prepare_for_class(inputs_dict, model_class))
-
- # Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
- model.resize_token_embeddings(model_vocab_size - 15)
- self.assertEqual(model.config.text_config.vocab_size, model_vocab_size - 15)
- # Check that it actually resizes the embeddings matrix
- output_embeds = model.get_output_embeddings()
- self.assertEqual(output_embeds.weight.shape[0], model_vocab_size - 15)
- # Check bias if present
- if output_embeds.bias is not None:
- self.assertEqual(output_embeds.bias.shape[0], model_vocab_size - 15)
- # Check that the model can still do a forward pass successfully (every parameter should be resized)
- # Input ids should be clamped to the maximum size of the vocabulary
- inputs_dict["input_ids"].clamp_(max=model_vocab_size - 15 - 1)
- if "decoder_input_ids" in inputs_dict:
- inputs_dict["decoder_input_ids"].clamp_(max=model_vocab_size - 15 - 1)
- # Check that the model can still do a forward pass successfully (every parameter should be resized)
- model(**self._prepare_for_class(inputs_dict, model_class))
-
- # Copied from tests.test_modeling_common.ModelTesterMixin.test_tie_model_weights with config.vocab_size->config.text_config.vocab_size
- def test_tie_model_weights(self):
- if not self.test_torchscript:
- return
-
- config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
- def check_same_values(layer_1, layer_2):
- equal = True
- for p1, p2 in zip(layer_1.weight, layer_2.weight):
- if p1.data.ne(p2.data).sum() > 0:
- equal = False
- return equal
-
- for model_class in self.all_model_classes:
- config.torchscript = True
- model_not_tied = model_class(config)
- if model_not_tied.get_output_embeddings() is None:
- continue
-
- config_tied = copy.deepcopy(config)
- config_tied.torchscript = False
- model_tied = model_class(config_tied)
- params_tied = list(model_tied.parameters())
- # Check that the embedding layer and decoding layer are the same in size and in value
- # self.assertTrue(check_same_values(embeddings, decoding))
-
- # Check that after resize they remain tied.
- model_tied.resize_token_embeddings(config.text_config.vocab_size + 10)
- params_tied_2 = list(model_tied.parameters())
- self.assertEqual(len(params_tied_2), len(params_tied))
-
@require_torch
class VipLlavaForConditionalGenerationIntegrationTest(unittest.TestCase):
diff --git a/tests/models/vitmatte/test_image_processing_vitmatte.py b/tests/models/vitmatte/test_image_processing_vitmatte.py
index e1009c759283..e86cfde1e5cb 100644
--- a/tests/models/vitmatte/test_image_processing_vitmatte.py
+++ b/tests/models/vitmatte/test_image_processing_vitmatte.py
@@ -192,3 +192,7 @@ def test_padding(self):
image = np.random.randn(3, 249, 491)
images = image_processing.pad_image(image)
assert images.shape == (3, 256, 512)
+
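+        # presumably pad_image rounds height/width up to the next multiple of 32, so 249 -> 256 while 512 stays unchanged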
+ image = np.random.randn(3, 249, 512)
+ images = image_processing.pad_image(image)
+ assert images.shape == (3, 256, 512)
diff --git a/tests/models/vivit/test_modeling_vivit.py b/tests/models/vivit/test_modeling_vivit.py
index 9b299c9afa40..cbb45731ae55 100644
--- a/tests/models/vivit/test_modeling_vivit.py
+++ b/tests/models/vivit/test_modeling_vivit.py
@@ -353,3 +353,26 @@ def test_inference_for_video_classification(self):
expected_slice = torch.tensor([-0.9498, 2.7971, -1.4049, 0.1024, -1.8353]).to(torch_device)
self.assertTrue(torch.allclose(outputs.logits[0, :5], expected_slice, atol=1e-4))
+
+ @slow
+ def test_inference_interpolate_pos_encoding(self):
+ # Vivit models have an `interpolate_pos_encoding` argument in their forward method,
+ # allowing to interpolate the pre-trained position embeddings in order to use
+ # the model on higher resolutions. The DINO model by Facebook AI leverages this
+ # to visualize self-attention on higher resolution images.
+ model = VivitModel.from_pretrained("google/vivit-b-16x2").to(torch_device)
+
+ image_processor = VivitImageProcessor.from_pretrained("google/vivit-b-16x2")
+ video = prepare_video()
+ inputs = image_processor(
+ video, size={"shortest_edge": 480}, crop_size={"height": 480, "width": 480}, return_tensors="pt"
+ )
+ pixel_values = inputs.pixel_values.to(torch_device)
+
+ # forward pass
+ with torch.no_grad():
+ outputs = model(pixel_values, interpolate_pos_encoding=True)
+
+ # verify the logits shape
+ expected_shape = torch.Size((1, 3137, 768))
+ self.assertEqual(outputs.last_hidden_state.shape, expected_shape)
diff --git a/tests/models/wav2vec2/test_modeling_wav2vec2.py b/tests/models/wav2vec2/test_modeling_wav2vec2.py
index a5757571a11a..9d86fb245c02 100644
--- a/tests/models/wav2vec2/test_modeling_wav2vec2.py
+++ b/tests/models/wav2vec2/test_modeling_wav2vec2.py
@@ -25,6 +25,7 @@
import numpy as np
from datasets import load_dataset
+from pytest import mark
from transformers import Wav2Vec2Config, is_torch_available
from transformers.testing_utils import (
@@ -33,9 +34,11 @@
is_pt_flax_cross_test,
is_pyctcdecode_available,
is_torchaudio_available,
+ require_flash_attn,
require_pyctcdecode,
require_soundfile,
require_torch,
+ require_torch_gpu,
require_torchaudio,
run_test_in_subprocess,
slow,
@@ -1995,3 +1998,52 @@ def run_model(lang):
for lang in LANG_MAP.keys():
assert run_model(lang) == TRANSCRIPTIONS[lang]
+
+ @require_flash_attn
+ @require_torch_gpu
+ @mark.flash_attn_test
+ def test_inference_ctc_fa2(self):
+ model_fa = Wav2Vec2ForCTC.from_pretrained(
+ "facebook/wav2vec2-base-960h", attn_implementation="flash_attention_2", torch_dtype=torch.bfloat16
+ )
+ model_fa.to(torch_device)
+ processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h", do_lower_case=True)
+ input_speech = self._load_datasamples(1)
+
+ input_values = processor(input_speech, return_tensors="pt").input_values.to(torch_device)
+
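+        # Flash Attention 2 kernels require half-precision (fp16/bf16) inputs, hence the bfloat16 cast below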
+ with torch.no_grad():
+ logits = model_fa(input_values.to(torch.bfloat16)).logits
+
+ predicted_ids = torch.argmax(logits, dim=-1)
+ predicted_trans = processor.batch_decode(predicted_ids)
+
+ EXPECTED_TRANSCRIPTIONS = ["a man said to the universe sir i exist"]
+ self.assertListEqual(predicted_trans, EXPECTED_TRANSCRIPTIONS)
+
+ @require_flash_attn
+ @require_torch_gpu
+ @mark.flash_attn_test
+ def test_inference_ctc_fa2_batched(self):
+ model_fa = Wav2Vec2ForCTC.from_pretrained(
+ "facebook/wav2vec2-base-960h", attn_implementation="flash_attention_2", torch_dtype=torch.bfloat16
+ )
+ model_fa.to(torch_device)
+ processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h", do_lower_case=True)
+
+ input_speech = self._load_datasamples(2)
+
+ inputs = processor(input_speech, return_tensors="pt", padding=True, return_attention_mask=True)
+ inputs = inputs.to(torch_device)
+
+ with torch.no_grad():
+ logits = model_fa(inputs.input_values.to(torch.bfloat16), attention_mask=inputs.attention_mask).logits
+
+ predicted_ids = torch.argmax(logits, dim=-1)
+ predicted_trans = processor.batch_decode(predicted_ids)
+
+ EXPECTED_TRANSCRIPTIONS = [
+ "a man said to the universe sir i exist",
+ "sweat covered brion's body trickling into the tight lowing cloth that was the only garment he wore",
+ ]
+ self.assertListEqual(predicted_trans, EXPECTED_TRANSCRIPTIONS)
diff --git a/tests/models/wavlm/test_modeling_wavlm.py b/tests/models/wavlm/test_modeling_wavlm.py
index c0a8eed2096f..3cf4348f6c83 100644
--- a/tests/models/wavlm/test_modeling_wavlm.py
+++ b/tests/models/wavlm/test_modeling_wavlm.py
@@ -288,6 +288,15 @@ def check_seq_classifier_training(self, config, input_values, *args):
loss.backward()
+ def check_output_attentions(self, config, input_values, attention_mask):
+ model = WavLMModel(config=config)
+ model.config.layerdrop = 1.0
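+        # with layerdrop=1.0 every encoder layer would normally be skipped in training mode; requesting attentions is expected to bypass layerdrop so they are still returned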
+ model.to(torch_device)
+ model.train()
+
+ outputs = model(input_values, attention_mask=attention_mask, output_attentions=True)
+ self.parent.assertTrue(len(outputs.attentions) > 0)
+
def check_labels_out_of_vocab(self, config, input_values, *args):
model = WavLMForCTC(config)
model.to(torch_device)
@@ -354,6 +363,10 @@ def test_seq_classifier_train(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.check_seq_classifier_training(*config_and_inputs)
+ def test_output_attentions(self):
+ config_and_inputs = self.model_tester.prepare_config_and_inputs()
+ self.model_tester.check_output_attentions(*config_and_inputs)
+
def test_labels_out_of_vocab(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.check_labels_out_of_vocab(*config_and_inputs)
diff --git a/tests/models/whisper/test_modeling_whisper.py b/tests/models/whisper/test_modeling_whisper.py
index 6acecb8a48cf..32b13bd5425f 100644
--- a/tests/models/whisper/test_modeling_whisper.py
+++ b/tests/models/whisper/test_modeling_whisper.py
@@ -477,13 +477,11 @@ def _get_input_ids_and_config(self, batch_size=3):
# cut to half length & take max batch_size=batch_size
input_ids = input_ids[:batch_size, :, :]
- # generate max 3 tokens
- max_length = 4
if config.eos_token_id is not None and config.pad_token_id is None:
# hack to allow generate for models such as GPT2 as is done in `generate()`
config.pad_token_id = config.eos_token_id
- return config, input_ids, None, max_length
+ return config, input_ids, None
def test_inputs_embeds(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -835,10 +833,10 @@ def _get_encoder_outputs(
encoder_outputs["last_hidden_state"] = encoder_outputs.last_hidden_state.repeat_interleave(
num_interleave, dim=0
)
+ generation_config = copy.deepcopy(model.generation_config)
+ model._prepare_special_tokens(generation_config)
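+        # presumably _prepare_special_tokens resolves the special token ids (such as the decoder start token) on the copied generation config before it is read below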
input_ids = input_ids[:, :, 0]
- input_ids = torch.zeros_like(input_ids[:, :1], dtype=torch.long) + torch.tensor(
- [model._get_decoder_start_token_id()], device=input_ids.device
- )
+ input_ids = torch.zeros_like(input_ids[:, :1], dtype=torch.long) + generation_config.decoder_start_token_id
attention_mask = None
return encoder_outputs, input_ids, attention_mask
diff --git a/tests/models/xlnet/test_modeling_xlnet.py b/tests/models/xlnet/test_modeling_xlnet.py
index ff89a9aca3ec..e2c0f6d7e70d 100644
--- a/tests/models/xlnet/test_modeling_xlnet.py
+++ b/tests/models/xlnet/test_modeling_xlnet.py
@@ -646,7 +646,8 @@ def _check_hidden_states_for_generate(
seq_len = 1
else:
# for first item dummy PAD token is appended so need one more
- seq_len = (min_length + 1) if idx == 0 else min_length
+ # else offset+dummy_token when using cache
+ seq_len = (min_length + 1) if idx == 0 else 3
expected_shape = (batch_size * num_beam_groups, seq_len, config.hidden_size)
self.assertEqual(layer_hidden_states.shape, expected_shape)
@@ -665,8 +666,11 @@ def _check_attentions_for_generate(
tgt_len = min_length
# for first item dummy PAD token is appended so need one more
+ # every token after consists of offset+dummy_token length when using cache
if idx == 0:
tgt_len += 1
+ else:
+ tgt_len = 3
src_len = min_length + idx + 1
diff --git a/tests/models/yolos/test_image_processing_yolos.py b/tests/models/yolos/test_image_processing_yolos.py
index a1bc2ff172f7..f7465779b594 100644
--- a/tests/models/yolos/test_image_processing_yolos.py
+++ b/tests/models/yolos/test_image_processing_yolos.py
@@ -18,6 +18,8 @@
import pathlib
import unittest
+from parameterized import parameterized
+
from transformers.testing_utils import require_torch, require_vision, slow
from transformers.utils import is_torch_available, is_vision_available
@@ -98,7 +100,7 @@ def get_expected_values(self, image_inputs, batched=False):
if max_original_size / min_original_size * size > max_size:
size = int(round(max_size * min_original_size / max_original_size))
- if width < height and width != size:
+ if width <= height and width != size:
height = int(size * height / width)
width = size
elif height < width and height != size:
@@ -183,17 +185,32 @@ def test_equivalence_padding(self):
torch.allclose(encoded_images_with_method["pixel_values"], encoded_images["pixel_values"], atol=1e-4)
)
- def test_resize_max_size_respected(self):
+ @parameterized.expand(
+ [
+ ((3, 100, 1500), 1333, 800),
+ ((3, 400, 400), 1333, 800),
+ ((3, 1500, 1500), 1333, 800),
+ ((3, 800, 1333), 1333, 800),
+ ((3, 1333, 800), 1333, 800),
+ ((3, 800, 800), 400, 400),
+ ]
+ )
+ def test_resize_max_size_respected(self, image_size, longest_edge, shortest_edge):
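+        # each parameterized case is (input image shape, longest_edge, shortest_edge) for the resize `size` dict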
image_processor = self.image_processing_class(**self.image_processor_dict)
# create torch tensors as image
- image = torch.randint(0, 256, (3, 100, 1500), dtype=torch.uint8)
+ image = torch.randint(0, 256, image_size, dtype=torch.uint8)
processed_image = image_processor(
- image, size={"longest_edge": 1333, "shortest_edge": 800}, do_pad=False, return_tensors="pt"
+ image,
+ size={"longest_edge": longest_edge, "shortest_edge": shortest_edge},
+ do_pad=False,
+ return_tensors="pt",
)["pixel_values"]
- self.assertTrue(processed_image.shape[-1] <= 1333)
- self.assertTrue(processed_image.shape[-2] <= 800)
+ shape = list(processed_image.shape[-2:])
+ max_size, min_size = max(shape), min(shape)
+        self.assertTrue(max_size <= longest_edge, f"Expected max_size <= {longest_edge}, got image shape {shape}")
+        self.assertTrue(min_size <= shortest_edge, f"Expected min_size <= {shortest_edge}, got image shape {shape}")
@slow
def test_call_pytorch_with_coco_detection_annotations(self):
diff --git a/tests/optimization/test_optimization.py b/tests/optimization/test_optimization.py
index 0ee8513dacde..6d6707db5a4b 100644
--- a/tests/optimization/test_optimization.py
+++ b/tests/optimization/test_optimization.py
@@ -36,6 +36,7 @@
get_inverse_sqrt_schedule,
get_linear_schedule_with_warmup,
get_polynomial_decay_schedule_with_warmup,
+ get_wsd_schedule,
)
@@ -150,6 +151,10 @@ def test_schedulers(self):
{"num_warmup_steps": 2},
[0.0, 5.0, 10.0, 8.165, 7.071, 6.325, 5.774, 5.345, 5.0, 4.714],
),
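+            # warmup-stable-decay: 2 warmup steps up to the peak LR, a 2-step stable phase,
+            # then a 3-step decay down to min_lr_ratio * peak (0.1 * 10.0 = 1.0)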
+ get_wsd_schedule: (
+ {"num_warmup_steps": 2, "num_stable_steps": 2, "num_decay_steps": 3, "min_lr_ratio": 0.1},
+ [0.0, 5.0, 10.0, 10.0, 10.0, 7.75, 3.25, 1.0, 1.0, 1.0],
+ ),
}
for scheduler_func, data in scheds.items():
diff --git a/tests/pipelines/test_pipelines_automatic_speech_recognition.py b/tests/pipelines/test_pipelines_automatic_speech_recognition.py
index ddf901180893..a1ab2947830b 100644
--- a/tests/pipelines/test_pipelines_automatic_speech_recognition.py
+++ b/tests/pipelines/test_pipelines_automatic_speech_recognition.py
@@ -755,6 +755,94 @@ def test_whisper_timestamp_prediction(self):
},
)
+ @slow
+ @require_torch
+ def test_whisper_large_timestamp_prediction(self):
+ ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
+ array = np.concatenate(
+ [ds[40]["audio"]["array"], ds[41]["audio"]["array"], ds[42]["audio"]["array"], ds[43]["audio"]["array"]]
+ )
+ pipe = pipeline(model="openai/whisper-large-v3", return_timestamps=True)
+
+ output = pipe(ds[40]["audio"])
+ self.assertDictEqual(
+ output,
+ {
+ "text": " A man said to the universe, Sir, I exist.",
+ "chunks": [{"text": " A man said to the universe, Sir, I exist.", "timestamp": (0.0, 4.08)}],
+ },
+ )
+
+ output = pipe(array, chunk_length_s=10)
+
+ self.assertDictEqual(
+ nested_simplify(output),
+ {
+ "chunks": [
+ {"timestamp": (0.0, 2.0), "text": (" A man said to the universe,")},
+ {"timestamp": (2.0, 4.1), "text": (" Sir, I exist.")},
+ {"timestamp": (5.14, 5.96), "text": (" Sweat covered")},
+ {"timestamp": (5.96, 8.02), "text": (" Breon's body, trickling into")},
+ {"timestamp": (8.02, 10.67), "text": (" the tight loincloth that was the only garment he wore,")},
+ {"timestamp": (10.67, 13.67), "text": (" the cut on his chest still dripping blood,")},
+ {"timestamp": (13.67, 17.61), "text": (" the ache of his overstrained eyes.")},
+ {
+ "timestamp": (17.61, 24.0),
+ "text": (
+ " Even the soaring arena around him with thousands of spectators were trivialities not worth thinking about."
+ ),
+ },
+ {
+ "timestamp": (24.0, 29.94),
+ "text": (" His instant of panic was followed by a small, sharp blow high on his chest."),
+ },
+ ],
+ "text": (
+ " A man said to the universe, Sir, I exist. Sweat covered Breon's"
+ " body, trickling into the tight loincloth that was the only garment"
+ " he wore, the cut on his chest still dripping blood, the ache of his"
+ " overstrained eyes. Even the soaring arena around him with thousands"
+ " of spectators were trivialities not worth thinking about. His "
+ "instant of panic was followed by a small, sharp blow high on his chest."
+ ),
+ },
+ )
+
+ output = pipe(array)
+ self.assertDictEqual(
+ output,
+ {
+ "chunks": [
+ {"timestamp": (0.0, 1.96), "text": " A man said to the universe,"},
+ {"timestamp": (2.7, 4.1), "text": " Sir, I exist."},
+ {"timestamp": (5.14, 6.84), "text": " Sweat covered Brion's body,"},
+ {
+ "timestamp": (7.4, 10.68),
+ "text": " trickling into the tight loincloth that was the only garment he wore,",
+ },
+ {"timestamp": (11.6, 13.94), "text": " the cut on his chest still dripping blood,"},
+ {"timestamp": (14.78, 16.72), "text": " the ache of his overstrained eyes,"},
+ {
+ "timestamp": (17.32, 21.16),
+ "text": " even the soaring arena around him with the thousands of spectators",
+ },
+ {"timestamp": (21.16, 23.94), "text": " were trivialities not worth thinking about."},
+ {
+ "timestamp": (24.42, 29.94),
+ "text": " His instant panic was followed by a small sharp blow high on his chest.",
+ },
+ ],
+ "text": (
+ " A man said to the universe, Sir, I exist. Sweat covered Brion's body,"
+ " trickling into the tight loincloth that was the only garment he wore, "
+ "the cut on his chest still dripping blood, the ache of his overstrained "
+ "eyes, even the soaring arena around him with the thousands of spectators "
+ "were trivialities not worth thinking about. His instant panic was followed "
+ "by a small sharp blow high on his chest."
+ ),
+ },
+ )
+
@slow
@require_torch
def test_whisper_word_timestamps_batched(self):
@@ -799,6 +887,49 @@ def test_whisper_word_timestamps_batched(self):
output = pipe(sample, batch_size=2)
self.assertDictEqual(output, EXPECTED_OUTPUT)
+ @slow
+ @require_torch
+ def test_whisper_large_word_timestamps_batched(self):
+ pipe = pipeline(
+ task="automatic-speech-recognition",
+ model="openai/whisper-large-v3",
+ return_timestamps="word",
+ )
+ data = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+ sample = data[0]["audio"]
+
+ # not the same output as test_simple_whisper_asr because of chunking
+ EXPECTED_OUTPUT = {
+ "text": " Mr. Quilter is the apostle of the middle classes, and we are glad to welcome his gospel.",
+ "chunks": [
+ {"text": " Mr.", "timestamp": (0.0, 0.74)},
+ {"text": " Quilter", "timestamp": (0.74, 1.04)},
+ {"text": " is", "timestamp": (1.04, 1.3)},
+ {"text": " the", "timestamp": (1.3, 1.44)},
+ {"text": " apostle", "timestamp": (1.44, 1.74)},
+ {"text": " of", "timestamp": (1.74, 2.18)},
+ {"text": " the", "timestamp": (2.18, 2.28)},
+ {"text": " middle", "timestamp": (2.28, 2.5)},
+ {"text": " classes,", "timestamp": (2.5, 3.0)},
+ {"text": " and", "timestamp": (3.0, 3.4)},
+ {"text": " we", "timestamp": (3.4, 3.5)},
+ {"text": " are", "timestamp": (3.5, 3.6)},
+ {"text": " glad", "timestamp": (3.6, 3.84)},
+ {"text": " to", "timestamp": (3.84, 4.1)},
+ {"text": " welcome", "timestamp": (4.1, 4.4)},
+ {"text": " his", "timestamp": (4.4, 4.7)},
+ {"text": " gospel.", "timestamp": (4.7, 5.34)},
+ ],
+ }
+
+ # batch size 1: copy the audio sample since pipeline consumes it
+ output = pipe(sample.copy(), batch_size=1)
+ self.assertDictEqual(output, EXPECTED_OUTPUT)
+
+ # batch size 2: input audio is chunked into smaller pieces so it's testing batching
+ output = pipe(sample, batch_size=2)
+ self.assertDictEqual(output, EXPECTED_OUTPUT)
+
@require_torch
@slow
def test_torch_speech_encoder_decoder(self):
diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index 7b7301d6d8cd..763c7d1a8833 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -48,6 +48,7 @@
require_tf,
require_torch,
require_torch_accelerator,
+ require_torch_multi_accelerator,
require_torch_or_tf,
slow,
torch_device,
@@ -519,6 +520,52 @@ def test_pipeline_negative_device(self):
actual_output = classifier("Test input.")
self.assertEqual(expected_output, actual_output)
+ @require_torch_accelerator
+ def test_pipeline_no_device(self):
+ # Test when no device is passed to pipeline
+ import torch
+
+ from transformers import AutoModelForCausalLM
+
+ tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-bert")
+ # Case 1: Model is manually moved to device
+ model = AutoModelForCausalLM.from_pretrained(
+ "hf-internal-testing/tiny-random-bert", torch_dtype=torch.float16
+ ).to(torch_device)
+ model_device = model.device
+ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+ self.assertEqual(pipe.model.device, model_device)
+ # Case 2: Model is loaded by accelerate
+ model = AutoModelForCausalLM.from_pretrained(
+ "hf-internal-testing/tiny-random-bert", device_map=torch_device, torch_dtype=torch.float16
+ )
+ model_device = model.device
+ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+ self.assertEqual(pipe.model.device, model_device)
+ # Case 3: device_map is passed to model and device is passed to pipeline
+ model = AutoModelForCausalLM.from_pretrained(
+ "hf-internal-testing/tiny-random-bert", device_map=torch_device, torch_dtype=torch.float16
+ )
+ with self.assertRaises(ValueError):
+ pipe = pipeline("text-generation", model=model, device="cpu", tokenizer=tokenizer)
+
+ @require_torch_multi_accelerator
+ def test_pipeline_device_not_equal_model_device(self):
+ # Test when device ids are different, pipeline should move the model to the passed device id
+ import torch
+
+ from transformers import AutoModelForCausalLM
+
+ tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-bert")
+ model_device = f"{torch_device}:1"
+ model = AutoModelForCausalLM.from_pretrained(
+ "hf-internal-testing/tiny-random-bert", torch_dtype=torch.float16
+ ).to(model_device)
+ target_device = f"{torch_device}:0"
+ self.assertNotEqual(model_device, target_device)
+ pipe = pipeline("text-generation", model=model, device=target_device, tokenizer=tokenizer)
+ self.assertEqual(pipe.model.device, torch.device(target_device))
+
@slow
@require_torch
def test_load_default_pipelines_pt(self):
@@ -541,11 +588,10 @@ def test_load_default_pipelines_pt(self):
@slow
@require_tf
def test_load_default_pipelines_tf(self):
- import tensorflow as tf
-
+ from transformers.modeling_tf_utils import keras
from transformers.pipelines import SUPPORTED_TASKS
- set_seed_fn = lambda: tf.random.set_seed(0) # noqa: E731
+ set_seed_fn = lambda: keras.utils.set_random_seed(0) # noqa: E731
for task in SUPPORTED_TASKS.keys():
if task == "table-question-answering":
# test table in seperate test due to more dependencies
@@ -553,7 +599,7 @@ def test_load_default_pipelines_tf(self):
self.check_default_pipeline(task, "tf", set_seed_fn, self.check_models_equal_tf)
- # clean-up as much as possible GPU memory occupied by PyTorch
+ # clean-up as much as possible GPU memory occupied by TF
gc.collect()
@slow
diff --git a/tests/pipelines/test_pipelines_text_generation.py b/tests/pipelines/test_pipelines_text_generation.py
index ada04c7dbeda..542f393b2025 100644
--- a/tests/pipelines/test_pipelines_text_generation.py
+++ b/tests/pipelines/test_pipelines_text_generation.py
@@ -177,6 +177,48 @@ def test_small_chat_model_pt(self):
],
)
+ @require_torch
+ def test_small_chat_model_with_dataset_pt(self):
+ from torch.utils.data import Dataset
+
+ from transformers.pipelines.pt_utils import KeyDataset
+
+ class MyDataset(Dataset):
+ data = [
+ [
+ {"role": "system", "content": "This is a system message."},
+ {"role": "user", "content": "This is a test"},
+ {"role": "assistant", "content": "This is a reply"},
+ ],
+ ]
+
+ def __len__(self):
+ return 1
+
+ def __getitem__(self, i):
+ return {"text": self.data[i]}
+
+ text_generator = pipeline(
+ task="text-generation", model="rocketknight1/tiny-gpt2-with-chatml-template", framework="pt"
+ )
+
+ dataset = MyDataset()
+ key_dataset = KeyDataset(dataset, "text")
+
+ for outputs in text_generator(key_dataset, do_sample=False, max_new_tokens=10):
+ expected_chat = dataset.data[0] + [
+ {
+ "role": "assistant",
+ "content": " factors factors factors factors factors factors factors factors factors factors",
+ }
+ ]
+ self.assertEqual(
+ outputs,
+ [
+ {"generated_text": expected_chat},
+ ],
+ )
+
@require_tf
def test_small_model_tf(self):
text_generator = pipeline(task="text-generation", model="sshleifer/tiny-ctrl", framework="tf")
diff --git a/tests/quantization/aqlm_integration/test_aqlm.py b/tests/quantization/aqlm_integration/test_aqlm.py
index 46b64573b938..3b0dd99adcd9 100644
--- a/tests/quantization/aqlm_integration/test_aqlm.py
+++ b/tests/quantization/aqlm_integration/test_aqlm.py
@@ -196,9 +196,14 @@ def test_quantized_model_compile(self):
"""
# Sample tokens greedily
- def decode_one_tokens(model, cur_token, input_pos, cache_position):
+ def decode_one_tokens(model, cur_token, input_pos, cache_position, past_key_values):
logits = model(
- cur_token, position_ids=input_pos, cache_position=cache_position, return_dict=False, use_cache=True
+ cur_token,
+ position_ids=input_pos,
+ cache_position=cache_position,
+ past_key_values=past_key_values,
+ return_dict=False,
+ use_cache=True,
)[0]
new_token = torch.argmax(logits[:, [-1]], dim=-1).to(torch.int)
@@ -209,7 +214,13 @@ def decode_one_tokens(model, cur_token, input_pos, cache_position):
seq_length = input_ids.shape[1]
# Setup static KV cache for generation
- self.quantized_model._setup_cache(StaticCache, 1, max_cache_len=seq_length + self.max_new_tokens + 1)
+ past_key_values = StaticCache(
+ config=self.quantized_model.config,
+ max_batch_size=1,
+ max_cache_len=seq_length + self.max_new_tokens + 1,
+ device=torch_device,
+ dtype=self.quantized_model.config._pre_quantization_dtype,
+ )
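+        # the explicitly constructed StaticCache is passed to the model as `past_key_values` on every forward call below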
# Allocate token ids to be generated and copy prefix ids
cache_position = torch.arange(seq_length, device=torch_device)
@@ -217,7 +228,13 @@ def decode_one_tokens(model, cur_token, input_pos, cache_position):
generated_ids[:, cache_position] = input_ids.to(torch_device).to(torch.int)
# Do a forward pass to fill the prefix cache and compile the kernels if necessary
- logits = self.quantized_model(input_ids, cache_position=cache_position, return_dict=False, use_cache=True)[0]
+ logits = self.quantized_model(
+ input_ids,
+ cache_position=cache_position,
+ past_key_values=past_key_values,
+ return_dict=False,
+ use_cache=True,
+ )[0]
next_token = torch.argmax(logits[:, [-1]], dim=-1).to(torch.int)
generated_ids[:, [seq_length]] = next_token
@@ -229,7 +246,9 @@ def decode_one_tokens(model, cur_token, input_pos, cache_position):
cache_position = torch.tensor([seq_length + 1], device=torch_device)
for _ in range(1, self.max_new_tokens):
with torch.backends.cuda.sdp_kernel(enable_flash=False, enable_mem_efficient=False, enable_math=True):
- next_token = decode_one_tokens(self.quantized_model, next_token.clone(), None, cache_position)
+ next_token = decode_one_tokens(
+ self.quantized_model, next_token.clone(), None, cache_position, past_key_values
+ )
generated_ids.index_copy_(1, cache_position, next_token)
cache_position += 1
diff --git a/tests/quantization/autoawq/test_awq.py b/tests/quantization/autoawq/test_awq.py
index e2369f07b231..20ecd783cf04 100644
--- a/tests/quantization/autoawq/test_awq.py
+++ b/tests/quantization/autoawq/test_awq.py
@@ -471,3 +471,22 @@ def test_generation_mixtral_fused(self):
outputs = model.generate(**inputs, max_new_tokens=12)
self.assertEqual(tokenizer.decode(outputs[0], skip_special_tokens=True), self.EXPECTED_GENERATION_MIXTRAL)
+
+
+@slow
+@require_torch_gpu
+@require_auto_awq
+@require_accelerate
+class AwqScaleTest(unittest.TestCase):
+ model_name = "TechxGenus/starcoder2-3b-AWQ"
+
+ def test_load_quantized_model(self):
+        """
+        Simple test that checks if the scales have been replaced in the quantized model
+        """
+        from awq.modules.act import ScaledActivation
+
+ quantized_model = AutoModelForCausalLM.from_pretrained(
+ "TechxGenus/starcoder2-3b-AWQ", torch_dtype=torch.float16, device_map="cuda"
+ )
+ self.assertTrue(isinstance(quantized_model.model.layers[0].mlp.act, ScaledActivation))
diff --git a/tests/quantization/eetq_integration/__init__.py b/tests/quantization/eetq_integration/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tests/quantization/eetq_integration/test_eetq.py b/tests/quantization/eetq_integration/test_eetq.py
new file mode 100644
index 000000000000..2c01f8145cba
--- /dev/null
+++ b/tests/quantization/eetq_integration/test_eetq.py
@@ -0,0 +1,171 @@
+# coding=utf-8
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import gc
+import tempfile
+import unittest
+
+from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, EetqConfig, OPTForCausalLM
+from transformers.testing_utils import (
+ require_accelerate,
+ require_eetq,
+ require_torch_gpu,
+ require_torch_multi_gpu,
+ slow,
+ torch_device,
+)
+from transformers.utils import is_accelerate_available, is_torch_available
+
+
+if is_torch_available():
+ import torch
+
+if is_accelerate_available():
+ from accelerate import init_empty_weights
+
+
+@require_torch_gpu
+class EetqConfigTest(unittest.TestCase):
+ def test_to_dict(self):
+ """
+ Simple test that checks if one uses a config and converts it to a dict, the dict is the same as the config object
+ """
+ quantization_config = EetqConfig()
+ config_to_dict = quantization_config.to_dict()
+
+ for key in config_to_dict:
+ self.assertEqual(getattr(quantization_config, key), config_to_dict[key])
+
+ def test_from_dict(self):
+ """
+ Simple test that checks if one uses a dict and converts it to a config object, the config object is the same as the dict
+ """
+        config_dict = {"modules_to_not_convert": ["lm_head.weight"], "quant_method": "eetq", "weights": "int8"}
+        quantization_config = EetqConfig.from_dict(config_dict)
+
+        self.assertEqual(config_dict["modules_to_not_convert"], quantization_config.modules_to_not_convert)
+        self.assertEqual(config_dict["quant_method"], quantization_config.quant_method)
+        self.assertEqual(config_dict["weights"], quantization_config.weights)
+
+
+@slow
+@require_torch_gpu
+@require_eetq
+@require_accelerate
+class EetqTest(unittest.TestCase):
+ model_name = "facebook/opt-350m"
+
+ input_text = "What are we having for dinner?"
+ max_new_tokens = 9
+
+ EXPECTED_OUTPUT = "What are we having for dinner?\nI'm having a steak and a salad"
+
+ device_map = "cuda"
+
+    # called only once for all tests in this class
+ @classmethod
+ def setUpClass(cls):
+ """
+ Setup quantized model
+ """
+ quantization_config = EetqConfig(weights="int8")
+ cls.tokenizer = AutoTokenizer.from_pretrained(cls.model_name)
+ cls.quantized_model = AutoModelForCausalLM.from_pretrained(
+ cls.model_name, device_map=cls.device_map, quantization_config=quantization_config
+ )
+
+ def tearDown(self):
+ gc.collect()
+ torch.cuda.empty_cache()
+ gc.collect()
+
+ def test_quantized_model_conversion(self):
+ """
+ Simple test that checks if the quantized model has been converted properly
+ """
+ from eetq import EetqLinear
+
+ from transformers.integrations import replace_with_eetq_linear
+
+ model_id = "facebook/opt-350m"
+ config = AutoConfig.from_pretrained(model_id, revision="cb32f77e905cccbca1d970436fb0f5e6b58ee3c5")
+ quantization_config = EetqConfig(weights="int8")
+
+ with init_empty_weights():
+ model = OPTForCausalLM(config)
+
+ nb_linears = 0
+ for module in model.modules():
+ if isinstance(module, torch.nn.Linear):
+ nb_linears += 1
+
+ model = replace_with_eetq_linear(model, quantization_config=quantization_config)
+ nb_eetq_linear = 0
+ for module in model.modules():
+ if isinstance(module, EetqLinear):
+ nb_eetq_linear += 1
+
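+        # presumably the lm_head is kept unquantized by default, hence one linear layer fewer is converted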
+ self.assertEqual(nb_linears - 1, nb_eetq_linear)
+
+        # Try with `modules_to_not_convert`
+ with init_empty_weights():
+ model = OPTForCausalLM(config)
+ quantization_config = EetqConfig(modules_to_not_convert=["fc1"])
+ model = replace_with_eetq_linear(model, quantization_config=quantization_config)
+ nb_eetq_linear = 0
+ for module in model.modules():
+ if isinstance(module, EetqLinear):
+ nb_eetq_linear += 1
+
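+        # with "fc1" excluded, presumably the 24 per-layer fc1 projections plus the lm_head stay as nn.Linear, hence nb_linears - 25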
+ self.assertEqual(nb_linears - 25, nb_eetq_linear)
+
+ def test_quantized_model(self):
+ """
+ Simple test that checks if the quantized model is working properly
+ """
+ input_ids = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
+
+ output = self.quantized_model.generate(**input_ids, max_new_tokens=self.max_new_tokens)
+ self.assertEqual(self.tokenizer.decode(output[0], skip_special_tokens=True), self.EXPECTED_OUTPUT)
+
+ def test_save_pretrained(self):
+ """
+ Simple test that checks if the quantized model is working properly after being saved and loaded
+ """
+ with tempfile.TemporaryDirectory() as tmpdirname:
+ self.quantized_model.save_pretrained(tmpdirname)
+
+ model = AutoModelForCausalLM.from_pretrained(tmpdirname, device_map=self.device_map)
+
+ input_ids = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
+
+ output = model.generate(**input_ids, max_new_tokens=self.max_new_tokens)
+ self.assertEqual(self.tokenizer.decode(output[0], skip_special_tokens=True), self.EXPECTED_OUTPUT)
+
+ @require_torch_multi_gpu
+ def test_quantized_model_multi_gpu(self):
+ """
+ Simple test that checks if the quantized model is working properly with multiple GPUs
+        Set CUDA_VISIBLE_DEVICES=0,1 if you have more than 2 GPUs.
+ """
+ input_ids = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
+ quantization_config = EetqConfig()
+ quantized_model = AutoModelForCausalLM.from_pretrained(
+ self.model_name, device_map="auto", quantization_config=quantization_config
+ )
+ self.assertTrue(set(quantized_model.hf_device_map.values()) == {0, 1})
+
+ output = quantized_model.generate(**input_ids, max_new_tokens=self.max_new_tokens)
+ self.assertEqual(self.tokenizer.decode(output[0], skip_special_tokens=True), self.EXPECTED_OUTPUT)
diff --git a/tests/quantization/ggml/__init__.py b/tests/quantization/ggml/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tests/quantization/ggml/test_ggml.py b/tests/quantization/ggml/test_ggml.py
new file mode 100644
index 000000000000..09a1ea51d227
--- /dev/null
+++ b/tests/quantization/ggml/test_ggml.py
@@ -0,0 +1,215 @@
+# coding=utf-8
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import tempfile
+import unittest
+
+from transformers import AddedToken, AutoModelForCausalLM, AutoTokenizer
+from transformers.testing_utils import require_gguf, require_torch_gpu, slow, torch_device
+from transformers.utils import is_torch_available
+
+
+if is_torch_available():
+ import torch
+
+
+@require_gguf
+@require_torch_gpu
+@slow
+class GgufIntegrationTests(unittest.TestCase):
+ original_model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+ model_id = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
+ mistral_model_id = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
+
+ q4_0_gguf_model_id = "tinyllama-1.1b-chat-v1.0.Q4_0.gguf"
+ q4_k_gguf_model_id = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
+ q2_k_gguf_model_id = "tinyllama-1.1b-chat-v1.0.Q2_K.gguf"
+ q3_k_gguf_model_id = "tinyllama-1.1b-chat-v1.0.Q3_K_L.gguf"
+ q5_k_gguf_model_id = "tinyllama-1.1b-chat-v1.0.Q5_K_M.gguf"
+ q6_k_gguf_model_id = "tinyllama-1.1b-chat-v1.0.Q6_K.gguf"
+ q8_0_gguf_model_id = "tinyllama-1.1b-chat-v1.0.Q8_0.gguf"
+
+ q4_0_mistral_model_id = "mistral-7b-instruct-v0.2.Q4_0.gguf"
+
+ example_text = "Hello"
+
+ def test_q2_k(self):
+ tokenizer = AutoTokenizer.from_pretrained(self.model_id, gguf_file=self.q2_k_gguf_model_id)
+ model = AutoModelForCausalLM.from_pretrained(self.model_id, gguf_file=self.q2_k_gguf_model_id).to(torch_device)
+
+ text = tokenizer(self.example_text, return_tensors="pt").to(torch_device)
+ out = model.generate(**text, max_new_tokens=10)
+
+ EXPECTED_TEXT = "Hello, World!\n\n[10:0"
+ self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
+
+ def test_q2_k_serialization(self):
+ tokenizer = AutoTokenizer.from_pretrained(self.model_id, gguf_file=self.q2_k_gguf_model_id)
+ model = AutoModelForCausalLM.from_pretrained(self.model_id, gguf_file=self.q2_k_gguf_model_id).to(torch_device)
+
+ with tempfile.TemporaryDirectory() as tmpdirname:
+ model.save_pretrained(tmpdirname)
+ tokenizer.save_pretrained(tmpdirname)
+
+ model = AutoModelForCausalLM.from_pretrained(tmpdirname).to(torch_device)
+ tokenizer = AutoTokenizer.from_pretrained(tmpdirname)
+
+ text = tokenizer(self.example_text, return_tensors="pt").to(torch_device)
+ out = model.generate(**text, max_new_tokens=10)
+
+ EXPECTED_TEXT = "Hello, World!\n\n[10:0"
+ self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
+
+ def test_q3_k(self):
+ tokenizer = AutoTokenizer.from_pretrained(self.model_id, gguf_file=self.q3_k_gguf_model_id)
+ model = AutoModelForCausalLM.from_pretrained(self.model_id, gguf_file=self.q3_k_gguf_model_id).to(torch_device)
+
+ text = tokenizer(self.example_text, return_tensors="pt").to(torch_device)
+ out = model.generate(**text, max_new_tokens=10)
+
+ EXPECTED_TEXT = "Hello, World!\n\n```\n<|user"
+ self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
+
+ def test_q5_k(self):
+ tokenizer = AutoTokenizer.from_pretrained(self.model_id, gguf_file=self.q5_k_gguf_model_id)
+ model = AutoModelForCausalLM.from_pretrained(self.model_id, gguf_file=self.q5_k_gguf_model_id).to(torch_device)
+
+ text = tokenizer(self.example_text, return_tensors="pt").to(torch_device)
+ out = model.generate(**text, max_new_tokens=10)
+
+ EXPECTED_TEXT = "Hello, World!\n\nStep 3: Add"
+ self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
+
+ def test_q4_0(self):
+ tokenizer = AutoTokenizer.from_pretrained(self.model_id, gguf_file=self.q4_0_gguf_model_id)
+ model = AutoModelForCausalLM.from_pretrained(self.model_id, gguf_file=self.q4_0_gguf_model_id).to(torch_device)
+
+ text = tokenizer(self.example_text, return_tensors="pt").to(torch_device)
+ out = model.generate(**text, max_new_tokens=10)
+
+ EXPECTED_TEXT = "Hello, World!\n\nStep 3: Add"
+ self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
+
+ def test_q4_k_m(self):
+ tokenizer = AutoTokenizer.from_pretrained(self.model_id, gguf_file=self.q4_k_gguf_model_id)
+ model = AutoModelForCausalLM.from_pretrained(self.model_id, gguf_file=self.q4_k_gguf_model_id).to(torch_device)
+
+ text = tokenizer(self.example_text, return_tensors="pt").to(torch_device)
+ out = model.generate(**text, max_new_tokens=10)
+
+ EXPECTED_TEXT = "Hello, World!\n\n5. Python:\n"
+ self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
+
+ def test_q6_k(self):
+ tokenizer = AutoTokenizer.from_pretrained(self.model_id, gguf_file=self.q6_k_gguf_model_id)
+ model = AutoModelForCausalLM.from_pretrained(self.model_id, gguf_file=self.q6_k_gguf_model_id).to(torch_device)
+
+ text = tokenizer(self.example_text, return_tensors="pt").to(torch_device)
+ out = model.generate(**text, max_new_tokens=10)
+
+ EXPECTED_TEXT = "Hello, World!\n\nStep 3: Add"
+ self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
+
+ def test_q6_k_fp16(self):
+ tokenizer = AutoTokenizer.from_pretrained(self.model_id, gguf_file=self.q6_k_gguf_model_id)
+ model = AutoModelForCausalLM.from_pretrained(
+ self.model_id, gguf_file=self.q6_k_gguf_model_id, torch_dtype=torch.float16
+ ).to(torch_device)
+
+ self.assertTrue(model.lm_head.weight.dtype == torch.float16)
+
+ text = tokenizer(self.example_text, return_tensors="pt").to(torch_device)
+ out = model.generate(**text, max_new_tokens=10)
+
+ EXPECTED_TEXT = "Hello, World!\n\nStep 3: Add"
+ self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
+
+ def test_q8_0(self):
+ tokenizer = AutoTokenizer.from_pretrained(self.model_id, gguf_file=self.q8_0_gguf_model_id)
+ model = AutoModelForCausalLM.from_pretrained(self.model_id, gguf_file=self.q8_0_gguf_model_id).to(torch_device)
+
+ text = tokenizer(self.example_text, return_tensors="pt").to(torch_device)
+ out = model.generate(**text, max_new_tokens=10)
+
+ EXPECTED_TEXT = "Hello, World!\n\n5. Use a library"
+ self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
+
+ def test_mistral_q4_0(self):
+ tokenizer = AutoTokenizer.from_pretrained(self.mistral_model_id, gguf_file=self.q4_0_mistral_model_id)
+ model = AutoModelForCausalLM.from_pretrained(
+ self.mistral_model_id, gguf_file=self.q4_0_mistral_model_id, device_map="auto", torch_dtype=torch.float16
+ )
+
+ text = tokenizer(self.example_text, return_tensors="pt").to(torch_device)
+ out = model.generate(**text, max_new_tokens=10)
+
+ EXPECTED_TEXT = "Hello,\n\nI'm trying to create a"
+ self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
+
+ def test_tokenization_xnli(self):
+ import tqdm
+ from datasets import load_dataset
+
+ gguf_tokenizer = AutoTokenizer.from_pretrained(self.model_id, gguf_file=self.q8_0_gguf_model_id)
+ original_tokenizer = AutoTokenizer.from_pretrained(self.original_model_id)
+
+ dataset = load_dataset("code_x_glue_ct_code_to_text", "go")
+ for item in tqdm.tqdm(dataset["validation"]):
+ string = item["code"]
+ encoded1 = gguf_tokenizer.encode(string)
+ encoded2 = original_tokenizer.encode(string)
+
+ self.assertEqual(encoded1, encoded2)
+
+ decoded1 = gguf_tokenizer.decode(encoded1, skip_special_tokens=True)
+ decoded2 = original_tokenizer.decode(encoded2, skip_special_tokens=True)
+
+ self.assertEqual(decoded1, decoded2)
+
+ dataset = load_dataset("xnli", "all_languages")
+
+ for i, item in enumerate(tqdm.tqdm(dataset["train"].select(range(100)))):
+ for string in item["premise"].values():
+ encoded1 = gguf_tokenizer.encode(string)
+ encoded2 = original_tokenizer.encode(string)
+
+ self.assertEqual(encoded1, encoded2)
+
+ decoded1 = gguf_tokenizer.decode(encoded1, skip_special_tokens=True)
+ decoded2 = original_tokenizer.decode(encoded2, skip_special_tokens=True)
+
+ self.assertEqual(decoded1, decoded2)
+
+ # With special tokens
+ gguf_tokenizer = AutoTokenizer.from_pretrained(self.model_id, gguf_file=self.q8_0_gguf_model_id)
+ original_tokenizer = AutoTokenizer.from_pretrained(self.original_model_id)
+
+ gguf_tokenizer.add_special_tokens(
+ {"additional_special_tokens": [AddedToken("", rstrip=False, lstrip=False)]}
+ )
+ original_tokenizer.add_special_tokens(
+ {"additional_special_tokens": [AddedToken("", rstrip=False, lstrip=False)]}
+ )
+
+ text = "Hello . Hello"
+
+ encoded1 = gguf_tokenizer.encode(text)
+ encoded2 = original_tokenizer.encode(text)
+
+ self.assertEqual(encoded1, encoded2)
+
+ decoded1 = gguf_tokenizer.decode(encoded1, skip_special_tokens=True)
+ decoded2 = original_tokenizer.decode(encoded2, skip_special_tokens=True)
+
+ self.assertEqual(decoded1, decoded2)
diff --git a/tests/quantization/hqq/test_hqq.py b/tests/quantization/hqq/test_hqq.py
new file mode 100755
index 000000000000..45c64676a7e4
--- /dev/null
+++ b/tests/quantization/hqq/test_hqq.py
@@ -0,0 +1,167 @@
+# coding=utf-8
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import gc
+import unittest
+
+from transformers import AutoModelForCausalLM, AutoTokenizer, HqqConfig
+from transformers.testing_utils import (
+ require_accelerate,
+ require_torch_gpu,
+ require_torch_multi_gpu,
+ slow,
+ torch_device,
+)
+from transformers.utils import is_hqq_available, is_torch_available
+
+
+if is_torch_available():
+ import torch
+
+if is_hqq_available():
+ from hqq.core.quantize import HQQBackend, HQQLinear
+
+
+class HQQLLMRunner:
+ def __init__(self, model_id, quant_config, compute_dtype, device, cache_dir=None):
+ self.model = AutoModelForCausalLM.from_pretrained(
+ model_id,
+ torch_dtype=compute_dtype,
+ device_map=device,
+ quantization_config=quant_config,
+ low_cpu_mem_usage=True,
+ cache_dir=cache_dir,
+ )
+ self.tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=cache_dir)
+ self.device = self.model.device
+ HQQLinear.set_backend(HQQBackend.PYTORCH)
+
+
+def cleanup():
+ torch.cuda.empty_cache()
+ gc.collect()
+
+
+def check_hqqlayer(test_module, hqq_layer, batch_size=1, context_size=1024):
+ # Test HQQ layer
+ W_dequant = hqq_layer.dequantize() # Reconstructed weights
+ inputs = (
+ torch.randn(
+ (batch_size, context_size, hqq_layer.meta["shape"][1]),
+ device=hqq_layer.device,
+ dtype=hqq_layer.compute_dtype,
+ )
+ / 10.0
+ )
+ with torch.no_grad():
+ outputs = hqq_layer(inputs)
+ test_module.assertEqual(outputs.shape[-1], W_dequant.shape[0])
+ test_module.assertEqual(outputs.dtype, hqq_layer.compute_dtype)
+ del W_dequant, inputs, outputs
+ cleanup()
+
+
+def check_forward(test_module, model, batch_size=1, context_size=1024):
+ # Test forward pass
+ with torch.no_grad():
+ out = model(torch.zeros([batch_size, context_size], device=model.device, dtype=torch.int32)).logits
+ test_module.assertEqual(out.shape[0], batch_size)
+ test_module.assertEqual(out.shape[1], context_size)
+ cleanup()
+
+
+MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+
+
+@require_torch_gpu
+class HqqConfigTest(unittest.TestCase):
+ def test_to_dict(self):
+ """
+ Makes sure the config format is properly set
+ """
+ quantization_config = HqqConfig()
+ hqq_orig_config = quantization_config.to_dict()
+
+ for key in hqq_orig_config:
+ self.assertEqual(quantization_config.quant_config[key], hqq_orig_config[key])
+
+
+@slow
+@require_torch_gpu
+@require_accelerate
+class HQQTest(unittest.TestCase):
+ def tearDown(self):
+ cleanup()
+
+ def test_fp16_quantized_model(self):
+ """
+ Simple LLM model testing fp16
+ """
+ quant_config = HqqConfig(nbits=8, group_size=64, quant_zero=False, quant_scale=False, axis=0)
+
+ hqq_runner = HQQLLMRunner(
+ model_id=MODEL_ID, quant_config=quant_config, compute_dtype=torch.float16, device=torch_device
+ )
+
+ check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj)
+ check_forward(self, hqq_runner.model)
+
+ def test_f16_quantized_model_with_offloading(self):
+ """
+ Simple LLM model testing fp16 with meta-data offloading
+ """
+ q4_config = {"nbits": 4, "group_size": 64, "quant_zero": False, "quant_scale": False}
+ q3_config = {"nbits": 3, "group_size": 32, "quant_zero": False, "quant_scale": False, "offload_meta": True}
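+ # Per-layer (dynamic) config: attention projections use 4-bit, MLP projections use 3-bit with meta-data offloading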
+ quant_config = HqqConfig(
+ dynamic_config={
+ "self_attn.q_proj": q4_config,
+ "self_attn.k_proj": q4_config,
+ "self_attn.v_proj": q4_config,
+ "self_attn.o_proj": q4_config,
+ "mlp.gate_proj": q3_config,
+ "mlp.up_proj": q3_config,
+ "mlp.down_proj": q3_config,
+ }
+ )
+
+ hqq_runner = HQQLLMRunner(
+ model_id=MODEL_ID, quant_config=quant_config, compute_dtype=torch.float16, device=torch_device
+ )
+
+ check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj)
+ check_forward(self, hqq_runner.model)
+
+
+@slow
+@require_torch_gpu
+@require_torch_multi_gpu
+@require_accelerate
+class HQQTestMultiGPU(unittest.TestCase):
+ def tearDown(self):
+ cleanup()
+
+ def test_fp16_quantized_model_multigpu(self):
+ """
+ Simple LLM model testing fp16 with multi-gpu
+ """
+
+ quant_config = HqqConfig(nbits=8, group_size=64, quant_zero=False, quant_scale=False, axis=0)
+
+ hqq_runner = HQQLLMRunner(
+ model_id=MODEL_ID, quant_config=quant_config, compute_dtype=torch.float16, device="auto"
+ )
+
+ check_hqqlayer(self, hqq_runner.model.model.layers[0].self_attn.v_proj)
+ check_forward(self, hqq_runner.model)
diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py
index f1e9c7f2d16c..cd46934b5fcf 100755
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -18,10 +18,10 @@
import inspect
import os
import os.path
-import pickle
import random
import re
import tempfile
+import unittest
import warnings
from collections import defaultdict
from typing import Dict, List, Tuple
@@ -438,6 +438,91 @@ class CopyClass(model_class):
max_diff = (model_slow_init.state_dict()[key] - model_fast_init.state_dict()[key]).sum().item()
self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
+ @slow
+ @require_accelerate
+ @mark.accelerate_tests
+ @unittest.skip("Need to fix since we have a device mismatch")
+ def test_save_load_low_cpu_mem_usage(self):
+ config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+ with tempfile.TemporaryDirectory() as saved_model_path:
+ for model_class in self.all_model_classes:
+ model_to_save = model_class(config)
+ model_to_save.save_pretrained(saved_model_path)
+
+ self._check_save_load_low_cpu_mem_usage(model_class, saved_model_path)
+
+ @slow
+ @require_accelerate
+ @mark.accelerate_tests
+ @unittest.skip("Need to fix since we have a device mismatch")
+ def test_save_load_low_cpu_mem_usage_checkpoints(self):
+ config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+ with tempfile.TemporaryDirectory() as saved_model_path:
+ for model_class in self.all_model_classes:
+ model_to_save = model_class(config)
+ model_to_save.config.save_pretrained(saved_model_path)
+ torch.save(model_to_save.state_dict(), os.path.join(saved_model_path, "pytorch_model.bin"))
+
+ self._check_save_load_low_cpu_mem_usage(model_class, saved_model_path)
+
+ @slow
+ @require_accelerate
+ @mark.accelerate_tests
+ @unittest.skip("Need to fix since we have a device mismatch")
+ def test_save_load_low_cpu_mem_usage_no_safetensors(self):
+ with tempfile.TemporaryDirectory() as saved_model_path:
+ for model_class in self.all_model_classes:
+ config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+ model_to_save = model_class(config)
+
+ model_to_save.save_pretrained(saved_model_path, safe_serialization=False)
+ self._check_save_load_low_cpu_mem_usage(model_class, saved_model_path)
+
+ def _check_save_load_low_cpu_mem_usage(self, model_class, saved_model_path):
+ # Load the low usage and the normal models.
+ model_low_usage, loading_info = model_class.from_pretrained(
+ saved_model_path,
+ low_cpu_mem_usage=True,
+ output_loading_info=True,
+ )
+ model_non_low_usage = model_class.from_pretrained(saved_model_path)
+
+ # Check that there were no missing keys.
+ self.assertEqual(loading_info["missing_keys"], [])
+
+ # The low_cpu_mem_usage=True causes the model params to be initialized with device=meta, and then
+ # subsequently loaded with the correct values and onto the correct device. We check if there are any
+ # remaining params that were not properly loaded.
+ for name, param in model_low_usage.named_parameters():
+ self.assertNotEqual(
+ param.device,
+ torch.device("meta"),
+ "Parameter '" + name + "' has not been properly loaded and has device=meta.",
+ )
+
+ # Tests moving the model to a device other than meta.
+ model_low_usage.to(torch_device)
+
+ # Check that the parameters are equal.
+ for p1, p2 in zip(model_low_usage.parameters(), model_non_low_usage.parameters()):
+ self.assertEqual(p1.data.ne(p2.data).sum(), 0)
+
+ # Check that the state dict keys are equal.
+ self.assertEqual(set(model_low_usage.state_dict().keys()), set(model_non_low_usage.state_dict().keys()))
+
+ # Check that the shared tensors are equal.
+ tensor_ptrs1 = collections.defaultdict(list)
+ for name, tensor in model_low_usage.state_dict().items():
+ tensor_ptrs1[id_tensor_storage(tensor)].append(name)
+ tied_params1 = [names for _, names in tensor_ptrs1.items() if len(names) > 1]
+
+ tensor_ptrs2 = collections.defaultdict(list)
+ for name, tensor in model_non_low_usage.state_dict().items():
+ tensor_ptrs2[id_tensor_storage(tensor)].append(name)
+ tied_params2 = [names for _, names in tensor_ptrs2.items() if len(names) > 1]
+
+ self.assertEqual(tied_params1, tied_params2)
+
def test_fast_init_context_manager(self):
# 1. Create a dummy class. Should have buffers as well? To make sure we test __init__
class MyClass(PreTrainedModel):
@@ -1279,26 +1364,6 @@ def flatten_output(output):
f"traced {i}th output doesn't match model {i}th output for {model_class}",
)
- # Test that the model can be serialized and restored properly
- with tempfile.TemporaryDirectory() as tmp_dir_name:
- pkl_file_name = os.path.join(tmp_dir_name, "model.pkl")
- try:
- with open(pkl_file_name, "wb") as f:
- pickle.dump(traced_model, f)
- with open(pkl_file_name, "rb") as f:
- loaded = pickle.load(f)
- except Exception as e:
- self.fail(f"Couldn't serialize / deserialize the traced model: {e}")
-
- loaded_output = loaded(**filtered_inputs)
- loaded_output = flatten_output(loaded_output)
-
- for i in range(num_outputs):
- self.assertTrue(
- torch.allclose(model_output[i], loaded_output[i]),
- f"serialized model {i}th output doesn't match model {i}th output for {model_class}",
- )
-
# Avoid memory leak. Without this, each call increase RAM usage by ~20MB.
# (Even with this call, there are still memory leak by ~0.04MB)
self.clear_torch_jit_class_registry()
@@ -1762,14 +1827,19 @@ def test_resize_tokens_embeddings(self):
if self.model_tester.is_training is False:
model.eval()
- model_vocab_size = config.vocab_size
+ model_vocab_size = config.text_config.vocab_size if hasattr(config, "text_config") else config.vocab_size
# Retrieve the embeddings and clone theme
model_embed = model.resize_token_embeddings(model_vocab_size)
cloned_embeddings = model_embed.weight.clone()
# Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
model_embed = model.resize_token_embeddings(model_vocab_size + 10)
- self.assertEqual(model.config.vocab_size, model_vocab_size + 10)
+ new_model_vocab_size = (
+ model.config.text_config.vocab_size
+ if hasattr(model.config, "text_config")
+ else model.config.vocab_size
+ )
+ self.assertEqual(new_model_vocab_size, model_vocab_size + 10)
# Check that it actually resizes the embeddings matrix
self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] + 10)
# Check that the model can still do a forward pass successfully (every parameter should be resized)
@@ -1777,7 +1847,12 @@ def test_resize_tokens_embeddings(self):
# Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
model_embed = model.resize_token_embeddings(model_vocab_size - 15)
- self.assertEqual(model.config.vocab_size, model_vocab_size - 15)
+ new_model_vocab_size = (
+ model.config.text_config.vocab_size
+ if hasattr(model.config, "text_config")
+ else model.config.vocab_size
+ )
+ self.assertEqual(new_model_vocab_size, model_vocab_size - 15)
# Check that it actually resizes the embeddings matrix
self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] - 15)
@@ -1802,15 +1877,25 @@ def test_resize_tokens_embeddings(self):
model = model_class(config)
model.to(torch_device)
- model_vocab_size = config.vocab_size
+ model_vocab_size = config.text_config.vocab_size if hasattr(config, "text_config") else config.vocab_size
model.resize_token_embeddings(model_vocab_size + 10, pad_to_multiple_of=1)
- self.assertTrue(model.config.vocab_size + 10, model_vocab_size)
+ new_model_vocab_size = (
+ model.config.text_config.vocab_size
+ if hasattr(model.config, "text_config")
+ else model.config.vocab_size
+ )
+ self.assertTrue(new_model_vocab_size + 10, model_vocab_size)
model_embed = model.resize_token_embeddings(model_vocab_size, pad_to_multiple_of=64)
+ new_model_vocab_size = (
+ model.config.text_config.vocab_size
+ if hasattr(model.config, "text_config")
+ else model.config.vocab_size
+ )
self.assertTrue(model_embed.weight.shape[0] // 64, 0)
- self.assertTrue(model_embed.weight.shape[0], model.config.vocab_size)
- self.assertTrue(model.config.vocab_size, model.vocab_size)
+ self.assertTrue(model_embed.weight.shape[0], new_model_vocab_size)
+ self.assertTrue(new_model_vocab_size, model.vocab_size)
model_embed = model.resize_token_embeddings(model_vocab_size + 13, pad_to_multiple_of=64)
self.assertTrue(model_embed.weight.shape[0] // 64, 0)
@@ -1849,9 +1934,14 @@ def test_resize_embeddings_untied(self):
continue
# Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
- model_vocab_size = config.vocab_size
+ model_vocab_size = config.text_config.vocab_size if hasattr(config, "text_config") else config.vocab_size
model.resize_token_embeddings(model_vocab_size + 10)
- self.assertEqual(model.config.vocab_size, model_vocab_size + 10)
+ new_model_vocab_size = (
+ model.config.text_config.vocab_size
+ if hasattr(model.config, "text_config")
+ else model.config.vocab_size
+ )
+ self.assertEqual(new_model_vocab_size, model_vocab_size + 10)
output_embeds = model.get_output_embeddings()
self.assertEqual(output_embeds.weight.shape[0], model_vocab_size + 10)
# Check bias if present
@@ -1862,7 +1952,12 @@ def test_resize_embeddings_untied(self):
# Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
model.resize_token_embeddings(model_vocab_size - 15)
- self.assertEqual(model.config.vocab_size, model_vocab_size - 15)
+ new_model_vocab_size = (
+ model.config.text_config.vocab_size
+ if hasattr(model.config, "text_config")
+ else model.config.vocab_size
+ )
+ self.assertEqual(new_model_vocab_size, model_vocab_size - 15)
# Check that it actually resizes the embeddings matrix
output_embeds = model.get_output_embeddings()
self.assertEqual(output_embeds.weight.shape[0], model_vocab_size - 15)
@@ -1949,7 +2044,8 @@ def check_same_values(layer_1, layer_2):
# self.assertTrue(check_same_values(embeddings, decoding))
# Check that after resize they remain tied.
- model_tied.resize_token_embeddings(config.vocab_size + 10)
+ vocab_size = config.text_config.vocab_size if hasattr(config, "text_config") else config.vocab_size
+ model_tied.resize_token_embeddings(vocab_size + 10)
params_tied_2 = list(model_tied.parameters())
self.assertEqual(len(params_tied_2), len(params_tied))
@@ -2025,8 +2121,8 @@ def test_tied_weights_keys(self):
tied_weight_keys = model_tied._tied_weights_keys if model_tied._tied_weights_keys is not None else []
# Detect we get a hit for each key
for key in tied_weight_keys:
- if not any(re.search(key, p) for group in tied_params for p in group):
- raise ValueError(f"{key} is not a tied weight key for {model_class}.")
+ is_tied_key = any(re.search(key, p) for group in tied_params for p in group)
+ self.assertTrue(is_tied_key, f"{key} is not a tied weight key for {model_class}.")
# Removed tied weights found from tied params -> there should only be one left after
for key in tied_weight_keys:
@@ -2736,6 +2832,51 @@ def test_inputs_embeds(self):
with torch.no_grad():
model(**inputs)[0]
+ def test_inputs_embeds_matches_input_ids(self):
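+ # Passing input_ids or the corresponding inputs_embeds (obtained via get_input_embeddings) must yield identical outputs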
+ config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+
+ for model_class in self.all_model_classes:
+ if model_class.__name__ not in get_values(MODEL_MAPPING_NAMES):
+ continue
+ model = model_class(config)
+ model.to(torch_device)
+ model.eval()
+
+ model_forward_args = inspect.signature(model.forward).parameters
+ if "inputs_embeds" not in model_forward_args:
+ self.skipTest("This model doesn't use `inputs_embeds`")
+
+ inputs = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class))
+ pad_token_id = config.pad_token_id if config.pad_token_id is not None else 1
+
+ wte = model.get_input_embeddings()
+ if not self.is_encoder_decoder:
+ input_ids = inputs["input_ids"]
+ # some models infer position ids/attention mask differently when input ids contain padding,
+ # so replace any pad tokens to make sure no padding is present in the input ids
+ not_pad_token_id = pad_token_id + 1 if max(0, pad_token_id - 1) == 0 else pad_token_id - 1
+ input_ids[input_ids == pad_token_id] = not_pad_token_id
+ del inputs["input_ids"]
+ inputs_embeds = wte(input_ids)
+ with torch.no_grad():
+ out_ids = model(input_ids=input_ids, **inputs)[0]
+ out_embeds = model(inputs_embeds=inputs_embeds, **inputs)[0]
+ else:
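+ # encoder-decoder models: replace both encoder and decoder input ids with their embeddings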
+ encoder_input_ids = inputs["input_ids"]
+ decoder_input_ids = inputs.get("decoder_input_ids", encoder_input_ids)
+ encoder_input_ids[encoder_input_ids == pad_token_id] = max(0, pad_token_id + 1)
+ decoder_input_ids[decoder_input_ids == pad_token_id] = max(0, pad_token_id + 1)
+ del inputs["input_ids"]
+ inputs.pop("decoder_input_ids", None)
+ inputs_embeds = wte(encoder_input_ids)
+ decoder_inputs_embeds = wte(decoder_input_ids)
+ with torch.no_grad():
+ out_ids = model(input_ids=encoder_input_ids, decoder_input_ids=decoder_input_ids, **inputs)[0]
+ out_embeds = model(
+ inputs_embeds=inputs_embeds, decoder_inputs_embeds=decoder_inputs_embeds, **inputs
+ )[0]
+ self.assertTrue(torch.allclose(out_embeds, out_ids))
+
@require_torch_multi_gpu
def test_multi_gpu_data_parallel_forward(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -3603,12 +3744,14 @@ def get_mean_reldiff(failcase, x, ref, atol, rtol):
self.assertTrue(model_eager.config._attn_implementation == "eager")
for name, submodule in model_eager.named_modules():
- if "SdpaAttention" in submodule.__class__.__name__:
+ class_name = submodule.__class__.__name__
+ if "SdpaAttention" in class_name or "SdpaSelfAttention" in class_name:
raise ValueError("The eager model should not have SDPA attention layers")
has_sdpa = False
for name, submodule in model_sdpa.named_modules():
- if "SdpaAttention" in submodule.__class__.__name__:
+ class_name = submodule.__class__.__name__
+ if "SdpaAttention" in class_name or "SdpaSelfAttention" in class_name:
has_sdpa = True
break
if not has_sdpa and model_sdpa.config.model_type != "falcon":
@@ -3691,19 +3834,21 @@ def get_mean_reldiff(failcase, x, ref, atol, rtol):
decoder_input_ids = decoder_input_ids.to(torch_device)
# TODO: never an `attention_mask` arg here?
- other_inputs = {
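+ # include the model's main input in the dict so everything can be routed through _prepare_for_class below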
+ processed_inputs = {
+ model.main_input_name: dummy_input,
"decoder_input_ids": decoder_input_ids,
"decoder_attention_mask": dummy_attention_mask,
"output_hidden_states": True,
}
else:
- other_inputs = {
+ processed_inputs = {
+ model.main_input_name: dummy_input,
"output_hidden_states": True,
}
# Otherwise fails for e.g. WhisperEncoderModel
if "attention_mask" in inspect.signature(model_eager.forward).parameters:
- other_inputs["attention_mask"] = dummy_attention_mask
+ processed_inputs["attention_mask"] = dummy_attention_mask
# TODO: test gradients as well (& for FA2 as well!)
with torch.no_grad():
@@ -3712,8 +3857,9 @@ def get_mean_reldiff(failcase, x, ref, atol, rtol):
enable_math=True,
enable_mem_efficient=enable_kernels,
):
- outputs_eager = model_eager(dummy_input, **other_inputs)
- outputs_sdpa = model_sdpa(dummy_input, **other_inputs)
+ prepared_inputs = self._prepare_for_class(processed_inputs, model_class)
+ outputs_eager = model_eager(**prepared_inputs)
+ outputs_sdpa = model_sdpa(**prepared_inputs)
logits_eager = (
outputs_eager.hidden_states[-1]
@@ -3799,6 +3945,7 @@ def test_sdpa_can_dispatch_on_flash(self):
self.skipTest(f"{model_class.__name__} does not support SDPA")
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+ inputs_dict = self._prepare_for_class(inputs_dict, model_class)
if config.model_type in ["llava", "llava_next", "vipllava"]:
self.skipTest("Llava-like models currently (transformers==4.39.1) requires an attention_mask input")
if config.model_type in ["idefics"]:
@@ -3867,12 +4014,14 @@ def test_eager_matches_sdpa_generate(self):
self.assertTrue(model_eager.config._attn_implementation == "eager")
for name, submodule in model_eager.named_modules():
- if "SdpaAttention" in submodule.__class__.__name__:
+ class_name = submodule.__class__.__name__
+ if "SdpaAttention" in class_name or "SdpaSelfAttention" in class_name:
raise ValueError("The eager model should not have SDPA attention layers")
has_sdpa = False
for name, submodule in model_sdpa.named_modules():
- if "SdpaAttention" in submodule.__class__.__name__:
+ class_name = submodule.__class__.__name__
+ if "SdpaAttention" in class_name or "SdpaSelfAttention" in class_name:
has_sdpa = True
break
if not has_sdpa:
@@ -4132,6 +4281,80 @@ def test_flash_attn_2_from_config(self):
self.assertFalse(fa2_correctly_converted)
+ def _get_custom_4d_mask_test_data(self):
+ # Sequence in which all but the last token is the same
+ input_ids = torch.tensor(
+ [[10, 11, 12, 13], [10, 11, 12, 14], [10, 11, 12, 15]], device=torch_device, dtype=torch.int64
+ )
+ position_ids = torch.tensor([[0, 1, 2, 3]] * 3, device=torch_device, dtype=torch.int64)
+
+ # Combining common prefix with the unique ending tokens:
+ input_ids_shared_prefix = torch.cat([input_ids[0][:-1], input_ids[:, -1]]).unsqueeze(0)
+
+ # Creating a 4D mask where each of the last 3 tokens do not attend to each other.
+ mask_shared_prefix = torch.tensor(
+ [
+ [
+ [
+ [1, 0, 0, 0, 0, 0],
+ [1, 1, 0, 0, 0, 0],
+ [1, 1, 1, 0, 0, 0],
+ [1, 1, 1, 1, 0, 0],
+ [1, 1, 1, 0, 1, 0],
+ [1, 1, 1, 0, 0, 1],
+ ]
+ ]
+ ],
+ )
+ # inverting the attention mask
+ mask_dtype = torch.float32
+ min_dtype = torch.finfo(mask_dtype).min
+ mask_shared_prefix = (mask_shared_prefix.eq(0.0)).to(dtype=mask_dtype, device=torch_device) * min_dtype
+
+ # Creating a position_ids tensor. Note the repeated values at the end.
+ position_ids_shared_prefix = torch.tensor([[0, 1, 2, 3, 3, 3]], device=torch_device, dtype=torch.int64)
+
+ return input_ids, position_ids, input_ids_shared_prefix, mask_shared_prefix, position_ids_shared_prefix
+
+ def test_custom_4d_attention_mask(self):
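+ # Three sequences sharing a prefix are packed into one row; with a custom 4D mask
+ # the last-token logits must match those of the regular batched run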
+ if len(self.all_generative_model_classes) == 0:
+ self.skipTest("Model architecture has no generative classes, and thus not necessarily supporting 4D masks")
+
+ for model_class in self.all_generative_model_classes:
+ if not model_class._supports_cache_class:
+ self.skipTest(f"{model_class.__name__} is not guaranteed to work with custom 4D attention masks")
+ config, _ = self.model_tester.prepare_config_and_inputs_for_common()
+ model = model_class(config).to(device=torch_device, dtype=torch.float32)
+
+ (
+ input_ids,
+ position_ids,
+ input_ids_shared_prefix,
+ mask_shared_prefix,
+ position_ids_shared_prefix,
+ ) = self._get_custom_4d_mask_test_data()
+
+ logits = model.forward(input_ids, position_ids=position_ids).logits
+ # logits.shape == torch.Size([3, 4, ...])
+
+ logits_shared_prefix = model(
+ input_ids_shared_prefix,
+ attention_mask=mask_shared_prefix,
+ position_ids=position_ids_shared_prefix,
+ )[0]
+ # logits_shared_prefix.shape == torch.Size([1, 6, ...])
+
+ out_last_tokens = logits[:, -1, :] # last tokens in each batch line
+ out_shared_prefix_last_tokens = logits_shared_prefix[0, -3:, :] # last three tokens
+
+ # comparing greedily-chosen tokens:
+ assert torch.equal(out_last_tokens.max(axis=1).indices, out_shared_prefix_last_tokens.max(axis=1).indices)
+
+ # comparing softmax-normalized logits:
+ normalized_0 = F.softmax(out_last_tokens, dim=-1)
+ normalized_1 = F.softmax(out_shared_prefix_last_tokens, dim=-1)
+ torch.testing.assert_close(normalized_0, normalized_1, rtol=1e-3, atol=1e-4)
+
global_rng = random.Random()
diff --git a/tests/test_modeling_tf_common.py b/tests/test_modeling_tf_common.py
index f396875570c9..2cf272f4aac1 100644
--- a/tests/test_modeling_tf_common.py
+++ b/tests/test_modeling_tf_common.py
@@ -380,7 +380,9 @@ def test_keras_save_load(self):
main_layer = main_layer_class(config)
symbolic_inputs = {
- name: keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
+ name: keras.Input(tensor.shape[1:], dtype=tensor.dtype)
+ for name, tensor in inputs_dict.items()
+ if tf.is_tensor(tensor)
}
model = keras.Model(symbolic_inputs, outputs=main_layer(symbolic_inputs))
@@ -1689,7 +1691,11 @@ def test_dataset_conversion(self):
tf_inputs_dict = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
if "labels" not in tf_inputs_dict:
return # This model isn't giving us labels after all, don't try training with it
- tf_inputs_dict = {key: val for key, val in tf_inputs_dict.items() if "head_mask" not in key}
+ tf_inputs_dict = {
+ key: val
+ for key, val in tf_inputs_dict.items()
+ if "head_mask" not in key and isinstance(val, tf.Tensor)
+ }
tf_inputs_dict["extra_unwanted_column"] = list(tf_inputs_dict.values())[0] # Use a random other tensor
input_dataset = Dataset.from_dict(tf_inputs_dict)
tf_dataset = model.prepare_tf_dataset(
@@ -1853,8 +1859,8 @@ def ids_tensor(shape, vocab_size, rng=None, name=None, dtype=None):
def random_attention_mask(shape, rng=None, name=None, dtype=None):
attn_mask = ids_tensor(shape, vocab_size=2, rng=None, name=None, dtype=dtype)
- # make sure that at least one token is attended to for each batch
- attn_mask = tf.concat([attn_mask[:, :-1], tf.ones_like(attn_mask[:, -1:], dtype=dtype)], axis=-1)
+ # Mark the first token as 1 (matches behaviour of PyTorch/Flax function)
+ attn_mask = tf.concat([tf.ones_like(attn_mask[:, :1]), attn_mask[:, 1:]], axis=1)
return attn_mask
diff --git a/tests/test_modeling_utils.py b/tests/test_modeling_utils.py
index b6c1e99737fb..9a00340d14b6 100755
--- a/tests/test_modeling_utils.py
+++ b/tests/test_modeling_utils.py
@@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
-import gc
import glob
import json
import os
@@ -53,7 +52,6 @@
require_tf,
require_torch,
require_torch_accelerator,
- require_torch_gpu,
require_torch_multi_accelerator,
require_usr_bin_time,
slow,
@@ -101,7 +99,12 @@
_prepare_4d_attention_mask,
_prepare_4d_causal_attention_mask,
)
- from transformers.modeling_utils import _find_disjoint, _find_identical, shard_checkpoint
+ from transformers.modeling_utils import (
+ _find_disjoint,
+ _find_identical,
+ dtype_byte_size,
+ shard_checkpoint,
+ )
# Fake pretrained models for tests
class BaseModel(PreTrainedModel):
@@ -427,6 +430,69 @@ def remove_torch_dtype(model_path):
model = AutoModel.from_pretrained(TINY_BERT_FOR_TOKEN_CLASSIFICATION, torch_dtype="auto")
self.assertEqual(model.dtype, torch.float32)
+ def test_model_from_pretrained_attn_implementation(self):
+ # test that the model can be instantiated with attn_implementation of either
+ # 1. explicit from_pretrained's attn_implementation argument
+ # 2. explicit from_pretrained's attn_implementation argument with a config argument
+ attn_implementation_available = ["eager"]
+ if is_torch_sdpa_available():
+ attn_implementation_available.append("sdpa")
+
+ if is_flash_attn_2_available():
+ attn_implementation_available.append("flash_attention_2")
+
+ mistral_attention_classes = {
+ "eager": "MistralAttention",
+ "sdpa": "MistralSdpaAttention",
+ "flash_attention_2": "MistralFlashAttention2",
+ }
+ for requested_attn_implementation in attn_implementation_available:
+ model = AutoModelForCausalLM.from_pretrained(
+ TINY_MISTRAL, attn_implementation=requested_attn_implementation
+ )
+ self.assertEqual(model.config._attn_implementation, requested_attn_implementation)
+ for module in model.modules():
+ if "Attention" in module.__class__.__name__:
+ self.assertEqual(
+ module.__class__.__name__, mistral_attention_classes[requested_attn_implementation]
+ )
+
+ config = AutoConfig.from_pretrained(TINY_MISTRAL)
+ model = AutoModelForCausalLM.from_pretrained(
+ TINY_MISTRAL, config=config, attn_implementation=requested_attn_implementation
+ )
+ self.assertEqual(model.config._attn_implementation, requested_attn_implementation)
+ for module in model.modules():
+ if "Attention" in module.__class__.__name__:
+ self.assertEqual(
+ module.__class__.__name__, mistral_attention_classes[requested_attn_implementation]
+ )
+
+ def test_torch_dtype_byte_sizes(self):
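+ # dtype_byte_size should map each torch dtype to its per-element size in bytes (torch.bool counts as 0.125)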
+ torch_dtypes_and_bytes = [
+ (torch.double, 8),
+ (torch.float64, 8),
+ (torch.float, 4),
+ (torch.float32, 4),
+ (torch.half, 2),
+ (torch.float16, 2),
+ (torch.bfloat16, 2),
+ (torch.long, 8),
+ (torch.int64, 8),
+ (torch.int, 4),
+ (torch.int32, 4),
+ (torch.short, 2),
+ (torch.int16, 2),
+ (torch.uint8, 1),
+ (torch.int8, 1),
+ (torch.float8_e4m3fn, 1),
+ (torch.float8_e5m2, 1),
+ (torch.bool, 0.125),
+ ]
+
+ for torch_dtype, bytes_per_element in torch_dtypes_and_bytes:
+ self.assertEqual(dtype_byte_size(torch_dtype), bytes_per_element)
+
def test_no_super_init_config_and_model(self):
config = NoSuperInitConfig(attribute=32)
model = NoSuperInitModel(config)
@@ -783,26 +849,26 @@ def test_model_parallelism_gpt2(self):
@require_accelerate
@mark.accelerate_tests
- @require_torch_gpu
+ @require_torch_accelerator
def test_from_pretrained_disk_offload_task_model(self):
model = AutoModel.from_pretrained("hf-internal-testing/tiny-random-gpt2")
device_map = {
- "transformer.wte": 0,
- "transformer.wpe": 0,
+ "transformer.wte": f"{torch_device}:0",
+ "transformer.wpe": f"{torch_device}:0",
"transformer.h.0": "cpu",
"transformer.h.1": "cpu",
"transformer.h.2": "cpu",
"transformer.h.3": "disk",
"transformer.h.4": "disk",
- "transformer.ln_f": 0,
- "lm_head": 0,
+ "transformer.ln_f": f"{torch_device}:0",
+ "lm_head": f"{torch_device}:0",
}
with tempfile.TemporaryDirectory() as tmp_dir:
- inputs = torch.tensor([[1, 2, 3]]).to(0)
+ inputs = torch.tensor([[1, 2, 3]]).to(f"{torch_device}:0")
model.save_pretrained(tmp_dir)
- new_model = AutoModelForCausalLM.from_pretrained(tmp_dir).to(0)
- outputs1 = new_model.to(0)(inputs)
+ new_model = AutoModelForCausalLM.from_pretrained(tmp_dir).to(f"{torch_device}:0")
+ outputs1 = new_model.to(f"{torch_device}:0")(inputs)
offload_folder = os.path.join(tmp_dir, "offload")
new_model_with_offload = AutoModelForCausalLM.from_pretrained(
@@ -813,7 +879,6 @@ def test_from_pretrained_disk_offload_task_model(self):
self.assertTrue(torch.allclose(outputs1.logits.cpu(), outputs2.logits.cpu()))
# With state dict temp offload
- offload_folder = os.path.join(tmp_dir, "offload")
new_model_with_offload = AutoModelForCausalLM.from_pretrained(
tmp_dir,
device_map=device_map,
@@ -821,30 +886,29 @@ def test_from_pretrained_disk_offload_task_model(self):
offload_state_dict=True,
)
outputs2 = new_model_with_offload(inputs)
-
self.assertTrue(torch.allclose(outputs1.logits.cpu(), outputs2.logits.cpu()))
@require_accelerate
@mark.accelerate_tests
- @require_torch_gpu
+ @require_torch_accelerator
def test_from_pretrained_disk_offload_derived_to_base_model(self):
derived_model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
device_map = {
- "wte": 0,
- "wpe": 0,
+ "wte": f"{torch_device}:0",
+ "wpe": f"{torch_device}:0",
"h.0": "cpu",
"h.1": "cpu",
"h.2": "cpu",
"h.3": "disk",
"h.4": "disk",
- "ln_f": 0,
+ "ln_f": f"{torch_device}:0",
}
with tempfile.TemporaryDirectory() as tmp_dir:
- inputs = torch.tensor([[1, 2, 3]]).to(0)
+ inputs = torch.tensor([[1, 2, 3]]).to(f"{torch_device}:0")
derived_model.save_pretrained(tmp_dir, use_safetensors=True)
base_model = AutoModel.from_pretrained(tmp_dir)
- outputs1 = base_model.to(0)(inputs)
+ outputs1 = base_model.to(f"{torch_device}:0")(inputs)
# with disk offload
offload_folder = os.path.join(tmp_dir, "offload")
@@ -935,6 +999,26 @@ def test_use_safetensors(self):
self.assertTrue(any(f.endswith("safetensors") for f in all_downloaded_files))
self.assertFalse(any(f.endswith("bin") for f in all_downloaded_files))
+ # test no model file found when use_safetensors=None (default when safetensors package available)
+ with self.assertRaises(OSError) as missing_model_file_error:
+ BertModel.from_pretrained("hf-internal-testing/config-no-model")
+
+ self.assertTrue(
+ "does not appear to have a file named pytorch_model.bin, model.safetensors,"
+ in str(missing_model_file_error.exception)
+ )
+
+ with self.assertRaises(OSError) as missing_model_file_error:
+ with tempfile.TemporaryDirectory() as tmp_dir:
+ with open(os.path.join(tmp_dir, "config.json"), "w") as f:
+ f.write("{}")
+ f.close()
+ BertModel.from_pretrained(tmp_dir)
+
+ self.assertTrue(
+ "Error no file named pytorch_model.bin, model.safetensors" in str(missing_model_file_error.exception)
+ )
+
@require_safetensors
def test_safetensors_save_and_load(self):
model = BertModel.from_pretrained("hf-internal-testing/tiny-random-bert")
@@ -2021,229 +2105,6 @@ def test_not_available_sdpa(self):
self.assertTrue("PyTorch SDPA requirements in Transformers are not met" in str(cm.exception))
-@require_torch_gpu
-class Mask4DTestBase(unittest.TestCase):
- def tearDown(self):
- gc.collect()
- torch.cuda.empty_cache()
-
- def get_test_data(self):
- texts = ["the cat sat", "the cat had", "the cat is"]
- encoded = [self.tokenizer.encode(t) for t in texts]
- input_0 = torch.tensor(encoded, device=torch_device)
- # tensor([[ 1, 278, 6635, 3290],
- # [ 1, 278, 6635, 750],
- # [ 1, 278, 6635, 338]], device='cuda:0')
-
- position_ids_0 = torch.tensor([[0, 1, 2, 3]] * 3, device=torch_device, dtype=torch.int64)
-
- # Combining common prefix with the unique ending tokens:
- input_1 = torch.cat([input_0[0][:-1], input_0[:, -1]]).unsqueeze(0)
- # tensor([[ 1, 278, 6635, 3290, 750, 338]], device='cuda:0')
-
- # Creating a 4D mask where each of the last 3 tokens do not attend to each other.
- mask_1 = torch.tensor(
- [
- [
- [
- [1, 0, 0, 0, 0, 0],
- [1, 1, 0, 0, 0, 0],
- [1, 1, 1, 0, 0, 0],
- [1, 1, 1, 1, 0, 0],
- [1, 1, 1, 0, 1, 0],
- [1, 1, 1, 0, 0, 1],
- ]
- ]
- ],
- device="cuda:0",
- dtype=torch.int64,
- )
-
- # Creating a position_ids tensor. note the repeating figures in the end.
- position_ids_1 = torch.tensor([[0, 1, 2, 3, 3, 3]], device=torch_device, dtype=torch.int64)
-
- return input_0, position_ids_0, input_1, mask_1, position_ids_1
-
-
-@require_torch_gpu
-class Mask4DTestFP32(Mask4DTestBase):
- def setUp(self):
- model_name = "JackFram/llama-68m" # small Llama-like model from FlexFlow
- self.model_dtype = torch.float32
- self.tokenizer = AutoTokenizer.from_pretrained(model_name)
- self.model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=self.model_dtype).to(torch_device)
-
- def test_attention(self):
- """comparing outputs of attention layer"""
- # Input 0: one row per sentence; Input 1: same data, but stacked into a single row with custom attention
- input_0, position_ids_0, input_1, mask_1, position_ids_1 = self.get_test_data()
- causal_mask_1 = (1 - mask_1).to(self.model_dtype) * torch.finfo(self.model_dtype).min
-
- hid_0 = self.model.model.embed_tokens(input_0)
- outs_0 = self.model.model.layers[0].self_attn.forward(hid_0, position_ids=position_ids_0)[0]
- # outs_0.shape == torch.Size([3, 4, 768])
-
- hid_1 = self.model.model.embed_tokens(input_1)
- outs_1 = self.model.model.layers[0].self_attn.forward(
- hid_1, attention_mask=causal_mask_1, position_ids=position_ids_1
- )[0]
- # outs_1.shape == torch.Size([1, 6, 768])
-
- outs_0_last_tokens = outs_0[:, -1, :] # last tokens in each batch line
- outs_1_last_tokens = outs_1[0, -3:, :] # last three tokens
- torch.testing.assert_close(outs_0_last_tokens, outs_1_last_tokens)
-
- def test_causal_model_logits(self):
- """comparing logits outputs of whole inner model"""
- # Input 0: one row per sentence; Input 1: same data, but stacked into a single row with custom attention
- input_0, position_ids_0, input_1, mask_1, position_ids_1 = self.get_test_data()
-
- logits_0 = self.model.forward(input_0, position_ids=position_ids_0).logits
- logits_1 = self.model.forward(input_1, attention_mask=mask_1.bool(), position_ids=position_ids_1).logits
-
- logits_0_last_tokens = logits_0[:, -1, :] # last tokens in each batch line
- logits_1_last_tokens = logits_1[0, -3:, :] # last three tokens
- torch.testing.assert_close(logits_0_last_tokens, logits_1_last_tokens)
-
-
-@require_torch_gpu
-class Mask4DTestFP16(Mask4DTestBase):
- test_attention = Mask4DTestFP32.test_attention
-
- def setUp(self):
- model_name = "JackFram/llama-68m" # small Llama-like model from FlexFlow
- self.model_dtype = torch.float16
- self.tokenizer = AutoTokenizer.from_pretrained(model_name)
- self.model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=self.model_dtype).to(torch_device)
-
- def test_causal_model_logits(self):
- """comparing logits outputs of whole inner model"""
- # Input 0: one row per sentence; Input 1: same data, but stacked into a single row with custom attention
- input_0, position_ids_0, input_1, mask_1, position_ids_1 = self.get_test_data()
-
- logits_0 = self.model.forward(input_0, position_ids=position_ids_0).logits
- logits_1 = self.model.forward(input_1, attention_mask=mask_1.bool(), position_ids=position_ids_1).logits
-
- logits_0_last_tokens = logits_0[:, -1, :] # last tokens in each batch line
- logits_1_last_tokens = logits_1[0, -3:, :] # last three tokens
-
- indices_0 = logits_0_last_tokens.sort(descending=True).indices
- indices_1 = logits_1_last_tokens.sort(descending=True).indices
-
- # checking logits, but note relaxed tolerances for FP16
- torch.testing.assert_close(logits_0_last_tokens, logits_1_last_tokens, atol=0.02, rtol=0.001)
-
- # checking tokens order for the top tokens
- for token_ids_0, token_ids_1 in zip(indices_0, indices_1):
- self.assertTrue(torch.equal(token_ids_0[:128], token_ids_1[:128]))
-
-
-@slow
-@require_torch_gpu
-class Mask4DTestHard(unittest.TestCase):
- def tearDown(self):
- gc.collect()
- torch.cuda.empty_cache()
-
- def setUp(self):
- model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
- self.model_dtype = torch.float32
- self.tokenizer = AutoTokenizer.from_pretrained(model_name)
- self.model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=self.model_dtype).to(torch_device)
-
- def get_test_data(self):
- template = "my favorite {}"
- items = ("pet is a", "artist plays a", "name is L") # same number of tokens in each item
-
- batch_0 = [template.format(x) for x in items] # 3 separate lines
- batch_1 = template.format(" ".join(items)) # 1 line with options concatenated
-
- input_0 = self.tokenizer(batch_0, return_tensors="pt").input_ids.to(torch_device)
- input_1 = self.tokenizer(batch_1, return_tensors="pt").input_ids.to(torch_device)
-
- mask_1 = torch.tensor(
- [
- [
- [
- [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
- [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
- [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
- [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
- [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
- [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
- [1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0],
- [1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0],
- [1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0],
- [1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0],
- [1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0],
- [1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1],
- ]
- ]
- ],
- device=torch_device,
- dtype=torch.int64,
- )
-
- position_ids_0 = torch.arange(input_0.shape[1]).tile(input_0.shape[0], 1).to(torch_device)
- # equivalent: position_ids_1 = torch.tensor([[0, 1, 2, 3, 4, 5, 3, 4, 5, 3, 4, 5]]).to(device)
- position_ids_1 = (mask_1.sum(dim=-1) - 1).reshape(1, -1) # same but nicer
-
- return input_0, position_ids_0, input_1, mask_1, position_ids_1
-
- def test_stacked_causal_mask(self):
- # Input 0: one row per sentence; Input 1: same data, but stacked into a single row with custom attention
- input_0, position_ids_0, input_1, mask_1, position_ids_1 = self.get_test_data()
-
- # regular batch
- logits_0 = self.model.forward(input_0, position_ids=position_ids_0).logits
- logits_0_last = logits_0[:, -1, :] # last tokens in each batch line
- decoded_0 = [self.tokenizer.decode(t) for t in logits_0_last.argmax(dim=-1)]
-
- # single forward run with 4D custom mask
- logits_1 = self.model.forward(input_1, attention_mask=mask_1.bool(), position_ids=position_ids_1).logits
- logits_1_last = logits_1[0, torch.where(position_ids_1 == position_ids_1.max())[1], :] # last three tokens
- decoded_1 = [self.tokenizer.decode(t) for t in logits_1_last.argmax(dim=-1)]
-
- self.assertEqual(decoded_0, decoded_1)
-
- def test_partial_stacked_causal_mask(self):
- # Same as the test above, but the input is passed in two groups. It tests that we can pass partial 4D attention
- # masks
-
- # Input 0: one row per sentence; Input 1: same data, but stacked into a single row with custom attention
- input_0, position_ids_0, input_1, mask_1, position_ids_1 = self.get_test_data()
-
- # regular batch
- logits_0 = self.model.forward(input_0, position_ids=position_ids_0).logits
- logits_0_last = logits_0[:, -1, :] # last tokens in each batch line
- decoded_0 = [self.tokenizer.decode(t) for t in logits_0_last.argmax(dim=-1)]
-
- # 2 forward runs with custom 4D masks
- part_a = 3 # split point
-
- input_1a = input_1[:, :part_a]
- position_ids_1a = position_ids_1[:, :part_a]
- mask_1a = mask_1[:, :, :part_a, :part_a]
-
- outs_1a = self.model.forward(input_1a, attention_mask=mask_1a.bool(), position_ids=position_ids_1a)
- past_key_values_a = outs_1a["past_key_values"]
-
- input_1b = input_1[:, part_a:]
- position_ids_1b = position_ids_1[:, part_a:]
- mask_1b = mask_1[:, :, part_a:, :]
-
- outs_1b = self.model.forward(
- input_1b, attention_mask=mask_1b.bool(), position_ids=position_ids_1b, past_key_values=past_key_values_a
- )
-
- decoded_1b = [
- self.tokenizer.decode(t)
- for t in outs_1b.logits.argmax(-1)[0, torch.where(position_ids_1 == position_ids_1.max())[1] - part_a]
- ]
-
- self.assertEqual(decoded_0, decoded_1b)
-
-
@require_torch
class TestTensorSharing(TestCasePlus):
def test_disjoint(self):
diff --git a/tests/tools/test_image_captioning.py b/tests/tools/test_image_captioning.py
deleted file mode 100644
index fcd06eb44435..000000000000
--- a/tests/tools/test_image_captioning.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# coding=utf-8
-# Copyright 2023 HuggingFace Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-from pathlib import Path
-
-from transformers import is_vision_available, load_tool
-from transformers.testing_utils import get_tests_dir
-
-from .test_tools_common import ToolTesterMixin
-
-
-if is_vision_available():
- from PIL import Image
-
-
-class ImageCaptioningToolTester(unittest.TestCase, ToolTesterMixin):
- def setUp(self):
- self.tool = load_tool("image-captioning")
- self.tool.setup()
- self.remote_tool = load_tool("image-captioning", remote=True)
-
- def test_exact_match_arg(self):
- image = Image.open(Path(get_tests_dir("fixtures/tests_samples/COCO")) / "000000039769.png")
- result = self.tool(image)
- self.assertEqual(result, "two cats sleeping on a couch")
-
- def test_exact_match_arg_remote(self):
- image = Image.open(Path(get_tests_dir("fixtures/tests_samples/COCO")) / "000000039769.png")
- result = self.remote_tool(image)
- self.assertEqual(result, "two cats sleeping on a couch")
-
- def test_exact_match_kwarg(self):
- image = Image.open(Path(get_tests_dir("fixtures/tests_samples/COCO")) / "000000039769.png")
- result = self.tool(image=image)
- self.assertEqual(result, "two cats sleeping on a couch")
-
- def test_exact_match_kwarg_remote(self):
- image = Image.open(Path(get_tests_dir("fixtures/tests_samples/COCO")) / "000000039769.png")
- result = self.remote_tool(image=image)
- self.assertEqual(result, "two cats sleeping on a couch")
diff --git a/tests/tools/test_image_segmentation.py b/tests/tools/test_image_segmentation.py
deleted file mode 100644
index 2f003f2c8b91..000000000000
--- a/tests/tools/test_image_segmentation.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# coding=utf-8
-# Copyright 2023 HuggingFace Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-from pathlib import Path
-
-from transformers import is_vision_available, load_tool
-from transformers.testing_utils import get_tests_dir
-
-from .test_tools_common import ToolTesterMixin
-
-
-if is_vision_available():
- from PIL import Image
-
-
-class ImageSegmentationToolTester(unittest.TestCase, ToolTesterMixin):
- def setUp(self):
- self.tool = load_tool("image-segmentation")
- self.tool.setup()
- self.remote_tool = load_tool("image-segmentation", remote=True)
-
- def test_exact_match_arg(self):
- image = Image.open(Path(get_tests_dir("fixtures/tests_samples/COCO")) / "000000039769.png")
- result = self.tool(image, "cat")
- self.assertTrue(isinstance(result, Image.Image))
-
- def test_exact_match_arg_remote(self):
- image = Image.open(Path(get_tests_dir("fixtures/tests_samples/COCO")) / "000000039769.png")
- result = self.remote_tool(image, "cat")
- self.assertTrue(isinstance(result, Image.Image))
-
- def test_exact_match_kwarg(self):
- image = Image.open(Path(get_tests_dir("fixtures/tests_samples/COCO")) / "000000039769.png")
- result = self.tool(image=image, label="cat")
- self.assertTrue(isinstance(result, Image.Image))
-
- def test_exact_match_kwarg_remote(self):
- image = Image.open(Path(get_tests_dir("fixtures/tests_samples/COCO")) / "000000039769.png")
- result = self.remote_tool(image=image, label="cat")
- self.assertTrue(isinstance(result, Image.Image))
diff --git a/tests/tools/test_python_interpreter.py b/tests/tools/test_python_interpreter.py
deleted file mode 100644
index b9a38b4a21f1..000000000000
--- a/tests/tools/test_python_interpreter.py
+++ /dev/null
@@ -1,131 +0,0 @@
-# coding=utf-8
-# Copyright 2023 HuggingFace Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-
-from transformers.testing_utils import CaptureStdout
-from transformers.tools.python_interpreter import evaluate
-
-
-# Fake function we will use as tool
-def add_two(x):
- return x + 2
-
-
-class PythonInterpreterTester(unittest.TestCase):
- def test_evaluate_assign(self):
- code = "x = 3"
- state = {}
- result = evaluate(code, {}, state=state)
- assert result == 3
- self.assertDictEqual(state, {"x": 3})
-
- code = "x = y"
- state = {"y": 5}
- result = evaluate(code, {}, state=state)
- # evaluate returns the value of the last assignment.
- assert result == 5
- self.assertDictEqual(state, {"x": 5, "y": 5})
-
- def test_evaluate_call(self):
- code = "y = add_two(x)"
- state = {"x": 3}
- result = evaluate(code, {"add_two": add_two}, state=state)
- assert result == 5
- self.assertDictEqual(state, {"x": 3, "y": 5})
-
- # Won't work without the tool
- with CaptureStdout() as out:
- result = evaluate(code, {}, state=state)
- assert result is None
- assert "tried to execute add_two" in out.out
-
- def test_evaluate_constant(self):
- code = "x = 3"
- state = {}
- result = evaluate(code, {}, state=state)
- assert result == 3
- self.assertDictEqual(state, {"x": 3})
-
- def test_evaluate_dict(self):
- code = "test_dict = {'x': x, 'y': add_two(x)}"
- state = {"x": 3}
- result = evaluate(code, {"add_two": add_two}, state=state)
- self.assertDictEqual(result, {"x": 3, "y": 5})
- self.assertDictEqual(state, {"x": 3, "test_dict": {"x": 3, "y": 5}})
-
- def test_evaluate_expression(self):
- code = "x = 3\ny = 5"
- state = {}
- result = evaluate(code, {}, state=state)
- # evaluate returns the value of the last assignment.
- assert result == 5
- self.assertDictEqual(state, {"x": 3, "y": 5})
-
- def test_evaluate_f_string(self):
- code = "text = f'This is x: {x}.'"
- state = {"x": 3}
- result = evaluate(code, {}, state=state)
- # evaluate returns the value of the last assignment.
- assert result == "This is x: 3."
- self.assertDictEqual(state, {"x": 3, "text": "This is x: 3."})
-
- def test_evaluate_if(self):
- code = "if x <= 3:\n y = 2\nelse:\n y = 5"
- state = {"x": 3}
- result = evaluate(code, {}, state=state)
- # evaluate returns the value of the last assignment.
- assert result == 2
- self.assertDictEqual(state, {"x": 3, "y": 2})
-
- state = {"x": 8}
- result = evaluate(code, {}, state=state)
- # evaluate returns the value of the last assignment.
- assert result == 5
- self.assertDictEqual(state, {"x": 8, "y": 5})
-
- def test_evaluate_list(self):
- code = "test_list = [x, add_two(x)]"
- state = {"x": 3}
- result = evaluate(code, {"add_two": add_two}, state=state)
- self.assertListEqual(result, [3, 5])
- self.assertDictEqual(state, {"x": 3, "test_list": [3, 5]})
-
- def test_evaluate_name(self):
- code = "y = x"
- state = {"x": 3}
- result = evaluate(code, {}, state=state)
- assert result == 3
- self.assertDictEqual(state, {"x": 3, "y": 3})
-
- def test_evaluate_subscript(self):
- code = "test_list = [x, add_two(x)]\ntest_list[1]"
- state = {"x": 3}
- result = evaluate(code, {"add_two": add_two}, state=state)
- assert result == 5
- self.assertDictEqual(state, {"x": 3, "test_list": [3, 5]})
-
- code = "test_dict = {'x': x, 'y': add_two(x)}\ntest_dict['y']"
- state = {"x": 3}
- result = evaluate(code, {"add_two": add_two}, state=state)
- assert result == 5
- self.assertDictEqual(state, {"x": 3, "test_dict": {"x": 3, "y": 5}})
-
- def test_evaluate_for(self):
- code = "x = 0\nfor i in range(3):\n x = i"
- state = {}
- result = evaluate(code, {"range": range}, state=state)
- assert result == 2
- self.assertDictEqual(state, {"x": 2, "i": 2})
diff --git a/tests/tools/test_text_classification.py b/tests/tools/test_text_classification.py
deleted file mode 100644
index b40067490c61..000000000000
--- a/tests/tools/test_text_classification.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# coding=utf-8
-# Copyright 2023 HuggingFace Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-
-from transformers import load_tool
-
-from .test_tools_common import ToolTesterMixin
-
-
-class TextClassificationToolTester(unittest.TestCase, ToolTesterMixin):
- def setUp(self):
- self.tool = load_tool("text-classification")
- self.tool.setup()
- self.remote_tool = load_tool("text-classification", remote=True)
-
- def test_exact_match_arg(self):
- result = self.tool("That's quite cool", ["positive", "negative"])
- self.assertEqual(result, "positive")
-
- def test_exact_match_arg_remote(self):
- result = self.remote_tool("That's quite cool", ["positive", "negative"])
- self.assertEqual(result, "positive")
-
- def test_exact_match_kwarg(self):
- result = self.tool(text="That's quite cool", labels=["positive", "negative"])
- self.assertEqual(result, "positive")
-
- def test_exact_match_kwarg_remote(self):
- result = self.remote_tool(text="That's quite cool", labels=["positive", "negative"])
- self.assertEqual(result, "positive")
diff --git a/tests/tools/test_text_question_answering.py b/tests/tools/test_text_question_answering.py
deleted file mode 100644
index aed2898f0153..000000000000
--- a/tests/tools/test_text_question_answering.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# coding=utf-8
-# Copyright 2023 HuggingFace Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-
-from transformers import load_tool
-
-from .test_tools_common import ToolTesterMixin
-
-
-TEXT = """
-Hugging Face was founded in 2016 by French entrepreneurs Clément Delangue, Julien Chaumond, and Thomas Wolf originally as a company that developed a chatbot app targeted at teenagers.[2] After open-sourcing the model behind the chatbot, the company pivoted to focus on being a platform for machine learning.
-
-In March 2021, Hugging Face raised $40 million in a Series B funding round.[3]
-
-On April 28, 2021, the company launched the BigScience Research Workshop in collaboration with several other research groups to release an open large language model.[4] In 2022, the workshop concluded with the announcement of BLOOM, a multilingual large language model with 176 billion parameters.[5]
-"""
-
-
-class TextQuestionAnsweringToolTester(unittest.TestCase, ToolTesterMixin):
- def setUp(self):
- self.tool = load_tool("text-question-answering")
- self.tool.setup()
- self.remote_tool = load_tool("text-question-answering", remote=True)
-
- def test_exact_match_arg(self):
- result = self.tool(TEXT, "What did Hugging Face do in April 2021?")
- self.assertEqual(result, "launched the BigScience Research Workshop")
-
- def test_exact_match_arg_remote(self):
- result = self.remote_tool(TEXT, "What did Hugging Face do in April 2021?")
- self.assertEqual(result, "launched the BigScience Research Workshop")
-
- def test_exact_match_kwarg(self):
- result = self.tool(text=TEXT, question="What did Hugging Face do in April 2021?")
- self.assertEqual(result, "launched the BigScience Research Workshop")
-
- def test_exact_match_kwarg_remote(self):
- result = self.remote_tool(text=TEXT, question="What did Hugging Face do in April 2021?")
- self.assertEqual(result, "launched the BigScience Research Workshop")
diff --git a/tests/tools/test_text_summarization.py b/tests/tools/test_text_summarization.py
deleted file mode 100644
index 162443f373a4..000000000000
--- a/tests/tools/test_text_summarization.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# coding=utf-8
-# Copyright 2023 HuggingFace Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-
-from transformers import load_tool
-
-from .test_tools_common import ToolTesterMixin
-
-
-TEXT = """
-Hugging Face was founded in 2016 by French entrepreneurs Clément Delangue, Julien Chaumond, and Thomas Wolf originally as a company that developed a chatbot app targeted at teenagers.[2] After open-sourcing the model behind the chatbot, the company pivoted to focus on being a platform for machine learning.
-
-In March 2021, Hugging Face raised $40 million in a Series B funding round.[3]
-
-On April 28, 2021, the company launched the BigScience Research Workshop in collaboration with several other research groups to release an open large language model.[4] In 2022, the workshop concluded with the announcement of BLOOM, a multilingual large language model with 176 billion parameters.[5]
-"""
-
-
-class TextSummarizationToolTester(unittest.TestCase, ToolTesterMixin):
- def setUp(self):
- self.tool = load_tool("summarization")
- self.tool.setup()
- self.remote_tool = load_tool("summarization", remote=True)
-
- def test_exact_match_arg(self):
- result = self.tool(TEXT)
- self.assertEqual(
- result,
- "Hugging Face was founded in 2016 by French entrepreneurs Clément Delangue, Julien Chaumond, and Thomas Wolf. In March 2021, Hugging Face raised $40 million in a Series B funding round. On April 28, 2021, the company launched the BigScience Research Workshop in collaboration with several other research groups to release an open large language model. In 2022, the workshop concluded with the announcement of BLOOM.",
- )
-
- def test_exact_match_arg_remote(self):
- result = self.remote_tool(TEXT)
- self.assertEqual(
- result,
- "Hugging Face was founded in 2016 by French entrepreneurs Clément Delangue, Julien Chaumond, and Thomas Wolf. In March 2021, Hugging Face raised $40 million in a Series B funding round. On April 28, 2021, the company launched the BigScience Research Workshop in collaboration with several other research groups to release an open large language model. In 2022, the workshop concluded with the announcement of BLOOM.",
- )
-
- def test_exact_match_kwarg(self):
- result = self.tool(text=TEXT)
- self.assertEqual(
- result,
- "Hugging Face was founded in 2016 by French entrepreneurs Clément Delangue, Julien Chaumond, and Thomas Wolf. In March 2021, Hugging Face raised $40 million in a Series B funding round. On April 28, 2021, the company launched the BigScience Research Workshop in collaboration with several other research groups to release an open large language model. In 2022, the workshop concluded with the announcement of BLOOM.",
- )
-
- def test_exact_match_kwarg_remote(self):
- result = self.remote_tool(text=TEXT)
- self.assertEqual(
- result,
- "Hugging Face was founded in 2016 by French entrepreneurs Clément Delangue, Julien Chaumond, and Thomas Wolf. In March 2021, Hugging Face raised $40 million in a Series B funding round. On April 28, 2021, the company launched the BigScience Research Workshop in collaboration with several other research groups to release an open large language model. In 2022, the workshop concluded with the announcement of BLOOM.",
- )
diff --git a/tests/tools/test_tools_common.py b/tests/tools/test_tools_common.py
deleted file mode 100644
index 984edfcd8c60..000000000000
--- a/tests/tools/test_tools_common.py
+++ /dev/null
@@ -1,133 +0,0 @@
-# coding=utf-8
-# Copyright 2023 HuggingFace Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from pathlib import Path
-from typing import List
-
-from transformers import is_torch_available, is_vision_available
-from transformers.testing_utils import get_tests_dir, is_tool_test
-from transformers.tools.agent_types import AGENT_TYPE_MAPPING, AgentAudio, AgentImage, AgentText
-
-
-if is_torch_available():
- import torch
-
-if is_vision_available():
- from PIL import Image
-
-
-authorized_types = ["text", "image", "audio"]
-
-
-def create_inputs(input_types: List[str]):
- inputs = []
-
- for input_type in input_types:
- if input_type == "text":
- inputs.append("Text input")
- elif input_type == "image":
- inputs.append(
- Image.open(Path(get_tests_dir("fixtures/tests_samples/COCO")) / "000000039769.png").resize((512, 512))
- )
- elif input_type == "audio":
- inputs.append(torch.ones(3000))
- elif isinstance(input_type, list):
- inputs.append(create_inputs(input_type))
- else:
- raise ValueError(f"Invalid type requested: {input_type}")
-
- return inputs
-
-
-def output_types(outputs: List):
- output_types = []
-
- for output in outputs:
- if isinstance(output, (str, AgentText)):
- output_types.append("text")
- elif isinstance(output, (Image.Image, AgentImage)):
- output_types.append("image")
- elif isinstance(output, (torch.Tensor, AgentAudio)):
- output_types.append("audio")
- else:
- raise ValueError(f"Invalid output: {output}")
-
- return output_types
-
-
-@is_tool_test
-class ToolTesterMixin:
- def test_inputs_outputs(self):
- self.assertTrue(hasattr(self.tool, "inputs"))
- self.assertTrue(hasattr(self.tool, "outputs"))
-
- inputs = self.tool.inputs
- for _input in inputs:
- if isinstance(_input, list):
- for __input in _input:
- self.assertTrue(__input in authorized_types)
- else:
- self.assertTrue(_input in authorized_types)
-
- outputs = self.tool.outputs
- for _output in outputs:
- self.assertTrue(_output in authorized_types)
-
- def test_call(self):
- inputs = create_inputs(self.tool.inputs)
- outputs = self.tool(*inputs)
-
- # There is a single output
- if len(self.tool.outputs) == 1:
- outputs = [outputs]
-
- self.assertListEqual(output_types(outputs), self.tool.outputs)
-
- def test_common_attributes(self):
- self.assertTrue(hasattr(self.tool, "description"))
- self.assertTrue(hasattr(self.tool, "default_checkpoint"))
- self.assertTrue(self.tool.description.startswith("This is a tool that"))
-
- def test_agent_types_outputs(self):
- inputs = create_inputs(self.tool.inputs)
- outputs = self.tool(*inputs)
-
- if not isinstance(outputs, list):
- outputs = [outputs]
-
- self.assertEqual(len(outputs), len(self.tool.outputs))
-
- for output, output_type in zip(outputs, self.tool.outputs):
- agent_type = AGENT_TYPE_MAPPING[output_type]
- self.assertTrue(isinstance(output, agent_type))
-
- def test_agent_types_inputs(self):
- inputs = create_inputs(self.tool.inputs)
-
- _inputs = []
-
- for _input, input_type in zip(inputs, self.tool.inputs):
- if isinstance(input_type, list):
- _inputs.append([AGENT_TYPE_MAPPING[_input_type](_input) for _input_type in input_type])
- else:
- _inputs.append(AGENT_TYPE_MAPPING[input_type](_input))
-
- # Should not raise an error
- outputs = self.tool(*inputs)
-
- if not isinstance(outputs, list):
- outputs = [outputs]
-
- self.assertEqual(len(outputs), len(self.tool.outputs))
diff --git a/tests/tools/test_translation.py b/tests/tools/test_translation.py
deleted file mode 100644
index 15e1c8cd6ae4..000000000000
--- a/tests/tools/test_translation.py
+++ /dev/null
@@ -1,86 +0,0 @@
-# coding=utf-8
-# Copyright 2023 HuggingFace Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-
-from transformers import load_tool
-from transformers.tools.agent_types import AGENT_TYPE_MAPPING
-
-from .test_tools_common import ToolTesterMixin, output_types
-
-
-class TranslationToolTester(unittest.TestCase, ToolTesterMixin):
- def setUp(self):
- self.tool = load_tool("translation")
- self.tool.setup()
- self.remote_tool = load_tool("translation", remote=True)
-
- def test_exact_match_arg(self):
- result = self.tool("Hey, what's up?", src_lang="English", tgt_lang="French")
- self.assertEqual(result, "- Hé, comment ça va?")
-
- def test_exact_match_arg_remote(self):
- result = self.remote_tool("Hey, what's up?", src_lang="English", tgt_lang="French")
- self.assertEqual(result, "- Hé, comment ça va?")
-
- def test_exact_match_kwarg(self):
- result = self.tool(text="Hey, what's up?", src_lang="English", tgt_lang="French")
- self.assertEqual(result, "- Hé, comment ça va?")
-
- def test_exact_match_kwarg_remote(self):
- result = self.remote_tool(text="Hey, what's up?", src_lang="English", tgt_lang="French")
- self.assertEqual(result, "- Hé, comment ça va?")
-
- def test_call(self):
- inputs = ["Hey, what's up?", "English", "Spanish"]
- outputs = self.tool(*inputs)
-
- # There is a single output
- if len(self.tool.outputs) == 1:
- outputs = [outputs]
-
- self.assertListEqual(output_types(outputs), self.tool.outputs)
-
- def test_agent_types_outputs(self):
- inputs = ["Hey, what's up?", "English", "Spanish"]
- outputs = self.tool(*inputs)
-
- if not isinstance(outputs, list):
- outputs = [outputs]
-
- self.assertEqual(len(outputs), len(self.tool.outputs))
-
- for output, output_type in zip(outputs, self.tool.outputs):
- agent_type = AGENT_TYPE_MAPPING[output_type]
- self.assertTrue(isinstance(output, agent_type))
-
- def test_agent_types_inputs(self):
- inputs = ["Hey, what's up?", "English", "Spanish"]
-
- _inputs = []
-
- for _input, input_type in zip(inputs, self.tool.inputs):
- if isinstance(input_type, list):
- _inputs.append([AGENT_TYPE_MAPPING[_input_type](_input) for _input_type in input_type])
- else:
- _inputs.append(AGENT_TYPE_MAPPING[input_type](_input))
-
- # Should not raise an error
- outputs = self.tool(*inputs)
-
- if not isinstance(outputs, list):
- outputs = [outputs]
-
- self.assertEqual(len(outputs), len(self.tool.outputs))
diff --git a/tests/trainer/test_data_collator.py b/tests/trainer/test_data_collator.py
index f5104cd37507..36e1813258d1 100644
--- a/tests/trainer/test_data_collator.py
+++ b/tests/trainer/test_data_collator.py
@@ -23,6 +23,7 @@
BertTokenizer,
DataCollatorForLanguageModeling,
DataCollatorForPermutationLanguageModeling,
+ DataCollatorForSeq2Seq,
DataCollatorForTokenClassification,
DataCollatorForWholeWordMask,
DataCollatorWithPadding,
@@ -32,6 +33,7 @@
set_seed,
)
from transformers.testing_utils import require_tf, require_torch
+from transformers.utils import PaddingStrategy
if is_torch_available():
@@ -199,6 +201,83 @@ def test_data_collator_for_token_classification_works_with_pt_tensors(self):
self.assertEqual(batch["input_ids"].shape, torch.Size([2, 6]))
self.assertEqual(batch["input_ids"][0].tolist(), [0, 1, 2] + [tokenizer.pad_token_id] * 3)
+ def _test_data_collator_for_seq2seq(self, to_torch):
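+ # shared helper: builds list- or tensor-based features (depending on `to_torch`) and checks
+ # DataCollatorForSeq2Seq under the different padding strategies, with and without labels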
+ def create_features(to_torch):
+ if to_torch:
+ features = [
+ {"input_ids": torch.tensor(list(range(3))), "labels": torch.tensor(list(range(3)))},
+ {"input_ids": torch.tensor(list(range(6))), "labels": torch.tensor(list(range(6)))},
+ ]
+ else:
+ features = [
+ {"input_ids": list(range(3)), "labels": list(range(3))},
+ {"input_ids": list(range(6)), "labels": list(range(6))},
+ ]
+ return features
+
+ tokenizer = BertTokenizer(self.vocab_file)
+ features = create_features(to_torch)
+
+ data_collator = DataCollatorForSeq2Seq(tokenizer, padding=PaddingStrategy.LONGEST)
+ batch = data_collator(features)
+ self.assertEqual(batch["input_ids"].shape, torch.Size([2, 6]))
+ self.assertEqual(batch["input_ids"][0].tolist(), list(range(3)) + [tokenizer.pad_token_id] * 3)
+ self.assertEqual(batch["input_ids"][1].tolist(), list(range(6)))
+ self.assertEqual(batch["labels"].shape, torch.Size([2, 6]))
+ self.assertEqual(batch["labels"][0].tolist(), list(range(3)) + [-100] * 3)
+ self.assertEqual(batch["labels"][1].tolist(), list(range(6)))
+
+ data_collator = DataCollatorForSeq2Seq(tokenizer, padding=PaddingStrategy.MAX_LENGTH, max_length=7)
+ batch = data_collator(features)
+ self.assertEqual(batch["input_ids"].shape, torch.Size([2, 7]))
+ self.assertEqual(batch["input_ids"][0].tolist(), list(range(3)) + [tokenizer.pad_token_id] * 4)
+ self.assertEqual(batch["input_ids"][1].tolist(), list(range(6)) + [tokenizer.pad_token_id] * 1)
+ self.assertEqual(batch["labels"].shape, torch.Size([2, 7]))
+ self.assertEqual(batch["labels"][0].tolist(), list(range(3)) + [-100] * 4)
+ self.assertEqual(batch["labels"][1].tolist(), list(range(6)) + [-100] * 1)
+
+ data_collator = DataCollatorForSeq2Seq(tokenizer, padding=PaddingStrategy.DO_NOT_PAD)
+ with self.assertRaises(ValueError):
+ # expect an error: features of unequal length cannot be stacked into a tensor without padding
+ data_collator(features)
+ batch = data_collator([features[0], features[0]])
+ input_ids = features[0]["input_ids"] if not to_torch else features[0]["input_ids"].tolist()
+ labels = features[0]["labels"] if not to_torch else features[0]["labels"].tolist()
+ self.assertEqual(batch["input_ids"][0].tolist(), input_ids)
+ self.assertEqual(batch["input_ids"][1].tolist(), input_ids)
+ self.assertEqual(batch["labels"][0].tolist(), labels)
+ self.assertEqual(batch["labels"][1].tolist(), labels)
+
+ data_collator = DataCollatorForSeq2Seq(tokenizer, padding=PaddingStrategy.LONGEST, pad_to_multiple_of=8)
+ batch = data_collator(features)
+ self.assertEqual(batch["input_ids"].shape, torch.Size([2, 8]))
+ self.assertEqual(batch["labels"].shape, torch.Size([2, 8]))
+
+ # recreate the features: the earlier collator calls may have padded the labels in place, which would skew the checks for the longest strategy
+ features = create_features(to_torch)
+
+ data_collator = DataCollatorForSeq2Seq(tokenizer, padding=PaddingStrategy.LONGEST, label_pad_token_id=-1)
+ batch = data_collator(features)
+ self.assertEqual(batch["input_ids"].shape, torch.Size([2, 6]))
+ self.assertEqual(batch["input_ids"][0].tolist(), list(range(3)) + [tokenizer.pad_token_id] * 3)
+ self.assertEqual(batch["input_ids"][1].tolist(), list(range(6)))
+ self.assertEqual(batch["labels"].shape, torch.Size([2, 6]))
+ self.assertEqual(batch["labels"][0].tolist(), list(range(3)) + [-1] * 3)
+ self.assertEqual(batch["labels"][1].tolist(), list(range(6)))
+
+ for feature in features:
+ feature.pop("labels")
+
+ batch = data_collator(features)
+ self.assertEqual(batch["input_ids"].shape, torch.Size([2, 6]))
+ self.assertEqual(batch["input_ids"][0].tolist(), list(range(3)) + [tokenizer.pad_token_id] * 3)
+
+ def test_data_collator_for_seq2seq_with_lists(self):
+ self._test_data_collator_for_seq2seq(to_torch=False)
+
+ def test_data_collator_for_seq2seq_with_pt(self):
+ self._test_data_collator_for_seq2seq(to_torch=True)
+
def _test_no_pad_and_pad(self, no_pad_features, pad_features):
tokenizer = BertTokenizer(self.vocab_file)
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
@@ -360,6 +439,330 @@ def test_sop(self):
self.assertEqual(batch["sentence_order_label"].shape, torch.Size((2,)))
+@require_torch
+class DataCollatorImmutabilityTest(unittest.TestCase):
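+ # checks that the PyTorch data collators do not modify the feature dicts they are given (no in-place side effects)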
+ def setUp(self):
+ self.tmpdirname = tempfile.mkdtemp()
+
+ vocab_tokens = ["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"]
+ self.vocab_file = os.path.join(self.tmpdirname, "vocab.txt")
+ with open(self.vocab_file, "w", encoding="utf-8") as vocab_writer:
+ vocab_writer.write("".join([x + "\n" for x in vocab_tokens]))
+
+ def tearDown(self):
+ shutil.rmtree(self.tmpdirname)
+
+ def _turn_to_none(self, item):
+ """used to convert `item` to `None` type"""
+ return None
+
+ def _validate_original_data_against_collated_data(self, collator, original_data, batch_data):
+ # we only care about side effects, the results are tested elsewhere
+ collator(batch_data)
+
+ # we go through every item and convert to `primitive` datatypes if necessary,
+ # then compare the original data against the data that was passed through the collator for equivalence
+ for original, batch in zip(original_data, batch_data):
+ for original_val, batch_val in zip(original.values(), batch.values()):
+ if isinstance(original_val, (np.ndarray, torch.Tensor)):
+ self.assertEqual(original_val.tolist(), batch_val.tolist())
+ else:
+ self.assertEqual(original_val, batch_val)
+
+ def _validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ self, collator, base_data, input_key, input_datatype, label_key, label_datatype, ignore_label=False
+ ):
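+ # build a reference copy and a working copy of the features; only the working copy goes through the collator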
+ # using the arguments to recreate the features with their respective (potentially new) datatypes
+ features_original = [
+ {label_key: label_datatype(sample[label_key]), input_key: input_datatype(sample[input_key])}
+ for sample in base_data
+ ]
+ features_batch = [
+ {label_key: label_datatype(sample[label_key]), input_key: input_datatype(sample[input_key])}
+ for sample in base_data
+ ]
+
+ # some collators do not use labels; we also want to check that collators which expect labels can handle their absence
+ if ignore_label:
+ for original, batch in zip(features_original, features_batch):
+ original.pop(label_key)
+ batch.pop(label_key)
+
+ self._validate_original_data_against_collated_data(
+ collator=collator, original_data=features_original, batch_data=features_batch
+ )
+
+ def test_default_collator_immutability(self):
+ features_base_single_label = [{"label": i, "inputs": (0, 1, 2, 3, 4, 5)} for i in range(4)]
+ features_base_multiple_labels = [{"label": (0, 1, 2), "inputs": (0, 1, 2, 3, 4, 5)} for i in range(4)]
+
+ for datatype_input, datatype_label in [
+ (list, int),
+ (list, float),
+ (np.array, int),
+ (np.array, torch.tensor),
+ (list, self._turn_to_none),
+ ]:
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=default_data_collator,
+ base_data=features_base_single_label,
+ input_key="inputs",
+ input_datatype=datatype_input,
+ label_key="label",
+ label_datatype=datatype_label,
+ )
+
+ for datatype_input, datatype_label in [(list, list), (list, self._turn_to_none)]:
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=default_data_collator,
+ base_data=features_base_multiple_labels,
+ input_key="inputs",
+ input_datatype=datatype_input,
+ label_key="label",
+ label_datatype=datatype_label,
+ )
+
+ features_base_single_label_alt = [{"input_ids": (0, 1, 2, 3, 4), "label": float(i)} for i in range(4)]
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=default_data_collator,
+ base_data=features_base_single_label_alt,
+ input_key="input_ids",
+ input_datatype=list,
+ label_key="label",
+ label_datatype=float,
+ )
+
+ def test_with_padding_collator_immutability(self):
+ tokenizer = BertTokenizer(self.vocab_file)
+
+ features_original = [{"input_ids": [0, 1, 2]}, {"input_ids": [0, 1, 2, 3, 4, 5]}]
+ features_batch = [{"input_ids": [0, 1, 2]}, {"input_ids": [0, 1, 2, 3, 4, 5]}]
+
+ data_collator = DataCollatorWithPadding(tokenizer, padding="max_length", max_length=10)
+ self._validate_original_data_against_collated_data(
+ collator=data_collator, original_data=features_original, batch_data=features_batch
+ )
+
+ data_collator = DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8)
+ self._validate_original_data_against_collated_data(
+ collator=data_collator, original_data=features_original, batch_data=features_batch
+ )
+
+ def test_for_token_classification_collator_immutability(self):
+ tokenizer = BertTokenizer(self.vocab_file)
+
+ features_base = [
+ {"input_ids": (0, 1, 2), "labels": (0, 1, 2)},
+ {"input_ids": (0, 1, 2, 3, 4, 5), "labels": (0, 1, 2, 3, 4, 5)},
+ ]
+ token_classification_collators = [
+ DataCollatorForTokenClassification(tokenizer),
+ DataCollatorForTokenClassification(tokenizer, padding="max_length", max_length=10),
+ DataCollatorForTokenClassification(tokenizer, pad_to_multiple_of=8),
+ DataCollatorForTokenClassification(tokenizer, label_pad_token_id=-1),
+ ]
+
+ for datatype_input, datatype_label in [(list, list), (torch.tensor, torch.tensor)]:
+ for collator in token_classification_collators:
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=collator,
+ base_data=features_base,
+ input_key="input_ids",
+ input_datatype=datatype_input,
+ label_key="labels",
+ label_datatype=datatype_label,
+ )
+
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=token_classification_collators[-1],
+ base_data=features_base,
+ input_key="input_ids",
+ input_datatype=datatype_input,
+ label_key="labels",
+ label_datatype=datatype_label,
+ ignore_label=True,
+ )
+
+ def test_seq2seq_collator_immutability(self):
+ tokenizer = BertTokenizer(self.vocab_file)
+
+ features_base = [
+ {"input_ids": list(range(3)), "labels": list(range(3))},
+ {"input_ids": list(range(6)), "labels": list(range(6))},
+ ]
+ seq2seq_collators = [
+ DataCollatorForSeq2Seq(tokenizer, padding=PaddingStrategy.LONGEST),
+ DataCollatorForSeq2Seq(tokenizer, padding=PaddingStrategy.MAX_LENGTH, max_length=7),
+ DataCollatorForSeq2Seq(tokenizer, padding=PaddingStrategy.LONGEST, pad_to_multiple_of=8),
+ DataCollatorForSeq2Seq(tokenizer, padding=PaddingStrategy.LONGEST, label_pad_token_id=-1),
+ ]
+
+ for datatype_input, datatype_label in [(list, list), (torch.tensor, torch.tensor)]:
+ for collator in seq2seq_collators:
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=collator,
+ base_data=features_base,
+ input_key="input_ids",
+ input_datatype=datatype_input,
+ label_key="labels",
+ label_datatype=datatype_label,
+ )
+
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=seq2seq_collators[-1],
+ base_data=features_base,
+ input_key="input_ids",
+ input_datatype=datatype_input,
+ label_key="labels",
+ label_datatype=datatype_label,
+ ignore_label=True,
+ )
+
+ features_base_no_pad = [
+ {"input_ids": list(range(3)), "labels": list(range(3))},
+ {"input_ids": list(range(3)), "labels": list(range(3))},
+ ]
+ seq2seq_no_padding_collator = DataCollatorForSeq2Seq(tokenizer, padding=PaddingStrategy.DO_NOT_PAD)
+ for datatype_input, datatype_label in [(list, list), (torch.tensor, torch.tensor)]:
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=seq2seq_no_padding_collator,
+ base_data=features_base_no_pad,
+ input_key="input_ids",
+ input_datatype=datatype_input,
+ label_key="labels",
+ label_datatype=datatype_label,
+ )
+
+ def test_language_modelling_collator_immutability(self):
+ tokenizer = BertTokenizer(self.vocab_file)
+
+ features_base_no_pad = [
+ {"input_ids": tuple(range(10)), "labels": (1,)},
+ {"input_ids": tuple(range(10)), "labels": (1,)},
+ ]
+ features_base_pad = [
+ {"input_ids": tuple(range(5)), "labels": (1,)},
+ {"input_ids": tuple(range(5)), "labels": (1,)},
+ ]
+ lm_collators = [
+ DataCollatorForLanguageModeling(tokenizer, mlm=False),
+ DataCollatorForLanguageModeling(tokenizer, mlm=False, pad_to_multiple_of=8),
+ DataCollatorForLanguageModeling(tokenizer),
+ DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=8),
+ ]
+
+ for datatype_input, datatype_label in [(list, list), (torch.tensor, torch.tensor)]:
+ for collator in lm_collators:
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=collator,
+ base_data=features_base_no_pad,
+ input_key="input_ids",
+ input_datatype=datatype_input,
+ label_key="labels",
+ label_datatype=datatype_label,
+ ignore_label=True,
+ )
+
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=collator,
+ base_data=features_base_pad,
+ input_key="input_ids",
+ input_datatype=datatype_input,
+ label_key="labels",
+ label_datatype=datatype_label,
+ ignore_label=True,
+ )
+
+ def test_whole_word_masking_collator_immutability(self):
+ tokenizer = BertTokenizer(self.vocab_file)
+
+ features_base = [
+ {"input_ids": list(range(10)), "labels": (1,)},
+ {"input_ids": list(range(10)), "labels": (1,)},
+ ]
+ whole_word_masking_collator = DataCollatorForWholeWordMask(tokenizer, return_tensors="pt")
+
+ for datatype_input, datatype_label in [(list, list), (np.array, np.array)]:
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=whole_word_masking_collator,
+ base_data=features_base,
+ input_key="input_ids",
+ input_datatype=datatype_input,
+ label_key="labels",
+ label_datatype=datatype_label,
+ ignore_label=True,
+ )
+
+ def test_permutation_language_modelling_collator_immutability(self):
+ tokenizer = BertTokenizer(self.vocab_file)
+
+ plm_collator = DataCollatorForPermutationLanguageModeling(tokenizer)
+
+ no_pad_features_original = [{"input_ids": list(range(10))}, {"input_ids": list(range(10))}]
+ no_pad_features_batch = [{"input_ids": list(range(10))}, {"input_ids": list(range(10))}]
+ self._validate_original_data_against_collated_data(
+ collator=plm_collator, original_data=no_pad_features_original, batch_data=no_pad_features_batch
+ )
+
+ pad_features_original = [{"input_ids": list(range(5))}, {"input_ids": list(range(10))}]
+ pad_features_batch = [{"input_ids": list(range(5))}, {"input_ids": list(range(10))}]
+ self._validate_original_data_against_collated_data(
+ collator=plm_collator, original_data=pad_features_original, batch_data=pad_features_batch
+ )
+
+ def test_next_sentence_prediction_collator_immutability(self):
+ tokenizer = BertTokenizer(self.vocab_file)
+
+ features_original = [
+ {"input_ids": [0, 1, 2, 3, 4], "token_type_ids": [0, 1, 2, 3, 4], "next_sentence_label": i}
+ for i in range(2)
+ ]
+ features_batch = [
+ {"input_ids": [0, 1, 2, 3, 4], "token_type_ids": [0, 1, 2, 3, 4], "next_sentence_label": i}
+ for i in range(2)
+ ]
+
+ nsp_collator = DataCollatorForLanguageModeling(tokenizer)
+ self._validate_original_data_against_collated_data(
+ collator=nsp_collator, original_data=features_original, batch_data=features_batch
+ )
+
+ nsp_collator = DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=8)
+ self._validate_original_data_against_collated_data(
+ collator=nsp_collator, original_data=features_original, batch_data=features_batch
+ )
+
+ def test_sentence_order_prediction_collator_immutability(self):
+ tokenizer = BertTokenizer(self.vocab_file)
+
+ features_original = [
+ {
+ "input_ids": torch.tensor([0, 1, 2, 3, 4]),
+ "token_type_ids": torch.tensor([0, 1, 2, 3, 4]),
+ "sentence_order_label": i,
+ }
+ for i in range(2)
+ ]
+ features_batch = [
+ {
+ "input_ids": torch.tensor([0, 1, 2, 3, 4]),
+ "token_type_ids": torch.tensor([0, 1, 2, 3, 4]),
+ "sentence_order_label": i,
+ }
+ for i in range(2)
+ ]
+
+ sop_collator = DataCollatorForLanguageModeling(tokenizer)
+ self._validate_original_data_against_collated_data(
+ collator=sop_collator, original_data=features_original, batch_data=features_batch
+ )
+
+ sop_collator = DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=8)
+ self._validate_original_data_against_collated_data(
+ collator=sop_collator, original_data=features_original, batch_data=features_batch
+ )
+
+
@require_tf
class TFDataCollatorIntegrationTest(unittest.TestCase):
def setUp(self):
@@ -484,6 +887,74 @@ def test_data_collator_for_token_classification(self):
self.assertEqual(batch["labels"].shape.as_list(), [2, 6])
self.assertEqual(batch["labels"][0].numpy().tolist(), [0, 1, 2] + [-1] * 3)
+ def test_data_collator_for_seq2seq(self):
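+ # TF counterpart of the seq2seq collator test above, exercising the same padding strategies with return_tensors="tf"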
+ def create_features():
+ return [
+ {"input_ids": list(range(3)), "labels": list(range(3))},
+ {"input_ids": list(range(6)), "labels": list(range(6))},
+ ]
+
+ tokenizer = BertTokenizer(self.vocab_file)
+ features = create_features()
+
+ data_collator = DataCollatorForSeq2Seq(tokenizer, padding=PaddingStrategy.LONGEST, return_tensors="tf")
+ batch = data_collator(features)
+ self.assertEqual(batch["input_ids"].shape.as_list(), [2, 6])
+ self.assertEqual(batch["input_ids"][0].numpy().tolist(), list(range(3)) + [tokenizer.pad_token_id] * 3)
+ self.assertEqual(batch["input_ids"][1].numpy().tolist(), list(range(6)))
+ self.assertEqual(batch["labels"].shape.as_list(), [2, 6])
+ self.assertEqual(batch["labels"][0].numpy().tolist(), list(range(3)) + [-100] * 3)
+ self.assertEqual(batch["labels"][1].numpy().tolist(), list(range(6)))
+
+ data_collator = DataCollatorForSeq2Seq(
+ tokenizer, padding=PaddingStrategy.MAX_LENGTH, max_length=7, return_tensors="tf"
+ )
+ batch = data_collator(features)
+ self.assertEqual(batch["input_ids"].shape.as_list(), [2, 7])
+ self.assertEqual(batch["input_ids"][0].numpy().tolist(), list(range(3)) + [tokenizer.pad_token_id] * 4)
+ self.assertEqual(batch["input_ids"][1].numpy().tolist(), list(range(6)) + [tokenizer.pad_token_id] * 1)
+ self.assertEqual(batch["labels"].shape.as_list(), [2, 7])
+ self.assertEqual(batch["labels"][0].numpy().tolist(), list(range(3)) + [-100] * 4)
+ self.assertEqual(batch["labels"][1].numpy().tolist(), list(range(6)) + [-100] * 1)
+
+ data_collator = DataCollatorForSeq2Seq(tokenizer, padding=PaddingStrategy.DO_NOT_PAD, return_tensors="tf")
+ with self.assertRaises(ValueError):
+ # expect an error: features of unequal length cannot be stacked into a tensor without padding
+ data_collator(features)
+ batch = data_collator([features[0], features[0]])
+ self.assertEqual(batch["input_ids"][0].numpy().tolist(), features[0]["input_ids"])
+ self.assertEqual(batch["input_ids"][1].numpy().tolist(), features[0]["input_ids"])
+ self.assertEqual(batch["labels"][0].numpy().tolist(), features[0]["labels"])
+ self.assertEqual(batch["labels"][1].numpy().tolist(), features[0]["labels"])
+
+ data_collator = DataCollatorForSeq2Seq(
+ tokenizer, padding=PaddingStrategy.LONGEST, pad_to_multiple_of=8, return_tensors="tf"
+ )
+ batch = data_collator(features)
+ self.assertEqual(batch["input_ids"].shape.as_list(), [2, 8])
+ self.assertEqual(batch["labels"].shape.as_list(), [2, 8])
+
+ # recreate the features: the earlier collator calls may have padded the labels in place, which would skew the checks for the longest strategy
+ features = create_features()
+
+ data_collator = DataCollatorForSeq2Seq(
+ tokenizer, padding=PaddingStrategy.LONGEST, label_pad_token_id=-1, return_tensors="tf"
+ )
+ batch = data_collator(features)
+ self.assertEqual(batch["input_ids"].shape.as_list(), [2, 6])
+ self.assertEqual(batch["input_ids"][0].numpy().tolist(), list(range(3)) + [tokenizer.pad_token_id] * 3)
+ self.assertEqual(batch["input_ids"][1].numpy().tolist(), list(range(6)))
+ self.assertEqual(batch["labels"].shape.as_list(), [2, 6])
+ self.assertEqual(batch["labels"][0].numpy().tolist(), list(range(3)) + [-1] * 3)
+ self.assertEqual(batch["labels"][1].numpy().tolist(), list(range(6)))
+
+ for feature in features:
+ feature.pop("labels")
+
+ batch = data_collator(features)
+ self.assertEqual(batch["input_ids"].shape.as_list(), [2, 6])
+ self.assertEqual(batch["input_ids"][0].numpy().tolist(), list(range(3)) + [tokenizer.pad_token_id] * 3)
+
def _test_no_pad_and_pad(self, no_pad_features, pad_features):
tokenizer = BertTokenizer(self.vocab_file)
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False, return_tensors="tf")
@@ -647,6 +1118,338 @@ def test_sop(self):
self.assertEqual(batch["sentence_order_label"].shape.as_list(), [2])
+@require_tf
+class TFDataCollatorImmutabilityTest(unittest.TestCase):
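+ # same immutability checks as DataCollatorImmutabilityTest above, run against the TF collators (return_tensors="tf")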
+ def setUp(self):
+ self.tmpdirname = tempfile.mkdtemp()
+
+ vocab_tokens = ["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"]
+ self.vocab_file = os.path.join(self.tmpdirname, "vocab.txt")
+ with open(self.vocab_file, "w", encoding="utf-8") as vocab_writer:
+ vocab_writer.write("".join([x + "\n" for x in vocab_tokens]))
+
+ def tearDown(self):
+ shutil.rmtree(self.tmpdirname)
+
+ def _turn_to_none(self, item):
+ """used to convert `item` to `None` type"""
+ return None
+
+ def _validate_original_data_against_collated_data(self, collator, original_data, batch_data):
+ # we only care about side effects, the results are tested elsewhere
+ collator(batch_data)
+
+ # we go through every item and convert to `primitive` datatypes if necessary,
+ # then compare the original data against the data that was passed through the collator for equivalence
+ for original, batch in zip(original_data, batch_data):
+ for original_val, batch_val in zip(original.values(), batch.values()):
+ if isinstance(original_val, np.ndarray):
+ self.assertEqual(original_val.tolist(), batch_val.tolist())
+ elif isinstance(original_val, tf.Tensor):
+ self.assertEqual(original_val.numpy().tolist(), batch_val.numpy().tolist())
+ else:
+ self.assertEqual(original_val, batch_val)
+
+ def _validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ self, collator, base_data, input_key, input_datatype, label_key, label_datatype, ignore_label=False
+ ):
+ # using the arguments to recreate the features with their respective (potentially new) datatypes
+ features_original = [
+ {label_key: label_datatype(sample[label_key]), input_key: input_datatype(sample[input_key])}
+ for sample in base_data
+ ]
+ features_batch = [
+ {label_key: label_datatype(sample[label_key]), input_key: input_datatype(sample[input_key])}
+ for sample in base_data
+ ]
+
+ # some collators do not use labels; we also want to check that collators which expect labels can handle their absence
+ if ignore_label:
+ for original, batch in zip(features_original, features_batch):
+ original.pop(label_key)
+ batch.pop(label_key)
+
+ self._validate_original_data_against_collated_data(
+ collator=collator, original_data=features_original, batch_data=features_batch
+ )
+
+ def test_default_collator_immutability(self):
+ features_base_single_label = [{"label": i, "inputs": (0, 1, 2, 3, 4, 5)} for i in range(4)]
+ features_base_multiple_labels = [{"label": (0, 1, 2), "inputs": (0, 1, 2, 3, 4, 5)} for i in range(4)]
+
+ for datatype_input, datatype_label in [
+ (list, int),
+ (list, float),
+ (np.array, int),
+ (np.array, tf.constant),
+ (list, self._turn_to_none),
+ ]:
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=lambda x: default_data_collator(x, return_tensors="tf"),
+ base_data=features_base_single_label,
+ input_key="inputs",
+ input_datatype=datatype_input,
+ label_key="label",
+ label_datatype=datatype_label,
+ )
+
+ for datatype_input, datatype_label in [(list, list), (list, self._turn_to_none)]:
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=lambda x: default_data_collator(x, return_tensors="tf"),
+ base_data=features_base_multiple_labels,
+ input_key="inputs",
+ input_datatype=datatype_input,
+ label_key="label",
+ label_datatype=datatype_label,
+ )
+
+ features_base_single_label_alt = [{"input_ids": (0, 1, 2, 3, 4), "label": float(i)} for i in range(4)]
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=lambda x: default_data_collator(x, return_tensors="tf"),
+ base_data=features_base_single_label_alt,
+ input_key="input_ids",
+ input_datatype=list,
+ label_key="label",
+ label_datatype=float,
+ )
+
+ def test_with_padding_collator_immutability(self):
+ tokenizer = BertTokenizer(self.vocab_file)
+
+ features_original = [{"input_ids": [0, 1, 2]}, {"input_ids": [0, 1, 2, 3, 4, 5]}]
+ features_batch = [{"input_ids": [0, 1, 2]}, {"input_ids": [0, 1, 2, 3, 4, 5]}]
+
+ data_collator = DataCollatorWithPadding(tokenizer, padding="max_length", max_length=10, return_tensors="tf")
+ self._validate_original_data_against_collated_data(
+ collator=data_collator, original_data=features_original, batch_data=features_batch
+ )
+
+ data_collator = DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8, return_tensors="tf")
+ self._validate_original_data_against_collated_data(
+ collator=data_collator, original_data=features_original, batch_data=features_batch
+ )
+
+ def test_for_token_classification_collator_immutability(self):
+ tokenizer = BertTokenizer(self.vocab_file)
+
+ features_base = [
+ {"input_ids": (0, 1, 2), "labels": (0, 1, 2)},
+ {"input_ids": (0, 1, 2, 3, 4, 5), "labels": (0, 1, 2, 3, 4, 5)},
+ ]
+ token_classification_collators = [
+ DataCollatorForTokenClassification(tokenizer, return_tensors="tf"),
+ DataCollatorForTokenClassification(tokenizer, padding="max_length", max_length=10, return_tensors="tf"),
+ DataCollatorForTokenClassification(tokenizer, pad_to_multiple_of=8, return_tensors="tf"),
+ DataCollatorForTokenClassification(tokenizer, label_pad_token_id=-1, return_tensors="tf"),
+ ]
+
+ for datatype_input, datatype_label in [(list, list)]:
+ for collator in token_classification_collators:
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=collator,
+ base_data=features_base,
+ input_key="input_ids",
+ input_datatype=datatype_input,
+ label_key="labels",
+ label_datatype=datatype_label,
+ )
+
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=token_classification_collators[-1],
+ base_data=features_base,
+ input_key="input_ids",
+ input_datatype=datatype_input,
+ label_key="labels",
+ label_datatype=datatype_label,
+ ignore_label=True,
+ )
+
+ def test_seq2seq_collator_immutability(self):
+ tokenizer = BertTokenizer(self.vocab_file)
+
+ features_base = [
+ {"input_ids": list(range(3)), "labels": list(range(3))},
+ {"input_ids": list(range(6)), "labels": list(range(6))},
+ ]
+ seq2seq_collators = [
+ DataCollatorForSeq2Seq(tokenizer, padding=PaddingStrategy.LONGEST, return_tensors="tf"),
+ DataCollatorForSeq2Seq(tokenizer, padding=PaddingStrategy.MAX_LENGTH, max_length=7, return_tensors="tf"),
+ DataCollatorForSeq2Seq(
+ tokenizer, padding=PaddingStrategy.LONGEST, pad_to_multiple_of=8, return_tensors="tf"
+ ),
+ DataCollatorForSeq2Seq(
+ tokenizer, padding=PaddingStrategy.LONGEST, label_pad_token_id=-1, return_tensors="tf"
+ ),
+ ]
+
+ for datatype_input, datatype_label in [(list, list)]:
+ for collator in seq2seq_collators:
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=collator,
+ base_data=features_base,
+ input_key="input_ids",
+ input_datatype=datatype_input,
+ label_key="labels",
+ label_datatype=datatype_label,
+ )
+
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=seq2seq_collators[-1],
+ base_data=features_base,
+ input_key="input_ids",
+ input_datatype=datatype_input,
+ label_key="labels",
+ label_datatype=datatype_label,
+ ignore_label=True,
+ )
+
+ features_base_no_pad = [
+ {"input_ids": list(range(3)), "labels": list(range(3))},
+ {"input_ids": list(range(3)), "labels": list(range(3))},
+ ]
+ seq2seq_no_padding_collator = DataCollatorForSeq2Seq(
+ tokenizer, padding=PaddingStrategy.DO_NOT_PAD, return_tensors="tf"
+ )
+ for datatype_input, datatype_label in [(list, list)]:
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=seq2seq_no_padding_collator,
+ base_data=features_base_no_pad,
+ input_key="input_ids",
+ input_datatype=datatype_input,
+ label_key="labels",
+ label_datatype=datatype_label,
+ )
+
+ def test_language_modelling_collator_immutability(self):
+ tokenizer = BertTokenizer(self.vocab_file)
+
+ features_base_no_pad = [
+ {"input_ids": tuple(range(10)), "labels": (1,)},
+ {"input_ids": tuple(range(10)), "labels": (1,)},
+ ]
+ features_base_pad = [
+ {"input_ids": tuple(range(5)), "labels": (1,)},
+ {"input_ids": tuple(range(5)), "labels": (1,)},
+ ]
+ lm_collators = [
+ DataCollatorForLanguageModeling(tokenizer, mlm=False, return_tensors="tf"),
+ DataCollatorForLanguageModeling(tokenizer, mlm=False, pad_to_multiple_of=8, return_tensors="tf"),
+ DataCollatorForLanguageModeling(tokenizer, return_tensors="tf"),
+ DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=8, return_tensors="tf"),
+ ]
+
+ for datatype_input, datatype_label in [(list, list)]:
+ for collator in lm_collators:
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=collator,
+ base_data=features_base_no_pad,
+ input_key="input_ids",
+ input_datatype=datatype_input,
+ label_key="labels",
+ label_datatype=datatype_label,
+ ignore_label=True,
+ )
+
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=collator,
+ base_data=features_base_pad,
+ input_key="input_ids",
+ input_datatype=datatype_input,
+ label_key="labels",
+ label_datatype=datatype_label,
+ ignore_label=True,
+ )
+
+ def test_whole_word_masking_collator_immutability(self):
+ tokenizer = BertTokenizer(self.vocab_file)
+
+ features_base = [
+ {"input_ids": list(range(10)), "labels": (1,)},
+ {"input_ids": list(range(10)), "labels": (1,)},
+ ]
+ whole_word_masking_collator = DataCollatorForWholeWordMask(tokenizer, return_tensors="tf")
+
+ for datatype_input, datatype_label in [(list, list), (np.array, np.array)]:
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=whole_word_masking_collator,
+ base_data=features_base,
+ input_key="input_ids",
+ input_datatype=datatype_input,
+ label_key="labels",
+ label_datatype=datatype_label,
+ ignore_label=True,
+ )
+
+ def test_permutation_language_modelling_collator_immutability(self):
+ tokenizer = BertTokenizer(self.vocab_file)
+
+ plm_collator = DataCollatorForPermutationLanguageModeling(tokenizer, return_tensors="tf")
+
+ no_pad_features_original = [{"input_ids": list(range(10))}, {"input_ids": list(range(10))}]
+ no_pad_features_batch = [{"input_ids": list(range(10))}, {"input_ids": list(range(10))}]
+ self._validate_original_data_against_collated_data(
+ collator=plm_collator, original_data=no_pad_features_original, batch_data=no_pad_features_batch
+ )
+
+ pad_features_original = [{"input_ids": list(range(5))}, {"input_ids": list(range(10))}]
+ pad_features_batch = [{"input_ids": list(range(5))}, {"input_ids": list(range(10))}]
+ self._validate_original_data_against_collated_data(
+ collator=plm_collator, original_data=pad_features_original, batch_data=pad_features_batch
+ )
+
+ def test_next_sentence_prediction_collator_immutability(self):
+ tokenizer = BertTokenizer(self.vocab_file)
+
+ features_original = [
+ {"input_ids": [0, 1, 2, 3, 4], "token_type_ids": [0, 1, 2, 3, 4], "next_sentence_label": i}
+ for i in range(2)
+ ]
+ features_batch = [
+ {"input_ids": [0, 1, 2, 3, 4], "token_type_ids": [0, 1, 2, 3, 4], "next_sentence_label": i}
+ for i in range(2)
+ ]
+
+ nsp_collator = DataCollatorForLanguageModeling(tokenizer, return_tensors="tf")
+ self._validate_original_data_against_collated_data(
+ collator=nsp_collator, original_data=features_original, batch_data=features_batch
+ )
+
+ nsp_collator = DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=8, return_tensors="tf")
+ self._validate_original_data_against_collated_data(
+ collator=nsp_collator, original_data=features_original, batch_data=features_batch
+ )
+
+ def test_sentence_order_prediction_collator_immutability(self):
+ tokenizer = BertTokenizer(self.vocab_file)
+
+ features_original = [
+ {
+ "input_ids": tf.convert_to_tensor([0, 1, 2, 3, 4]),
+ "token_type_ids": tf.convert_to_tensor([0, 1, 2, 3, 4]),
+ "sentence_order_label": i,
+ }
+ for i in range(2)
+ ]
+ features_batch = [
+ {
+ "input_ids": tf.convert_to_tensor([0, 1, 2, 3, 4]),
+ "token_type_ids": tf.convert_to_tensor([0, 1, 2, 3, 4]),
+ "sentence_order_label": i,
+ }
+ for i in range(2)
+ ]
+
+ sop_collator = DataCollatorForLanguageModeling(tokenizer, return_tensors="tf")
+ self._validate_original_data_against_collated_data(
+ collator=sop_collator, original_data=features_original, batch_data=features_batch
+ )
+
+ sop_collator = DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=8, return_tensors="tf")
+ self._validate_original_data_against_collated_data(
+ collator=sop_collator, original_data=features_original, batch_data=features_batch
+ )
+
+
class NumpyDataCollatorIntegrationTest(unittest.TestCase):
def setUp(self):
self.tmpdirname = tempfile.mkdtemp()
@@ -761,6 +1564,74 @@ def test_data_collator_for_token_classification(self):
self.assertEqual(batch["labels"].shape, (2, 6))
self.assertEqual(batch["labels"][0].tolist(), [0, 1, 2] + [-1] * 3)
+ def test_data_collator_for_seq2seq(self):
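+ # NumPy counterpart of the seq2seq collator test, exercising the same padding strategies with return_tensors="np"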
+ def create_features():
+ return [
+ {"input_ids": list(range(3)), "labels": list(range(3))},
+ {"input_ids": list(range(6)), "labels": list(range(6))},
+ ]
+
+ tokenizer = BertTokenizer(self.vocab_file)
+ features = create_features()
+
+ data_collator = DataCollatorForSeq2Seq(tokenizer, padding=PaddingStrategy.LONGEST, return_tensors="np")
+ batch = data_collator(features)
+ self.assertEqual(batch["input_ids"].shape, (2, 6))
+ self.assertEqual(batch["input_ids"][0].tolist(), list(range(3)) + [tokenizer.pad_token_id] * 3)
+ self.assertEqual(batch["input_ids"][1].tolist(), list(range(6)))
+ self.assertEqual(batch["labels"].shape, (2, 6))
+ self.assertEqual(batch["labels"][0].tolist(), list(range(3)) + [-100] * 3)
+ self.assertEqual(batch["labels"][1].tolist(), list(range(6)))
+
+ data_collator = DataCollatorForSeq2Seq(
+ tokenizer, padding=PaddingStrategy.MAX_LENGTH, max_length=7, return_tensors="np"
+ )
+ batch = data_collator(features)
+ self.assertEqual(batch["input_ids"].shape, (2, 7))
+ self.assertEqual(batch["input_ids"][0].tolist(), list(range(3)) + [tokenizer.pad_token_id] * 4)
+ self.assertEqual(batch["input_ids"][1].tolist(), list(range(6)) + [tokenizer.pad_token_id] * 1)
+ self.assertEqual(batch["labels"].shape, (2, 7))
+ self.assertEqual(batch["labels"][0].tolist(), list(range(3)) + [-100] * 4)
+ self.assertEqual(batch["labels"][1].tolist(), list(range(6)) + [-100] * 1)
+
+ data_collator = DataCollatorForSeq2Seq(tokenizer, padding=PaddingStrategy.DO_NOT_PAD, return_tensors="np")
+ # numpy doesn't have issues handling unequal shapes via `dtype=object`
+ # with self.assertRaises(ValueError):
+ # data_collator(features)
+ batch = data_collator([features[0], features[0]])
+ self.assertEqual(batch["input_ids"][0].tolist(), features[0]["input_ids"])
+ self.assertEqual(batch["input_ids"][1].tolist(), features[0]["input_ids"])
+ self.assertEqual(batch["labels"][0].tolist(), features[0]["labels"])
+ self.assertEqual(batch["labels"][1].tolist(), features[0]["labels"])
+
+ data_collator = DataCollatorForSeq2Seq(
+ tokenizer, padding=PaddingStrategy.LONGEST, pad_to_multiple_of=8, return_tensors="np"
+ )
+ batch = data_collator(features)
+ self.assertEqual(batch["input_ids"].shape, (2, 8))
+ self.assertEqual(batch["labels"].shape, (2, 8))
+
+ # recreate the features: the earlier collator calls may have padded the labels in place, which would skew the checks for the longest strategy
+ features = create_features()
+
+ data_collator = DataCollatorForSeq2Seq(
+ tokenizer, padding=PaddingStrategy.LONGEST, label_pad_token_id=-1, return_tensors="np"
+ )
+ batch = data_collator(features)
+ self.assertEqual(batch["input_ids"].shape, (2, 6))
+ self.assertEqual(batch["input_ids"][0].tolist(), list(range(3)) + [tokenizer.pad_token_id] * 3)
+ self.assertEqual(batch["input_ids"][1].tolist(), list(range(6)))
+ self.assertEqual(batch["labels"].shape, (2, 6))
+ self.assertEqual(batch["labels"][0].tolist(), list(range(3)) + [-1] * 3)
+ self.assertEqual(batch["labels"][1].tolist(), list(range(6)))
+
+ for feature in features:
+ feature.pop("labels")
+
+ batch = data_collator(features)
+ self.assertEqual(batch["input_ids"].shape, (2, 6))
+ self.assertEqual(batch["input_ids"][0].tolist(), list(range(3)) + [tokenizer.pad_token_id] * 3)
+
def _test_no_pad_and_pad(self, no_pad_features, pad_features):
tokenizer = BertTokenizer(self.vocab_file)
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False, return_tensors="np")
@@ -922,3 +1793,332 @@ def test_sop(self):
self.assertEqual(batch["token_type_ids"].shape, (2, 8))
self.assertEqual(batch["labels"].shape, (2, 8))
self.assertEqual(batch["sentence_order_label"].shape, (2,))
+
+
+class NumpyDataCollatorImmutabilityTest(unittest.TestCase):
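+ # same immutability checks as above, run against the NumPy collators (return_tensors="np")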
+ def setUp(self):
+ self.tmpdirname = tempfile.mkdtemp()
+
+ vocab_tokens = ["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"]
+ self.vocab_file = os.path.join(self.tmpdirname, "vocab.txt")
+ with open(self.vocab_file, "w", encoding="utf-8") as vocab_writer:
+ vocab_writer.write("".join([x + "\n" for x in vocab_tokens]))
+
+ def tearDown(self):
+ shutil.rmtree(self.tmpdirname)
+
+ def _turn_to_none(self, item):
+ """used to convert `item` to `None` type"""
+ return None
+
+ def _validate_original_data_against_collated_data(self, collator, original_data, batch_data):
+ # we only care about side effects, the results are tested elsewhere
+ collator(batch_data)
+
+ # we go through every item and convert to `primitive` datatypes if necessary,
+ # then compare the original data against the data that was passed through the collator for equivalence
+ for original, batch in zip(original_data, batch_data):
+ for original_val, batch_val in zip(original.values(), batch.values()):
+ if isinstance(original_val, np.ndarray):
+ self.assertEqual(original_val.tolist(), batch_val.tolist())
+ else:
+ self.assertEqual(original_val, batch_val)
+
+ def _validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ self, collator, base_data, input_key, input_datatype, label_key, label_datatype, ignore_label=False
+ ):
+ # using the arguments to recreate the features with their respective (potentially new) datatypes
+ features_original = [
+ {label_key: label_datatype(sample[label_key]), input_key: input_datatype(sample[input_key])}
+ for sample in base_data
+ ]
+ features_batch = [
+ {label_key: label_datatype(sample[label_key]), input_key: input_datatype(sample[input_key])}
+ for sample in base_data
+ ]
+
+ # some collators do not use labels; we also want to check that collators which expect labels can handle their absence
+ if ignore_label:
+ for original, batch in zip(features_original, features_batch):
+ original.pop(label_key)
+ batch.pop(label_key)
+
+ self._validate_original_data_against_collated_data(
+ collator=collator, original_data=features_original, batch_data=features_batch
+ )
+
+ def test_default_collator_immutability(self):
+ features_base_single_label = [{"label": i, "inputs": (0, 1, 2, 3, 4, 5)} for i in range(4)]
+ features_base_multiple_labels = [{"label": (0, 1, 2), "inputs": (0, 1, 2, 3, 4, 5)} for i in range(4)]
+
+ for datatype_input, datatype_label in [
+ (list, int),
+ (list, float),
+ (np.array, int),
+ (np.array, np.array),
+ (list, self._turn_to_none),
+ ]:
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=lambda x: default_data_collator(x, return_tensors="np"),
+ base_data=features_base_single_label,
+ input_key="inputs",
+ input_datatype=datatype_input,
+ label_key="label",
+ label_datatype=datatype_label,
+ )
+
+ for datatype_input, datatype_label in [(list, list), (list, self._turn_to_none)]:
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=lambda x: default_data_collator(x, return_tensors="np"),
+ base_data=features_base_multiple_labels,
+ input_key="inputs",
+ input_datatype=datatype_input,
+ label_key="label",
+ label_datatype=datatype_label,
+ )
+
+ features_base_single_label_alt = [{"input_ids": (0, 1, 2, 3, 4), "label": float(i)} for i in range(4)]
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=lambda x: default_data_collator(x, return_tensors="np"),
+ base_data=features_base_single_label_alt,
+ input_key="input_ids",
+ input_datatype=list,
+ label_key="label",
+ label_datatype=float,
+ )
+
+ def test_with_padding_collator_immutability(self):
+ tokenizer = BertTokenizer(self.vocab_file)
+
+ features_original = [{"input_ids": [0, 1, 2]}, {"input_ids": [0, 1, 2, 3, 4, 5]}]
+ features_batch = [{"input_ids": [0, 1, 2]}, {"input_ids": [0, 1, 2, 3, 4, 5]}]
+
+ data_collator = DataCollatorWithPadding(tokenizer, padding="max_length", max_length=10, return_tensors="np")
+ self._validate_original_data_against_collated_data(
+ collator=data_collator, original_data=features_original, batch_data=features_batch
+ )
+
+ data_collator = DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8, return_tensors="np")
+ self._validate_original_data_against_collated_data(
+ collator=data_collator, original_data=features_original, batch_data=features_batch
+ )
+
+ def test_for_token_classification_collator_immutability(self):
+ tokenizer = BertTokenizer(self.vocab_file)
+
+ features_base = [
+ {"input_ids": (0, 1, 2), "labels": (0, 1, 2)},
+ {"input_ids": (0, 1, 2, 3, 4, 5), "labels": (0, 1, 2, 3, 4, 5)},
+ ]
+ token_classification_collators = [
+ DataCollatorForTokenClassification(tokenizer, return_tensors="np"),
+ DataCollatorForTokenClassification(tokenizer, padding="max_length", max_length=10, return_tensors="np"),
+ DataCollatorForTokenClassification(tokenizer, pad_to_multiple_of=8, return_tensors="np"),
+ DataCollatorForTokenClassification(tokenizer, label_pad_token_id=-1, return_tensors="np"),
+ ]
+
+ for datatype_input, datatype_label in [(list, list)]:
+ for collator in token_classification_collators:
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=collator,
+ base_data=features_base,
+ input_key="input_ids",
+ input_datatype=datatype_input,
+ label_key="labels",
+ label_datatype=datatype_label,
+ )
+
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=token_classification_collators[-1],
+ base_data=features_base,
+ input_key="input_ids",
+ input_datatype=datatype_input,
+ label_key="labels",
+ label_datatype=datatype_label,
+ ignore_label=True,
+ )
+
+ def test_seq2seq_collator_immutability(self):
+ tokenizer = BertTokenizer(self.vocab_file)
+
+ features_base = [
+ {"input_ids": list(range(3)), "labels": list(range(3))},
+ {"input_ids": list(range(6)), "labels": list(range(6))},
+ ]
+ seq2seq_collators = [
+ DataCollatorForSeq2Seq(tokenizer, padding=PaddingStrategy.LONGEST, return_tensors="np"),
+ DataCollatorForSeq2Seq(tokenizer, padding=PaddingStrategy.MAX_LENGTH, max_length=7, return_tensors="np"),
+ DataCollatorForSeq2Seq(
+ tokenizer, padding=PaddingStrategy.LONGEST, pad_to_multiple_of=8, return_tensors="np"
+ ),
+ DataCollatorForSeq2Seq(
+ tokenizer, padding=PaddingStrategy.LONGEST, label_pad_token_id=-1, return_tensors="np"
+ ),
+ ]
+
+ for datatype_input, datatype_label in [(list, list)]:
+ for collator in seq2seq_collators:
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=collator,
+ base_data=features_base,
+ input_key="input_ids",
+ input_datatype=datatype_input,
+ label_key="labels",
+ label_datatype=datatype_label,
+ )
+
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=seq2seq_collators[-1],
+ base_data=features_base,
+ input_key="input_ids",
+ input_datatype=datatype_input,
+ label_key="labels",
+ label_datatype=datatype_label,
+ ignore_label=True,
+ )
+
+ features_base_no_pad = [
+ {"input_ids": list(range(3)), "labels": list(range(3))},
+ {"input_ids": list(range(3)), "labels": list(range(3))},
+ ]
+ seq2seq_no_padding_collator = DataCollatorForSeq2Seq(
+ tokenizer, padding=PaddingStrategy.DO_NOT_PAD, return_tensors="np"
+ )
+ for datatype_input, datatype_label in [(list, list)]:
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=seq2seq_no_padding_collator,
+ base_data=features_base_no_pad,
+ input_key="input_ids",
+ input_datatype=datatype_input,
+ label_key="labels",
+ label_datatype=datatype_label,
+ )
+
+ def test_language_modelling_collator_immutability(self):
+ tokenizer = BertTokenizer(self.vocab_file)
+
+ features_base_no_pad = [
+ {"input_ids": tuple(range(10)), "labels": (1,)},
+ {"input_ids": tuple(range(10)), "labels": (1,)},
+ ]
+ features_base_pad = [
+ {"input_ids": tuple(range(5)), "labels": (1,)},
+ {"input_ids": tuple(range(5)), "labels": (1,)},
+ ]
+ lm_collators = [
+ DataCollatorForLanguageModeling(tokenizer, mlm=False, return_tensors="np"),
+ DataCollatorForLanguageModeling(tokenizer, mlm=False, pad_to_multiple_of=8, return_tensors="np"),
+ DataCollatorForLanguageModeling(tokenizer, return_tensors="np"),
+ DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=8, return_tensors="np"),
+ ]
+
+ for datatype_input, datatype_label in [(list, list)]:
+ for collator in lm_collators:
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=collator,
+ base_data=features_base_no_pad,
+ input_key="input_ids",
+ input_datatype=datatype_input,
+ label_key="labels",
+ label_datatype=datatype_label,
+ ignore_label=True,
+ )
+
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=collator,
+ base_data=features_base_pad,
+ input_key="input_ids",
+ input_datatype=datatype_input,
+ label_key="labels",
+ label_datatype=datatype_label,
+ ignore_label=True,
+ )
+
+ def test_whole_word_masking_collator_immutability(self):
+ tokenizer = BertTokenizer(self.vocab_file)
+
+ features_base = [
+ {"input_ids": list(range(10)), "labels": (1,)},
+ {"input_ids": list(range(10)), "labels": (1,)},
+ ]
+ whole_word_masking_collator = DataCollatorForWholeWordMask(tokenizer, return_tensors="np")
+
+ for datatype_input, datatype_label in [(list, list), (np.array, np.array)]:
+ self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
+ collator=whole_word_masking_collator,
+ base_data=features_base,
+ input_key="input_ids",
+ input_datatype=datatype_input,
+ label_key="labels",
+ label_datatype=datatype_label,
+ ignore_label=True,
+ )
+
+ def test_permutation_language_modelling_collator_immutability(self):
+ tokenizer = BertTokenizer(self.vocab_file)
+
+ plm_collator = DataCollatorForPermutationLanguageModeling(tokenizer, return_tensors="np")
+
+ no_pad_features_original = [{"input_ids": list(range(10))}, {"input_ids": list(range(10))}]
+ no_pad_features_batch = [{"input_ids": list(range(10))}, {"input_ids": list(range(10))}]
+ self._validate_original_data_against_collated_data(
+ collator=plm_collator, original_data=no_pad_features_original, batch_data=no_pad_features_batch
+ )
+
+ pad_features_original = [{"input_ids": list(range(5))}, {"input_ids": list(range(10))}]
+ pad_features_batch = [{"input_ids": list(range(5))}, {"input_ids": list(range(10))}]
+ self._validate_original_data_against_collated_data(
+ collator=plm_collator, original_data=pad_features_original, batch_data=pad_features_batch
+ )
+
+ def test_next_sentence_prediction_collator_immutability(self):
+ tokenizer = BertTokenizer(self.vocab_file)
+
+ features_original = [
+ {"input_ids": [0, 1, 2, 3, 4], "token_type_ids": [0, 1, 2, 3, 4], "next_sentence_label": i}
+ for i in range(2)
+ ]
+ features_batch = [
+ {"input_ids": [0, 1, 2, 3, 4], "token_type_ids": [0, 1, 2, 3, 4], "next_sentence_label": i}
+ for i in range(2)
+ ]
+
+ nsp_collator = DataCollatorForLanguageModeling(tokenizer, return_tensors="np")
+ self._validate_original_data_against_collated_data(
+ collator=nsp_collator, original_data=features_original, batch_data=features_batch
+ )
+
+ nsp_collator = DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=8, return_tensors="np")
+ self._validate_original_data_against_collated_data(
+ collator=nsp_collator, original_data=features_original, batch_data=features_batch
+ )
+
+ def test_sentence_order_prediction_collator_immutability(self):
+ tokenizer = BertTokenizer(self.vocab_file)
+
+ features_original = [
+ {
+ "input_ids": np.array([0, 1, 2, 3, 4]),
+ "token_type_ids": np.array([0, 1, 2, 3, 4]),
+ "sentence_order_label": i,
+ }
+ for i in range(2)
+ ]
+ features_batch = [
+ {
+ "input_ids": np.array([0, 1, 2, 3, 4]),
+ "token_type_ids": np.array([0, 1, 2, 3, 4]),
+ "sentence_order_label": i,
+ }
+ for i in range(2)
+ ]
+
+ sop_collator = DataCollatorForLanguageModeling(tokenizer, return_tensors="np")
+ self._validate_original_data_against_collated_data(
+ collator=sop_collator, original_data=features_original, batch_data=features_batch
+ )
+
+ sop_collator = DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=8, return_tensors="np")
+ self._validate_original_data_against_collated_data(
+ collator=sop_collator, original_data=features_original, batch_data=features_batch
+ )
diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index 5619a5c98cbb..c420da4052f1 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -123,7 +123,6 @@
Trainer,
TrainerState,
)
- from transformers.modeling_utils import unwrap_model
from transformers.trainer_pt_utils import AcceleratorConfig
if is_safetensors_available():
@@ -231,6 +230,27 @@ def __call__(self, eval_pred):
return {"accuracy": true.astype(np.float32).mean().item()}
+class AlmostAccuracyBatched:
+ def __init__(self, thresh=0.25):
+ self.thresh = thresh
+ self.batch_acc = []
+
+ def __call__(self, eval_pred, compute_result):
+ predictions, labels = eval_pred
+ if isinstance(predictions, tuple):
+ predictions = predictions[0]
+ if isinstance(labels, tuple):
+ labels = labels[0]
+ batch_size = len(predictions)
+ true = torch.abs(predictions - labels) <= self.thresh
+ acc = true.type(torch.FloatTensor).mean().item()
+ self.batch_acc.extend([acc] * batch_size)
+ if compute_result:
+ result = {"accuracy": np.mean(self.batch_acc).item()}
+ self.batch_acc = []
+ return result
+
+
class RegressionModelConfig(PretrainedConfig):
def __init__(self, a=0, b=0, double_output=False, random_torch=True, **kwargs):
super().__init__(**kwargs)
@@ -587,7 +607,7 @@ def test_trainer_with_datasets(self):
# Base training. Should have the same results as test_reproducible_training
model = RegressionModel()
- args = TrainingArguments("./regression", learning_rate=0.1)
+ args = TrainingArguments("./regression", learning_rate=0.1, report_to="none")
trainer = Trainer(model, args, train_dataset=train_dataset)
trainer.train()
self.check_trained_model(trainer.model)
@@ -609,7 +629,7 @@ def test_trainer_with_datasets(self):
def test_model_init(self):
train_dataset = RegressionDataset()
- args = TrainingArguments("./regression", learning_rate=0.1)
+ args = TrainingArguments("./regression", learning_rate=0.1, report_to="none")
trainer = Trainer(args=args, train_dataset=train_dataset, model_init=lambda: RegressionModel())
trainer.train()
self.check_trained_model(trainer.model)
@@ -672,7 +692,7 @@ def test_training_loss(self):
def test_custom_optimizer(self):
train_dataset = RegressionDataset()
- args = TrainingArguments("./regression")
+ args = TrainingArguments("./regression", report_to="none")
model = RegressionModel()
optimizer = torch.optim.SGD(model.parameters(), lr=1.0)
lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda x: 1.0)
@@ -696,6 +716,7 @@ def test_lr_scheduler_kwargs(self):
lr_scheduler_kwargs=extra_kwargs,
learning_rate=0.2,
warmup_steps=num_warmup_steps,
+ report_to="none",
)
trainer = Trainer(model, args, train_dataset=train_dataset)
trainer.create_optimizer_and_scheduler(num_training_steps=num_steps)
@@ -722,6 +743,7 @@ def test_cosine_with_min_lr_scheduler(self):
lr_scheduler_kwargs=extra_kwargs,
learning_rate=0.2,
warmup_steps=num_warmup_steps,
+ report_to="none",
)
trainer = Trainer(model, args, train_dataset=train_dataset)
trainer.create_optimizer_and_scheduler(num_training_steps=num_steps)
@@ -742,6 +764,7 @@ def test_reduce_lr_on_plateau_args(self):
"./regression",
eval_strategy="epoch",
metric_for_best_model="eval_loss",
+ report_to="none",
)
model = RegressionModel()
optimizer = torch.optim.SGD(model.parameters(), lr=1.0)
@@ -776,6 +799,7 @@ def log(self, logs):
metric_for_best_model="eval_loss",
num_train_epochs=10,
learning_rate=0.2,
+ report_to="none",
)
model = RegressionModel()
trainer = TrainerWithLRLogs(model, args, train_dataset=train_dataset, eval_dataset=eval_dataset)
@@ -808,7 +832,7 @@ def test_adafactor_lr_none(self):
from transformers.optimization import Adafactor, AdafactorSchedule
train_dataset = RegressionDataset()
- args = TrainingArguments("./regression")
+ args = TrainingArguments("./regression", report_to="none")
model = RegressionModel()
optimizer = Adafactor(model.parameters(), scale_parameter=True, relative_step=True, warmup_init=True, lr=None)
lr_scheduler = AdafactorSchedule(optimizer)
@@ -859,7 +883,7 @@ def test_trainer_works_with_dict(self):
train_dataset = RegressionDataset()
eval_dataset = RegressionDataset()
model = RegressionDictModel()
- args = TrainingArguments("./regression")
+ args = TrainingArguments("./regression", report_to="none")
trainer = Trainer(model, args, train_dataset=train_dataset, eval_dataset=eval_dataset)
trainer.train()
_ = trainer.evaluate()
@@ -870,7 +894,7 @@ def test_evaluation_with_keys_to_drop(self):
tiny_gpt2 = GPT2LMHeadModel(config)
x = torch.randint(0, 100, (128,))
eval_dataset = RepeatDataset(x)
- args = TrainingArguments("./test")
+ args = TrainingArguments("./test", report_to="none")
trainer = Trainer(tiny_gpt2, args, eval_dataset=eval_dataset)
# By default the past_key_values are removed
result = trainer.predict(eval_dataset)
@@ -965,6 +989,63 @@ def test_bnb_compile(self):
with self.assertRaises(ValueError):
_ = Trainer(tiny_model, args, train_dataset=train_dataset) # noqa
+ @require_peft
+ def test_multiple_peft_adapters(self):
+ from peft import LoraConfig, get_peft_model
+
+ # Tests if resuming from checkpoint works if the model has multiple adapters
+
+ MODEL_ID = "hf-internal-testing/tiny-random-LlamaForCausalLM"
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+ tiny_model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
+
+ peft_config = LoraConfig(
+ r=4,
+ lora_alpha=16,
+ lora_dropout=0.05,
+ bias="none",
+ task_type="CAUSAL_LM",
+ )
+ tiny_model = get_peft_model(tiny_model, peft_config, "adapter1")
+ tiny_model.add_adapter("adapter2", peft_config)
+
+ train_dataset = LineByLineTextDataset(
+ tokenizer=tokenizer,
+ file_path=PATH_SAMPLE_TEXT,
+ block_size=tokenizer.max_len_single_sentence,
+ )
+ for example in train_dataset.examples:
+ example["labels"] = example["input_ids"]
+
+ tokenizer.pad_token = tokenizer.eos_token
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ args = TrainingArguments(
+ tmpdir,
+ per_device_train_batch_size=1,
+ learning_rate=1e-9,
+ save_steps=5,
+ logging_steps=5,
+ max_steps=10,
+ use_cpu=True,
+ )
+ trainer = Trainer(tiny_model, args, tokenizer=tokenizer, train_dataset=train_dataset)
+
+ trainer.train()
+ parameters = dict(tiny_model.named_parameters())
+ state = dataclasses.asdict(trainer.state)
+
+ # Reinitialize trainer
+ trainer = Trainer(tiny_model, args, tokenizer=tokenizer, train_dataset=train_dataset)
+
+ checkpoint = os.path.join(tmpdir, "checkpoint-5")
+
+ trainer.train(resume_from_checkpoint=checkpoint)
+ parameters1 = dict(tiny_model.named_parameters())
+ state1 = dataclasses.asdict(trainer.state)
+ self.assertEqual(parameters, parameters1)
+ self.check_trainer_state_are_the_same(state, state1)
+
@require_bitsandbytes
def test_rmsprop_bnb(self):
config = GPT2Config(vocab_size=100, n_positions=128, n_embd=32, n_layer=3, n_head=4)
@@ -1023,7 +1104,12 @@ def test_neftune(self):
# Trainer without inf/nan filter
args = TrainingArguments(
- "./test", learning_rate=1e-9, logging_steps=5, logging_nan_inf_filter=False, neftune_noise_alpha=0.4
+ "./test",
+ learning_rate=1e-9,
+ logging_steps=5,
+ logging_nan_inf_filter=False,
+ neftune_noise_alpha=0.4,
+ report_to="none",
)
trainer = Trainer(tiny_gpt2, args, train_dataset=train_dataset)
@@ -1040,7 +1126,12 @@ def test_neftune(self):
tiny_gpt2 = GPT2LMHeadModel(config)
# Trainer without inf/nan filter
args = TrainingArguments(
- "./test", learning_rate=1e-9, logging_steps=5, logging_nan_inf_filter=False, neftune_noise_alpha=0.4
+ "./test",
+ learning_rate=1e-9,
+ logging_steps=5,
+ logging_nan_inf_filter=False,
+ neftune_noise_alpha=0.4,
+ report_to="none",
)
trainer = Trainer(tiny_gpt2, args, train_dataset=train_dataset)
@@ -1066,13 +1157,17 @@ def test_logging_inf_nan_filter(self):
train_dataset = RepeatDataset(x)
# Trainer without inf/nan filter
- args = TrainingArguments("./test", learning_rate=1e9, logging_steps=5, logging_nan_inf_filter=False)
+ args = TrainingArguments(
+ "./test", learning_rate=1e9, logging_steps=5, logging_nan_inf_filter=False, report_to="none"
+ )
trainer = Trainer(tiny_gpt2, args, train_dataset=train_dataset)
trainer.train()
log_history_no_filter = trainer.state.log_history
# Trainer with inf/nan filter
- args = TrainingArguments("./test", learning_rate=1e9, logging_steps=5, logging_nan_inf_filter=True)
+ args = TrainingArguments(
+ "./test", learning_rate=1e9, logging_steps=5, logging_nan_inf_filter=True, report_to="none"
+ )
trainer = Trainer(tiny_gpt2, args, train_dataset=train_dataset)
trainer.train()
log_history_filter = trainer.state.log_history
@@ -1119,11 +1214,16 @@ def test_train_and_eval_dataloaders(self):
# tests that we do not require dataloader to have a .dataset attribute
def test_dataloader_without_dataset(self):
train_dataset = RegressionDataset(length=128)
- trainer = CustomDataloaderTrainer(
- model=RegressionModel(), train_dataset=train_dataset, eval_dataset=train_dataset
- )
- trainer.train()
- trainer.evaluate()
+ with tempfile.TemporaryDirectory() as tmp_dir:
+ trainer = CustomDataloaderTrainer(
+ model=RegressionModel(),
+ train_dataset=train_dataset,
+ eval_dataset=train_dataset,
+ args=TrainingArguments(output_dir=tmp_dir, report_to="none"),
+ )
+
+ trainer.train()
+ trainer.evaluate()
def test_galore_matched_modules(self):
regex_patterns = [r".*.attn.*", r".*.mlp.*"]
@@ -1418,7 +1518,9 @@ def test_data_is_not_parallelized_when_model_is_parallel(self):
# Make the Trainer believe it's a parallelized model
model.is_parallelizable = True
model.model_parallel = True
- args = TrainingArguments("./regression", per_device_train_batch_size=16, per_device_eval_batch_size=16)
+ args = TrainingArguments(
+ "./regression", per_device_train_batch_size=16, per_device_eval_batch_size=16, report_to="none"
+ )
trainer = Trainer(model, args, train_dataset=RegressionDataset(), eval_dataset=RegressionDataset())
# Check the Trainer was fooled
self.assertTrue(trainer.is_model_parallel)
@@ -1468,6 +1570,49 @@ def test_evaluate(self):
expected_acc = AlmostAccuracy()((pred + 1, y))["accuracy"]
self.assertAlmostEqual(results["eval_accuracy"], expected_acc)
+ def test_evaluate_with_batch_eval_metrics(self):
+ trainer = get_regression_trainer(
+ a=1.5, b=2.5, compute_metrics=AlmostAccuracyBatched(), batch_eval_metrics=True
+ )
+ results = trainer.evaluate()
+
+ x, y = trainer.eval_dataset.x, trainer.eval_dataset.ys[0]
+ pred = 1.5 * x + 2.5
+ expected_loss = ((pred - y) ** 2).mean()
+ self.assertAlmostEqual(results["eval_loss"], expected_loss)
+ expected_acc = AlmostAccuracy()((pred, y))["accuracy"]
+ self.assertAlmostEqual(results["eval_accuracy"], expected_acc)
+
+ # With a number of elements not a round multiple of the batch size
+ trainer = get_regression_trainer(
+ a=1.5, b=2.5, eval_len=66, compute_metrics=AlmostAccuracyBatched(), batch_eval_metrics=True
+ )
+ results = trainer.evaluate()
+
+ x, y = trainer.eval_dataset.x, trainer.eval_dataset.ys[0]
+ pred = 1.5 * x + 2.5
+ expected_loss = ((pred - y) ** 2).mean()
+ self.assertAlmostEqual(results["eval_loss"], expected_loss)
+ expected_acc = AlmostAccuracy()((pred, y))["accuracy"]
+ self.assertAlmostEqual(results["eval_accuracy"], expected_acc)
+
+ # With logits preprocess
+ trainer = get_regression_trainer(
+ a=1.5,
+ b=2.5,
+ compute_metrics=AlmostAccuracyBatched(),
+ batch_eval_metrics=True,
+ preprocess_logits_for_metrics=lambda logits, labels: logits + 1,
+ )
+ results = trainer.evaluate()
+
+ x, y = trainer.eval_dataset.x, trainer.eval_dataset.ys[0]
+ pred = 1.5 * x + 2.5
+ expected_loss = ((pred - y) ** 2).mean()
+ self.assertAlmostEqual(results["eval_loss"], expected_loss)
+ expected_acc = AlmostAccuracy()((pred + 1, y))["accuracy"]
+ self.assertAlmostEqual(results["eval_accuracy"], expected_acc)
+
def test_evaluate_with_jit(self):
trainer = get_regression_trainer(a=1.5, b=2.5, compute_metrics=AlmostAccuracy(), jit_mode_eval=True)
results = trainer.evaluate()
@@ -1595,6 +1740,58 @@ def test_predict(self):
self.assertTrue(np.array_equal(labels[0], trainer.eval_dataset.ys[0]))
self.assertTrue(np.array_equal(labels[1], trainer.eval_dataset.ys[1]))
+ def test_predict_with_batch_eval_metrics(self):
+ trainer = get_regression_trainer(
+ a=1.5, b=2.5, compute_metrics=AlmostAccuracyBatched(), batch_eval_metrics=True
+ )
+ results = trainer.predict(trainer.eval_dataset)
+ preds = results.predictions
+ x, y = trainer.eval_dataset.x, trainer.eval_dataset.ys[0]
+ gt = 1.5 * x + 2.5
+ self.assertTrue(np.allclose(preds, gt))
+ expected_acc = AlmostAccuracy()((preds, y))["accuracy"]
+ self.assertAlmostEqual(results.metrics["test_accuracy"], expected_acc)
+
+ # With a number of elements not a round multiple of the batch size
+ trainer = get_regression_trainer(
+ a=1.5, b=2.5, eval_len=66, compute_metrics=AlmostAccuracyBatched(), batch_eval_metrics=True
+ )
+ results = trainer.predict(trainer.eval_dataset)
+ preds = results.predictions
+ x, y = trainer.eval_dataset.x, trainer.eval_dataset.ys[0]
+ self.assertTrue(np.allclose(preds, 1.5 * x + 2.5))
+ expected_acc = AlmostAccuracy()((preds, y))["accuracy"]
+ self.assertAlmostEqual(results.metrics["test_accuracy"], expected_acc)
+
+ # With more than one output of the model
+ trainer = get_regression_trainer(
+ a=1.5, b=2.5, double_output=True, compute_metrics=AlmostAccuracyBatched(), batch_eval_metrics=True
+ )
+ preds = trainer.predict(trainer.eval_dataset).predictions
+ x = trainer.eval_dataset.x
+ self.assertEqual(len(preds), 2)
+ self.assertTrue(np.allclose(preds[0], 1.5 * x + 2.5))
+ self.assertTrue(np.allclose(preds[1], 1.5 * x + 2.5))
+
+ # With more than one output/label of the model
+ trainer = get_regression_trainer(
+ a=1.5,
+ b=2.5,
+ double_output=True,
+ label_names=["labels", "labels_2"],
+ compute_metrics=AlmostAccuracyBatched(),
+ batch_eval_metrics=True,
+ )
+ outputs = trainer.predict(trainer.eval_dataset)
+ preds = outputs.predictions
+ labels = outputs.label_ids
+ x = trainer.eval_dataset.x
+ self.assertEqual(len(preds), 2)
+ self.assertTrue(np.allclose(preds[0], 1.5 * x + 2.5))
+ self.assertTrue(np.allclose(preds[1], 1.5 * x + 2.5))
+ self.assertTrue(np.array_equal(labels[0], trainer.eval_dataset.ys[0]))
+ self.assertTrue(np.array_equal(labels[1], trainer.eval_dataset.ys[1]))
+
def test_predict_with_jit(self):
trainer = get_regression_trainer(a=1.5, b=2.5, jit_mode_eval=True)
preds = trainer.predict(trainer.eval_dataset).predictions
@@ -1677,7 +1874,7 @@ def test_predict_with_ipex(self):
def test_dynamic_shapes(self):
eval_dataset = DynamicShapesDataset(batch_size=self.batch_size)
model = RegressionModel(a=2, b=1)
- args = TrainingArguments("./regression")
+ args = TrainingArguments("./regression", report_to="none")
trainer = Trainer(model, args, eval_dataset=eval_dataset)
# Check evaluation can run to completion
@@ -1694,7 +1891,7 @@ def test_dynamic_shapes(self):
self.assertTrue(np.all(seen[expected.shape[0] :] == -100))
# Same tests with eval accumulation
- args = TrainingArguments("./regression", eval_accumulation_steps=2)
+ args = TrainingArguments("./regression", eval_accumulation_steps=2, report_to="none")
trainer = Trainer(model, args, eval_dataset=eval_dataset)
# Check evaluation can run to completion
@@ -2468,8 +2665,10 @@ def test_flos_extraction(self):
trainer = get_regression_trainer(learning_rate=0.1)
def assert_flos_extraction(trainer, wrapped_model_to_check):
- self.assertEqual(trainer.model, unwrap_model(wrapped_model_to_check))
- self.assertGreaterEqual(getattr(unwrap_model(wrapped_model_to_check).config, "total_flos", 0), 0)
+ self.assertEqual(trainer.model, trainer.accelerator.unwrap_model(wrapped_model_to_check))
+ self.assertGreaterEqual(
+ getattr(trainer.accelerator.unwrap_model(wrapped_model_to_check).config, "total_flos", 0), 0
+ )
# with plain model
assert_flos_extraction(trainer, trainer.model)
@@ -2810,13 +3009,14 @@ def test_bf16_full_eval(self):
def test_no_wd_param_group(self):
model = nn.Sequential(TstLayer(128), nn.ModuleList([TstLayer(128), TstLayer(128)]))
- trainer = Trainer(model=model)
- trainer.create_optimizer_and_scheduler(10)
- wd_names = ['0.linear1.weight', '0.linear2.weight', '1.0.linear1.weight', '1.0.linear2.weight', '1.1.linear1.weight', '1.1.linear2.weight'] # fmt: skip
- wd_params = [p for n, p in model.named_parameters() if n in wd_names]
- no_wd_params = [p for n, p in model.named_parameters() if n not in wd_names]
- self.assertListEqual(trainer.optimizer.param_groups[0]["params"], wd_params)
- self.assertListEqual(trainer.optimizer.param_groups[1]["params"], no_wd_params)
+ with tempfile.TemporaryDirectory() as tmp_dir:
+ trainer = Trainer(model=model, args=TrainingArguments(output_dir=tmp_dir, report_to="none"))
+ trainer.create_optimizer_and_scheduler(10)
+ wd_names = ['0.linear1.weight', '0.linear2.weight', '1.0.linear1.weight', '1.0.linear2.weight', '1.1.linear1.weight', '1.1.linear2.weight'] # fmt: skip
+ wd_params = [p for n, p in model.named_parameters() if n in wd_names]
+ no_wd_params = [p for n, p in model.named_parameters() if n not in wd_names]
+ self.assertListEqual(trainer.optimizer.param_groups[0]["params"], wd_params)
+ self.assertListEqual(trainer.optimizer.param_groups[1]["params"], no_wd_params)
@slow
@require_torch_multi_accelerator
@@ -3960,32 +4160,35 @@ def test_get_num_trainable_parameters(self):
# in_features * out_features + bias
layer_1 = 128 * 64 + 64
layer_2 = 64 * 32 + 32
- trainer = Trainer(model=model)
- self.assertEqual(trainer.get_num_trainable_parameters(), layer_1 + layer_2)
- # Freeze the last layer
- for param in model[-1].parameters():
- param.requires_grad = False
- self.assertEqual(trainer.get_num_trainable_parameters(), layer_1)
+ with tempfile.TemporaryDirectory() as tmp_dir:
+ trainer = Trainer(model=model, args=TrainingArguments(output_dir=tmp_dir, report_to="none"))
+ self.assertEqual(trainer.get_num_trainable_parameters(), layer_1 + layer_2)
+ # Freeze the last layer
+ for param in model[-1].parameters():
+ param.requires_grad = False
+ self.assertEqual(trainer.get_num_trainable_parameters(), layer_1)
def test_get_learning_rates(self):
model = nn.Sequential(nn.Linear(128, 64))
- trainer = Trainer(model=model)
- with self.assertRaises(ValueError):
- trainer.get_learning_rates()
- trainer.create_optimizer()
- self.assertEqual(trainer.get_learning_rates(), [5e-05, 5e-05])
+ with tempfile.TemporaryDirectory() as tmp_dir:
+ trainer = Trainer(model=model, args=TrainingArguments(output_dir=tmp_dir, report_to="none"))
+ with self.assertRaises(ValueError):
+ trainer.get_learning_rates()
+ trainer.create_optimizer()
+ self.assertEqual(trainer.get_learning_rates(), [5e-05, 5e-05])
def test_get_optimizer_group(self):
model = nn.Sequential(nn.Linear(128, 64))
- trainer = Trainer(model=model)
- # ValueError is raised if optimizer is None
- with self.assertRaises(ValueError):
- trainer.get_optimizer_group()
- trainer.create_optimizer()
- # Get groups
- num_groups = len(trainer.get_optimizer_group())
- self.assertEqual(num_groups, 2)
- # Get group of parameter
- param = next(model.parameters())
- group = trainer.get_optimizer_group(param)
- self.assertIn(param, group["params"])
+ with tempfile.TemporaryDirectory() as tmp_dir:
+ trainer = Trainer(model=model, args=TrainingArguments(output_dir=tmp_dir, report_to="none"))
+ # ValueError is raised if optimizer is None
+ with self.assertRaises(ValueError):
+ trainer.get_optimizer_group()
+ trainer.create_optimizer()
+ # Get groups
+ num_groups = len(trainer.get_optimizer_group())
+ self.assertEqual(num_groups, 2)
+ # Get group of parameter
+ param = next(model.parameters())
+ group = trainer.get_optimizer_group(param)
+ self.assertIn(param, group["params"])
diff --git a/tests/trainer/test_trainer_callback.py b/tests/trainer/test_trainer_callback.py
index b712edca385c..8c0c9367d8d7 100644
--- a/tests/trainer/test_trainer_callback.py
+++ b/tests/trainer/test_trainer_callback.py
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+
+import os
import shutil
import tempfile
import unittest
@@ -19,28 +21,44 @@
from transformers import (
DefaultFlowCallback,
+ EarlyStoppingCallback,
IntervalStrategy,
PrinterCallback,
ProgressCallback,
Trainer,
TrainerCallback,
+ TrainerState,
TrainingArguments,
is_torch_available,
)
from transformers.testing_utils import require_torch
+from transformers.trainer_callback import ExportableState
if is_torch_available():
- from transformers.trainer import DEFAULT_CALLBACKS
+ from transformers.trainer import DEFAULT_CALLBACKS, TRAINER_STATE_NAME
from .test_trainer import RegressionDataset, RegressionModelConfig, RegressionPreTrainedModel
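+# Minimal stateful callback used by the tests below: exposing `state()` via `ExportableState` is
+# what allows the Trainer to serialize the callback's arguments into a checkpoint and restore them
+# when `restore_callback_states_from_checkpoint=True`.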
+class MyTestExportableCallback(TrainerCallback, ExportableState):
+ def __init__(self, my_test_state="test"):
+ self.my_test_state = my_test_state
+
+ def state(self):
+ return {
+ "args": {
+ "my_test_state": self.my_test_state,
+ },
+ }
+
+
class MyTestTrainerCallback(TrainerCallback):
"A callback that registers the events that goes through."
- def __init__(self):
+ def __init__(self, my_test_state="test"):
self.events = []
+ self.my_test_state = my_test_state
def on_init_end(self, args, state, control, **kwargs):
self.events.append("on_init_end")
@@ -243,3 +261,160 @@ def test_event_flow(self):
callbacks=[MyTestTrainerCallback, MyTestTrainerCallback],
)
assert str(MyTestTrainerCallback) in warn_mock.call_args[0][0]
+
+ def test_stateful_callbacks(self):
+ # Use something with non-defaults
+ cb = EarlyStoppingCallback(early_stopping_patience=5, early_stopping_threshold=0.2)
+ trainer = self.get_trainer(
+ callbacks=[cb],
+ load_best_model_at_end=True,
+ save_strategy="steps",
+ eval_strategy="steps",
+ save_steps=2,
+ eval_steps=2,
+ max_steps=2,
+ )
+ trainer.train()
+
+ # Create a new trainer with defaults
+ trainer = self.get_trainer(
+ callbacks=[EarlyStoppingCallback()],
+ load_best_model_at_end=True,
+ save_strategy="steps",
+ eval_strategy="steps",
+ save_steps=2,
+ eval_steps=2,
+ max_steps=2,
+ restore_callback_states_from_checkpoint=True,
+ )
+ # Load it back in and verify values
+ checkpoint = os.path.join(self.output_dir, "checkpoint-2")
+ trainer.train(resume_from_checkpoint=checkpoint)
+ cb = [
+ callback for callback in trainer.callback_handler.callbacks if isinstance(callback, EarlyStoppingCallback)
+ ][0]
+ assert cb.early_stopping_patience == 5
+ assert cb.early_stopping_threshold == 0.2
+
+ def test_stateful_mixed_callbacks(self):
+ # Use two callbacks, one stateful one not
+ # Use something with non-defaults
+ cbs = [
+ MyTestTrainerCallback(my_test_state="another value"),
+ EarlyStoppingCallback(early_stopping_patience=5, early_stopping_threshold=0.2),
+ ]
+ trainer = self.get_trainer(
+ callbacks=cbs,
+ load_best_model_at_end=True,
+ save_strategy="steps",
+ eval_strategy="steps",
+ save_steps=2,
+ eval_steps=2,
+ max_steps=2,
+ )
+ trainer.train()
+
+ # Create a new trainer with defaults
+ trainer = self.get_trainer(
+ callbacks=[EarlyStoppingCallback(), MyTestTrainerCallback()],
+ load_best_model_at_end=True,
+ save_strategy="steps",
+ eval_strategy="steps",
+ save_steps=2,
+ eval_steps=2,
+ max_steps=2,
+ restore_callback_states_from_checkpoint=True,
+ )
+ # Load it back in and verify values
+ checkpoint = os.path.join(self.output_dir, "checkpoint-2")
+ trainer.train(resume_from_checkpoint=checkpoint)
+ cbs = [
+ callback
+ for callback in trainer.callback_handler.callbacks
+ if isinstance(callback, (EarlyStoppingCallback, MyTestTrainerCallback))
+ ]
+ assert len(cbs) == 2
+ my_test, early_stopping = cbs
+ assert early_stopping.early_stopping_patience == 5
+ assert early_stopping.early_stopping_threshold == 0.2
+ assert my_test.my_test_state == "test"
+
+ def test_stateful_duplicate_callbacks(self):
+ # Use something with non-defaults
+ cbs = [MyTestExportableCallback("first"), MyTestExportableCallback("second")]
+ trainer = self.get_trainer(
+ callbacks=cbs,
+ load_best_model_at_end=True,
+ save_strategy="steps",
+ eval_strategy="steps",
+ save_steps=2,
+ eval_steps=2,
+ max_steps=2,
+ )
+ trainer.train()
+
+ # Create a new trainer with defaults
+ trainer = self.get_trainer(
+ callbacks=[MyTestExportableCallback(), MyTestExportableCallback()],
+ load_best_model_at_end=True,
+ save_strategy="steps",
+ eval_strategy="steps",
+ save_steps=2,
+ eval_steps=2,
+ max_steps=2,
+ restore_callback_states_from_checkpoint=True,
+ )
+ # Load it back in and verify values
+ checkpoint = os.path.join(self.output_dir, "checkpoint-2")
+ trainer.train(resume_from_checkpoint=checkpoint)
+ cbs = [
+ callback
+ for callback in trainer.callback_handler.callbacks
+ if isinstance(callback, MyTestExportableCallback)
+ ]
+ assert len(cbs) == 2
+ assert cbs[0].my_test_state == "first"
+ assert cbs[1].my_test_state == "second"
+
+ def test_missing_stateful_callback(self):
+ cb = EarlyStoppingCallback()
+ trainer = self.get_trainer(
+ callbacks=[cb],
+ load_best_model_at_end=True,
+ save_strategy="steps",
+ eval_strategy="steps",
+ save_steps=2,
+ eval_steps=2,
+ max_steps=2,
+ )
+ trainer.train()
+
+ # Create a new trainer with defaults
+ trainer = self.get_trainer(
+ save_strategy="steps",
+ eval_strategy="steps",
+ save_steps=2,
+ eval_steps=2,
+ max_steps=2,
+ restore_callback_states_from_checkpoint=True,
+ )
+ # Load it back in and verify values
+ checkpoint = os.path.join(self.output_dir, "checkpoint-2")
+ # warning should be emitted for not-present callbacks
+ with patch("transformers.trainer.logger.warning") as warn_mock:
+ trainer.train(resume_from_checkpoint=checkpoint)
+ assert "EarlyStoppingCallback" in warn_mock.call_args[0][0]
+
+ def test_stateful_control(self):
+ trainer = self.get_trainer(
+ max_steps=2,
+ save_strategy="steps",
+ save_steps=2,
+ )
+ trainer.train()
+ # Load it back in and verify values
+ trainer = self.get_trainer(max_steps=2, restore_callback_states_from_checkpoint=True)
+ checkpoint = os.path.join(self.output_dir, "checkpoint-2")
+ trainer.state = TrainerState.load_from_json(os.path.join(checkpoint, TRAINER_STATE_NAME))
+ trainer._load_callback_state()
+ assert trainer.control.should_training_stop
diff --git a/tests/trainer/test_trainer_distributed.py b/tests/trainer/test_trainer_distributed.py
index 8f867cf0beba..968f800174a6 100644
--- a/tests/trainer/test_trainer_distributed.py
+++ b/tests/trainer/test_trainer_distributed.py
@@ -153,7 +153,7 @@ def test_trainer(self):
{self.test_file_dir}/test_trainer_distributed.py
""".split()
output_dir = self.get_auto_remove_tmp_dir()
- args = f"--output_dir {output_dir}".split()
+ args = f"--output_dir {output_dir} --report_to none".split()
cmd = ["torchrun"] + distributed_args + args
execute_subprocess_async(cmd, env=self.get_env())
# successful return here == success - any errors would have caused an error in the sub-call
diff --git a/tests/trainer/test_trainer_seq2seq.py b/tests/trainer/test_trainer_seq2seq.py
index d8722c67836f..61d2163b9e81 100644
--- a/tests/trainer/test_trainer_seq2seq.py
+++ b/tests/trainer/test_trainer_seq2seq.py
@@ -119,6 +119,7 @@ def _compute_metrics(pred):
warmup_steps=0,
eval_steps=2,
logging_steps=2,
+ report_to="none",
)
# instantiate trainer
@@ -152,7 +153,7 @@ def test_return_sequences(self):
"google-t5/t5-small", max_length=None, min_length=None, max_new_tokens=256, min_new_tokens=1, num_beams=5
)
- training_args = Seq2SeqTrainingArguments(".", predict_with_generate=True)
+ training_args = Seq2SeqTrainingArguments(".", predict_with_generate=True, report_to="none")
trainer = Seq2SeqTrainer(
model=model,
@@ -160,6 +161,6 @@ def test_return_sequences(self):
tokenizer=tokenizer,
data_collator=data_collator,
compute_metrics=lambda x: {"samples": x[0].shape[0]},
)
def prepare_data(examples):
@@ -191,7 +193,9 @@ def test_bad_generation_config_fail_early(self):
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model, return_tensors="pt", padding="longest")
gen_config = GenerationConfig(do_sample=False, top_p=0.9) # bad: top_p is not compatible with do_sample=False
- training_args = Seq2SeqTrainingArguments(".", predict_with_generate=True, generation_config=gen_config)
+ training_args = Seq2SeqTrainingArguments(
+ ".", predict_with_generate=True, generation_config=gen_config, report_to="none"
+ )
with self.assertRaises(ValueError) as exc:
_ = Seq2SeqTrainer(
model=model,
diff --git a/tests/utils/test_image_utils.py b/tests/utils/test_image_utils.py
index d6bc9a375858..f360c4bb8253 100644
--- a/tests/utils/test_image_utils.py
+++ b/tests/utils/test_image_utils.py
@@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import codecs
import os
import tempfile
import unittest
@@ -544,6 +545,23 @@ def test_load_img_base64(self):
self.assertEqual(img_arr.shape, (64, 32, 3))
+ def test_load_img_base64_encoded_bytes(self):
+ try:
+ tmp_file = tempfile.mktemp()
+ with open(tmp_file, "wb") as f:
+ http_get(
+ "https://huggingface.co/datasets/hf-internal-testing/dummy-base64-images/raw/main/image_2.txt", f
+ )
+
+ with codecs.open(tmp_file, encoding="unicode_escape") as b64:
+ img = load_image(b64.read())
+ img_arr = np.array(img)
+
+ finally:
+ os.remove(tmp_file)
+
+ self.assertEqual(img_arr.shape, (256, 256, 3))
+
def test_load_img_rgba(self):
# we use revision="refs/pr/1" until the PR is merged
# https://hf.co/datasets/hf-internal-testing/fixtures_image_utils/discussions/1
diff --git a/utils/check_copies.py b/utils/check_copies.py
index 60a2fac4c8f5..dd5d5c77dab6 100644
--- a/utils/check_copies.py
+++ b/utils/check_copies.py
@@ -858,7 +858,6 @@ def check_copies(overwrite: bool = False, file: str = None):
+ diff
+ "\nRun `make fix-copies` or `python utils/check_copies.py --fix_and_overwrite` to fix them."
)
- check_model_list_copy(overwrite=overwrite)
def check_full_copies(overwrite: bool = False):
@@ -1055,68 +1054,6 @@ def _find_text_in_file(filename: str, start_prompt: str, end_prompt: str) -> Tup
return "".join(lines[start_index:end_index]), start_index, end_index, lines
-def check_model_list_copy(overwrite: bool = False):
- """
- Check the model lists in the README is consistent with the ones in the other READMES and also with `index.nmd`.
-
- Args:
- overwrite (`bool`, *optional*, defaults to `False`):
- Whether or not to overwrite the copies when they don't match.
- """
- # Fix potential doc links in the README
- with open(os.path.join(REPO_PATH, "README.md"), "r", encoding="utf-8", newline="\n") as f:
- readme = f.read()
- new_readme = readme.replace("https://huggingface.co/transformers", "https://huggingface.co/docs/transformers")
- new_readme = new_readme.replace(
- "https://huggingface.co/docs/main/transformers", "https://huggingface.co/docs/transformers/main"
- )
- if new_readme != readme:
- if overwrite:
- with open(os.path.join(REPO_PATH, "README.md"), "w", encoding="utf-8", newline="\n") as f:
- f.write(new_readme)
- else:
- raise ValueError(
- "The main README contains wrong links to the documentation of Transformers. Run `make fix-copies` to "
- "automatically fix them."
- )
-
- md_list = get_model_list(
- filename="README.md",
- start_prompt=LOCALIZED_READMES["README.md"]["start_prompt"],
- end_prompt=LOCALIZED_READMES["README.md"]["end_prompt"],
- )
-
- # Build the converted Markdown.
- converted_md_lists = []
- for filename, value in LOCALIZED_READMES.items():
- _start_prompt = value["start_prompt"]
- _end_prompt = value["end_prompt"]
- _format_model_list = value["format_model_list"]
-
- localized_md_list = get_model_list(filename, _start_prompt, _end_prompt)
- readmes_match, converted_md_list = convert_to_localized_md(md_list, localized_md_list, _format_model_list)
-
- converted_md_lists.append((filename, readmes_match, converted_md_list, _start_prompt, _end_prompt))
-
- # Compare the converted Markdowns
- for converted_md_list in converted_md_lists:
- filename, readmes_match, converted_md, _start_prompt, _end_prompt = converted_md_list
-
- if filename == "README.md":
- continue
- if overwrite:
- _, start_index, end_index, lines = _find_text_in_file(
- filename=os.path.join(REPO_PATH, filename), start_prompt=_start_prompt, end_prompt=_end_prompt
- )
- with open(os.path.join(REPO_PATH, filename), "w", encoding="utf-8", newline="\n") as f:
- f.writelines(lines[:start_index] + [converted_md] + lines[end_index:])
- elif not readmes_match:
- raise ValueError(
- f"The model list in the README changed and the list in `{filename}` has not been updated. Run "
- "`make fix-copies` to fix this."
- )
-
-
# Map a model name with the name it has in the README for the check_readme check
SPECIAL_MODEL_NAMES = {
"Bert Generation": "BERT For Sequence Generation",
@@ -1160,60 +1097,11 @@ def check_model_list_copy(overwrite: bool = False):
)
-def check_readme(overwrite: bool = False):
- """
- Check if the main README contains all the models in the library or not.
-
- Args:
- overwrite (`bool`, *optional*, defaults to `False`):
- Whether or not to add an entry for the missing models using `README_TEMPLATE`.
- """
- info = LOCALIZED_READMES["README.md"]
- models, start_index, end_index, lines = _find_text_in_file(
- os.path.join(REPO_PATH, "README.md"),
- info["start_prompt"],
- info["end_prompt"],
- )
- models_in_readme = [re.search(r"\*\*\[([^\]]*)", line).groups()[0] for line in models.strip().split("\n")]
-
- model_names_mapping = transformers_module.models.auto.configuration_auto.MODEL_NAMES_MAPPING
- absents = [
- (key, name)
- for key, name in model_names_mapping.items()
- if SPECIAL_MODEL_NAMES.get(name, name) not in models_in_readme
- ]
- # Remove exceptions
- absents = [(key, name) for key, name in absents if name not in MODELS_NOT_IN_README]
- if len(absents) > 0 and not overwrite:
- print(absents)
- raise ValueError(
- "The main README doesn't contain all models, run `make fix-copies` to fill it with the missing model(s)"
- " then complete the generated entries.\nIf the model is not supposed to be in the main README, add it to"
- " the list `MODELS_NOT_IN_README` in utils/check_copies.py.\nIf it has a different name in the repo than"
- " in the README, map the correspondence in `SPECIAL_MODEL_NAMES` in utils/check_copies.py."
- )
-
- new_models = [README_TEMPLATE.format(model_name=name, model_type=key) for key, name in absents]
-
- all_models = models.strip().split("\n") + new_models
- all_models = sorted(all_models, key=lambda x: re.search(r"\*\*\[([^\]]*)", x).groups()[0].lower())
- all_models = "\n".join(all_models) + "\n"
-
- if all_models != models:
- if overwrite:
- print("Fixing the main README.")
- with open(os.path.join(REPO_PATH, "README.md"), "w", encoding="utf-8", newline="\n") as f:
- f.writelines(lines[:start_index] + [all_models] + lines[end_index:])
- else:
- raise ValueError("The main README model list is not properly sorted. Run `make fix-copies` to fix this.")
-
-
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--file", type=str, default=None, help="A specific file to check and/or fix")
parser.add_argument("--fix_and_overwrite", action="store_true", help="Whether to fix inconsistencies.")
args = parser.parse_args()
- check_readme(args.fix_and_overwrite)
check_copies(args.fix_and_overwrite, args.file)
check_full_copies(args.fix_and_overwrite)
diff --git a/utils/check_docstrings.py b/utils/check_docstrings.py
index a58d08eccaf3..04572d132b9d 100644
--- a/utils/check_docstrings.py
+++ b/utils/check_docstrings.py
@@ -697,6 +697,8 @@
"TFSegformerModel",
"TFSpeech2TextForConditionalGeneration",
"TFSpeech2TextModel",
+ "TFSwiftFormerForImageClassification",
+ "TFSwiftFormerModel",
"TFSwinForImageClassification",
"TFSwinForMaskedImageModeling",
"TFSwinModel",
diff --git a/utils/check_inits.py b/utils/check_inits.py
index b9a637e6354b..19c23279b9b8 100644
--- a/utils/check_inits.py
+++ b/utils/check_inits.py
@@ -331,6 +331,7 @@ def get_transformers_submodules() -> List[str]:
"models.esm.openfold_utils",
"modeling_attn_mask_utils",
"safetensors_conversion",
+ "modeling_gguf_pytorch_utils",
]
diff --git a/utils/check_table.py b/utils/check_table.py
index 99031f025c85..9c9318ca8571 100644
--- a/utils/check_table.py
+++ b/utils/check_table.py
@@ -155,6 +155,7 @@ def _center_text(text: str, width: int) -> str:
"HerBERT": "BERT",
"LayoutXLM": "LayoutLMv2",
"Llama2": "LLaMA",
+ "Llama3": "LLaMA",
"MADLAD-400": "T5",
"MatCha": "Pix2Struct",
"mBART-50": "mBART",
diff --git a/utils/check_task_guides.py b/utils/check_task_guides.py
deleted file mode 100644
index b00ff1dc1a5a..000000000000
--- a/utils/check_task_guides.py
+++ /dev/null
@@ -1,168 +0,0 @@
-# coding=utf-8
-# Copyright 2023 The HuggingFace Inc. team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-Utility that checks the list of models in the tips in the task-specific pages of the doc is up to date and potentially
-fixes it.
-
-Use from the root of the repo with:
-
-```bash
-python utils/check_task_guides.py
-```
-
-for a check that will error in case of inconsistencies (used by `make repo-consistency`).
-
-To auto-fix issues run:
-
-```bash
-python utils/check_task_guides.py --fix_and_overwrite
-```
-
-which is used by `make fix-copies`.
-"""
-import argparse
-import os
-
-from transformers.utils import direct_transformers_import
-
-
-# All paths are set with the intent you should run this script from the root of the repo with the command
-# python utils/check_task_guides.py
-TRANSFORMERS_PATH = "src/transformers"
-PATH_TO_TASK_GUIDES = "docs/source/en/tasks"
-
-
-def _find_text_in_file(filename: str, start_prompt: str, end_prompt: str) -> str:
- """
- Find the text in filename between two prompts.
-
- Args:
- filename (`str`): The file to search into.
- start_prompt (`str`): A string to look for at the start of the content searched.
- end_prompt (`str`): A string that will mark the end of the content to look for.
-
- Returns:
- `str`: The content between the prompts.
- """
- with open(filename, "r", encoding="utf-8", newline="\n") as f:
- lines = f.readlines()
- # Find the start prompt.
- start_index = 0
- while not lines[start_index].startswith(start_prompt):
- start_index += 1
- start_index += 1
-
- # Now go until the end prompt.
- end_index = start_index
- while not lines[end_index].startswith(end_prompt):
- end_index += 1
- end_index -= 1
-
- while len(lines[start_index]) <= 1:
- start_index += 1
- while len(lines[end_index]) <= 1:
- end_index -= 1
- end_index += 1
- return "".join(lines[start_index:end_index]), start_index, end_index, lines
-
-
-# This is to make sure the transformers module imported is the one in the repo.
-transformers_module = direct_transformers_import(TRANSFORMERS_PATH)
-
-# Map between a task guide and the corresponding auto class.
-TASK_GUIDE_TO_MODELS = {
- "asr.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_CTC_MAPPING_NAMES,
- "audio_classification.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES,
- "language_modeling.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_CAUSAL_LM_MAPPING_NAMES,
- "image_classification.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES,
- "masked_language_modeling.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_MASKED_LM_MAPPING_NAMES,
- "multiple_choice.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_MULTIPLE_CHOICE_MAPPING_NAMES,
- "object_detection.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES,
- "question_answering.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES,
- "semantic_segmentation.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES,
- "sequence_classification.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES,
- "summarization.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES,
- "token_classification.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES,
- "translation.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES,
- "video_classification.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES,
- "document_question_answering.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES,
- "monocular_depth_estimation.md": transformers_module.models.auto.modeling_auto.MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES,
-}
-
-# This list contains model types used in some task guides that are not in `CONFIG_MAPPING_NAMES` (therefore not in any
-# `MODEL_MAPPING_NAMES` or any `MODEL_FOR_XXX_MAPPING_NAMES`).
-SPECIAL_TASK_GUIDE_TO_MODEL_TYPES = {
- "summarization.md": ("nllb",),
- "translation.md": ("nllb",),
-}
-
-
-def get_model_list_for_task(task_guide: str) -> str:
- """
- Return the list of models supporting a given task.
-
- Args:
- task_guide (`str`): The name of the task guide to check.
-
- Returns:
- `str`: The list of models supporting this task, as links to their respective doc pages separated by commas.
- """
- model_maping_names = TASK_GUIDE_TO_MODELS[task_guide]
- special_model_types = SPECIAL_TASK_GUIDE_TO_MODEL_TYPES.get(task_guide, set())
- model_names = {
- code: name
- for code, name in transformers_module.MODEL_NAMES_MAPPING.items()
- if (code in model_maping_names or code in special_model_types)
- }
- return ", ".join([f"[{name}](../model_doc/{code})" for code, name in model_names.items()]) + "\n"
-
-
-def check_model_list_for_task(task_guide: str, overwrite: bool = False):
- """
- For a given task guide, checks the model list in the generated tip for consistency with the state of the lib and
- updates it if needed.
-
- Args:
- task_guide (`str`):
- The name of the task guide to check.
- overwrite (`bool`, *optional*, defaults to `False`):
- Whether or not to overwrite the table when it's not up to date.
- """
- current_list, start_index, end_index, lines = _find_text_in_file(
- filename=os.path.join(PATH_TO_TASK_GUIDES, task_guide),
- start_prompt="",
- end_prompt="",
- )
-
- new_list = get_model_list_for_task(task_guide)
-
- if current_list != new_list:
- if overwrite:
- with open(os.path.join(PATH_TO_TASK_GUIDES, task_guide), "w", encoding="utf-8", newline="\n") as f:
- f.writelines(lines[:start_index] + [new_list] + lines[end_index:])
- else:
- raise ValueError(
- f"The list of models that can be used in the {task_guide} guide needs an update. Run `make fix-copies`"
- " to fix this."
- )
-
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser()
- parser.add_argument("--fix_and_overwrite", action="store_true", help="Whether to fix inconsistencies.")
- args = parser.parse_args()
-
- for task_guide in TASK_GUIDE_TO_MODELS.keys():
- check_model_list_for_task(task_guide, args.fix_and_overwrite)
diff --git a/utils/deprecate_models.py b/utils/deprecate_models.py
new file mode 100644
index 000000000000..d5160e938420
--- /dev/null
+++ b/utils/deprecate_models.py
@@ -0,0 +1,357 @@
+"""
+Script which deprecates a list of given models
+
+Example usage:
+python utils/deprecate_models.py --models bert distilbert
+"""
+
+import argparse
+import os
+from collections import defaultdict
+from pathlib import Path
+from typing import Optional, Tuple
+
+import requests
+from custom_init_isort import sort_imports_in_all_inits
+from git import Repo
+from packaging import version
+
+from transformers import CONFIG_MAPPING, logging
+from transformers import __version__ as current_version
+
+
+REPO_PATH = Path(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
+repo = Repo(REPO_PATH)
+
+logger = logging.get_logger(__name__)
+
+
+def get_last_stable_minor_release():
+ # Get the last stable release of transformers
+ url = "https://pypi.org/pypi/transformers/json"
+ release_data = requests.get(url).json()
+
+    # Find the last stable release of transformers (version below current version)
+ major_version, minor_version, patch_version, _ = current_version.split(".")
+ last_major_minor = f"{major_version}.{int(minor_version) - 1}"
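+    # e.g. for a dev version such as "4.41.0.dev0", `last_major_minor` would be "4.40" (illustrative values)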
+ last_stable_minor_releases = [
+ release for release in release_data["releases"] if release.startswith(last_major_minor)
+ ]
+ last_stable_release = sorted(last_stable_minor_releases, key=version.parse)[-1]
+
+ return last_stable_release
+
+
+def build_tip_message(last_stable_release):
+ return (
+        """
+<Tip warning={true}>
+
+This model is in maintenance mode only, we don't accept any new PRs changing its code.
+"""
+        + f"""If you run into any issues running this model, please reinstall the last version that supported this model: v{last_stable_release}.
+You can do so by running the following command: `pip install -U transformers=={last_stable_release}`.
+
+</Tip>"""
+ )
+
+
+def insert_tip_to_model_doc(model_doc_path, tip_message):
+ tip_message_lines = tip_message.split("\n")
+
+ with open(model_doc_path, "r") as f:
+ model_doc = f.read()
+
+ # Add the tip message to the model doc page directly underneath the title
+ lines = model_doc.split("\n")
+
+ new_model_lines = []
+ for line in lines:
+ if line.startswith("# "):
+ new_model_lines.append(line)
+ new_model_lines.extend(tip_message_lines)
+ else:
+ new_model_lines.append(line)
+
+ with open(model_doc_path, "w") as f:
+ f.write("\n".join(new_model_lines))
+
+
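+# Illustrative example: a model whose code lives under `models/xlm_prophetnet` has its doc page at
+# `docs/source/en/model_doc/xlm-prophetnet.md`, hence the "-" and "" name variants tried below.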
+def get_model_doc_path(model: str) -> Tuple[Optional[str], Optional[str]]:
+    # Possible variants of the model name in the model doc path
+    model_doc_names = [
+        model,
+        # Try replacing _ with - in the model name
+        model.replace("_", "-"),
+        # Try replacing _ with "" in the model name
+        model.replace("_", ""),
+    ]
+
+    for model_doc_name in model_doc_names:
+        model_doc_path = REPO_PATH / f"docs/source/en/model_doc/{model_doc_name}.md"
+        if os.path.exists(model_doc_path):
+            # Return the name variant that matched so callers can use it as a fallback lookup key
+            return model_doc_path, model_doc_name
+
+    return None, None
+
+
+def extract_model_info(model):
+ model_info = {}
+ model_doc_path, model_doc_name = get_model_doc_path(model)
+ model_path = REPO_PATH / f"src/transformers/models/{model}"
+
+ if model_doc_path is None:
+ print(f"Model doc path does not exist for {model}")
+ return None
+ model_info["model_doc_path"] = model_doc_path
+ model_info["model_doc_name"] = model_doc_name
+
+ if not os.path.exists(model_path):
+ print(f"Model path does not exist for {model}")
+ return None
+ model_info["model_path"] = model_path
+
+ return model_info
+
+
+def update_relative_imports(filename, model):
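+    # Deprecated models sit one package level deeper, so e.g. "from ..modeling_utils import X"
+    # has to become "from ...modeling_utils import X" (illustrative import).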
+ with open(filename, "r") as f:
+ filelines = f.read()
+
+ new_file_lines = []
+ for line in filelines.split("\n"):
+ if line.startswith("from .."):
+ new_file_lines.append(line.replace("from ..", "from ..."))
+ else:
+ new_file_lines.append(line)
+
+ with open(filename, "w") as f:
+ f.write("\n".join(new_file_lines))
+
+
+def move_model_files_to_deprecated(model):
+ model_path = REPO_PATH / f"src/transformers/models/{model}"
+ deprecated_model_path = REPO_PATH / f"src/transformers/models/deprecated/{model}"
+
+ if not os.path.exists(deprecated_model_path):
+ os.makedirs(deprecated_model_path)
+
+ for file in os.listdir(model_path):
+ if file == "__pycache__":
+ continue
+ repo.git.mv(f"{model_path}/{file}", f"{deprecated_model_path}/{file}")
+
+ # For deprecated files, we then need to update the relative imports
+ update_relative_imports(f"{deprecated_model_path}/{file}", model)
+
+
+def delete_model_tests(model):
+ tests_path = REPO_PATH / f"tests/models/{model}"
+
+ if os.path.exists(tests_path):
+ repo.git.rm("-r", tests_path)
+
+
+def get_line_indent(s):
+ return len(s) - len(s.lstrip())
+
+
+def update_main_init_file(models):
+ """
+    Replace all instances of models.model_name with models.deprecated.model_name in the __init__.py file
+
+ Args:
+ models (List[str]): The models to mark as deprecated
+ """
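+    # e.g. every "models.deta" reference becomes "models.deprecated.deta" ("deta" is only an
+    # illustrative model name here)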
+ filename = REPO_PATH / "src/transformers/__init__.py"
+ with open(filename, "r") as f:
+ init_file = f.read()
+
+ # 1. For each model, find all the instances of model.model_name and replace with model.deprecated.model_name
+ for model in models:
+ init_file = init_file.replace(f"models.{model}", f"models.deprecated.{model}")
+
+ with open(filename, "w") as f:
+ f.write(init_file)
+
+ # 2. Resort the imports
+ sort_imports_in_all_inits(check_only=False)
+
+
+def remove_model_references_from_file(filename, models, condition):
+ """
+ Remove all references to the given models from the given file
+
+ Args:
+ filename (str): The file to remove the references from
+ models (List[str]): The models to remove
+ condition (Callable): A function that takes the line and model and returns True if the line should be removed
+ """
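+    # e.g. with condition=lambda line, model: model == line.strip().strip(","), lines consisting of
+    # just the model name (optionally followed by a comma) are dropped, as done for
+    # src/transformers/models/__init__.py further down in this script.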
+ with open(filename, "r") as f:
+ init_file = f.read()
+
+ new_file_lines = []
+ for i, line in enumerate(init_file.split("\n")):
+ if any(condition(line, model) for model in models):
+ continue
+ new_file_lines.append(line)
+
+ with open(filename, "w") as f:
+ f.write("\n".join(new_file_lines))
+
+
+def remove_model_config_classes_from_config_check(model_config_classes):
+ """
+ Remove the deprecated model config classes from the check_config_attributes.py file
+
+ Args:
+ model_config_classes (List[str]): The model config classes to remove e.g. ["BertConfig", "DistilBertConfig"]
+ """
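+    # Entries have the rough shape `"SomeModelConfig": ["some_attribute", ...]` inside
+    # SPECIAL_CASES_TO_ALLOW; the matching entry and any comment lines directly above it are removed.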
+ filename = REPO_PATH / "utils/check_config_attributes.py"
+ with open(filename, "r") as f:
+ check_config_attributes = f.read()
+
+ # Keep track as we have to delete comment above too
+ in_special_cases_to_allow = False
+ in_indent = False
+ new_file_lines = []
+
+ for line in check_config_attributes.split("\n"):
+ indent = get_line_indent(line)
+ if (line.strip() == "SPECIAL_CASES_TO_ALLOW = {") or (line.strip() == "SPECIAL_CASES_TO_ALLOW.update("):
+ in_special_cases_to_allow = True
+
+ elif in_special_cases_to_allow and indent == 0 and line.strip() in ("}", ")"):
+ in_special_cases_to_allow = False
+
+ if in_indent:
+ if line.strip().endswith(("]", "],")):
+ in_indent = False
+ continue
+
+ if in_special_cases_to_allow and any(
+ model_config_class in line for model_config_class in model_config_classes
+ ):
+ # Remove comments above the model config class to remove
+ while new_file_lines[-1].strip().startswith("#"):
+ new_file_lines.pop()
+
+ if line.strip().endswith("["):
+ in_indent = True
+
+ continue
+
+ elif any(model_config_class in line for model_config_class in model_config_classes):
+ continue
+
+ new_file_lines.append(line)
+
+ with open(filename, "w") as f:
+ f.write("\n".join(new_file_lines))
+
+
+def add_models_to_deprecated_models_in_config_auto(models):
+ """
+    Add the models to the DEPRECATED_MODELS list in configuration_auto.py and sort the list
+ to be in alphabetical order.
+ """
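+    # The target block in configuration_auto.py is a plain list of quoted names, e.g.
+    #     DEPRECATED_MODELS = [
+    #         "bort",
+    #         "deta",
+    #     ]
+    # (the model names above are only illustrative).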
+ filepath = REPO_PATH / "src/transformers/models/auto/configuration_auto.py"
+ with open(filepath, "r") as f:
+ config_auto = f.read()
+
+ new_file_lines = []
+ deprecated_models_list = []
+ in_deprecated_models = False
+ for line in config_auto.split("\n"):
+ if line.strip() == "DEPRECATED_MODELS = [":
+ in_deprecated_models = True
+ new_file_lines.append(line)
+ elif in_deprecated_models and line.strip() == "]":
+ in_deprecated_models = False
+ # Add the new models to deprecated models list
+            deprecated_models_list.extend([f'"{model}",' for model in models])
+ # Sort so they're in alphabetical order in the file
+ deprecated_models_list = sorted(deprecated_models_list)
+ new_file_lines.extend(deprecated_models_list)
+ # Make sure we still have the closing bracket
+ new_file_lines.append(line)
+ elif in_deprecated_models:
+ deprecated_models_list.append(line.strip())
+ else:
+ new_file_lines.append(line)
+
+ with open(filepath, "w") as f:
+ f.write("\n".join(new_file_lines))
+
+
+def deprecate_models(models):
+ # Get model info
+ skipped_models = []
+ models_info = defaultdict(dict)
+ for model in models:
+ single_model_info = extract_model_info(model)
+ if single_model_info is None:
+ skipped_models.append(model)
+ else:
+ models_info[model] = single_model_info
+
+ model_config_classes = []
+ for model, model_info in models_info.items():
+ if model in CONFIG_MAPPING:
+ model_config_classes.append(CONFIG_MAPPING[model].__name__)
+ elif model_info["model_doc_name"] in CONFIG_MAPPING:
+ model_config_classes.append(CONFIG_MAPPING[model_info["model_doc_name"]].__name__)
+ else:
+ skipped_models.append(model)
+ print(f"Model config class not found for model: {model}")
+
+ # Filter out skipped models
+ models = [model for model in models if model not in skipped_models]
+
+ if skipped_models:
+ print(f"Skipped models: {skipped_models} as the model doc or model path could not be found.")
+ print(f"Models to deprecate: {models}")
+
+ # Remove model config classes from config check
+ print("Removing model config classes from config checks")
+ remove_model_config_classes_from_config_check(model_config_classes)
+
+ tip_message = build_tip_message(get_last_stable_minor_release())
+
+ for model, model_info in models_info.items():
+ print(f"Processing model: {model}")
+ # Add the tip message to the model doc page directly underneath the title
+ print("Adding tip message to model doc page")
+ insert_tip_to_model_doc(model_info["model_doc_path"], tip_message)
+
+        # Move the model files to deprecated: src/transformers/models/model -> src/transformers/models/deprecated/model
+ print("Moving model files to deprecated for model")
+ move_model_files_to_deprecated(model)
+
+ # Delete the model tests: tests/models/model
+ print("Deleting model tests")
+ delete_model_tests(model)
+
+    # We do the following with all models passed at once to avoid having to re-write the file multiple times
+ print("Updating __init__.py file to point to the deprecated models")
+ update_main_init_file(models)
+
+ # Remove model references from other files
+ print("Removing model references from other files")
+ remove_model_references_from_file(
+ "src/transformers/models/__init__.py", models, lambda line, model: model == line.strip().strip(",")
+ )
+ remove_model_references_from_file(
+ "utils/slow_documentation_tests.txt", models, lambda line, model: "/" + model + "/" in line
+ )
+ remove_model_references_from_file("utils/not_doctested.txt", models, lambda line, model: "/" + model + "/" in line)
+
+ # Add models to DEPRECATED_MODELS in the configuration_auto.py
+ print("Adding models to DEPRECATED_MODELS in configuration_auto.py")
+ add_models_to_deprecated_models_in_config_auto(models)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--models", nargs="+", help="List of models to deprecate")
+ args = parser.parse_args()
+ deprecate_models(args.models)
diff --git a/utils/get_previous_daily_ci.py b/utils/get_previous_daily_ci.py
index 4e4cb0a8c10d..975c6f339820 100644
--- a/utils/get_previous_daily_ci.py
+++ b/utils/get_previous_daily_ci.py
@@ -14,8 +14,11 @@ def get_daily_ci_runs(token, num_runs=7):
if token is not None:
headers = {"Accept": "application/vnd.github+json", "Authorization": f"Bearer {token}"}
- # The id of a workflow (not of a workflow run)
- workflow_id = "636036"
+ # The id of a workflow (not of a workflow run).
+ # From a given workflow run (where we have workflow run id), we can get the workflow id by going to
+ # https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id}
+ # and check the `workflow_id` key.
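+    # For example, something along these lines retrieves it (with <workflow_run_id> replaced by a real run id):
+    #   curl -s https://api.github.com/repos/huggingface/transformers/actions/runs/<workflow_run_id> | python -c "import json, sys; print(json.load(sys.stdin)['workflow_id'])"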
+ workflow_id = "90575235"
url = f"https://api.github.com/repos/huggingface/transformers/actions/workflows/{workflow_id}/runs"
# On `main` branch + event being `schedule` + not returning PRs + only `num_runs` results
diff --git a/utils/models_to_deprecate.py b/utils/models_to_deprecate.py
new file mode 100644
index 000000000000..ebdecf22eb8a
--- /dev/null
+++ b/utils/models_to_deprecate.py
@@ -0,0 +1,199 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Script to find a candidate list of models to deprecate based on the number of downloads and the date when each model was first added to the library.
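+
+Example usage (a sketch; all flags are optional and defined in the argparse section at the bottom of this file):
+
+```bash
+python utils/models_to_deprecate.py --thresh_num_downloads 5000 --thresh_date 2023-01-01 --save_model_info
+```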
+"""
+import argparse
+import glob
+import json
+import os
+from collections import defaultdict
+from datetime import datetime, timezone
+from pathlib import Path
+
+from git import Repo
+from huggingface_hub import HfApi
+
+
+api = HfApi()
+
+PATH_TO_REPO = Path(__file__).parent.parent.resolve()
+repo = Repo(PATH_TO_REPO)
+
+
+class HubModelLister:
+ """
+ Utility for getting models from the hub based on tags. Handles errors without crashing the script.
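+
+    Usage sketch (hypothetical tag; iteration simply stops if the hub call fails):
+
+        for hub_model in HubModelLister(tags="bert"):
+            print(hub_model.id)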
+ """
+
+ def __init__(self, tags):
+ self.tags = tags
+ self.model_list = api.list_models(tags=tags)
+
+ def __iter__(self):
+ try:
+ yield from self.model_list
+ except Exception as e:
+ print(f"Error: {e}")
+ return
+
+
+def _extract_commit_hash(commits):
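+    """Return the sha from the first `git log` line of the form "commit <sha>"; empty string if none is found."""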
+ for commit in commits:
+ if commit.startswith("commit "):
+ return commit.split(" ")[1]
+ return ""
+
+
+def get_list_of_repo_model_paths(models_dir):
+ # Get list of all models in the library
+ models = glob.glob(os.path.join(models_dir, "*/modeling_*.py"))
+
+ # Remove flax and tf models
+ models = [model for model in models if "_flax_" not in model]
+ models = [model for model in models if "_tf_" not in model]
+
+ # Get list of all deprecated models in the library
+ deprecated_models = glob.glob(os.path.join(models_dir, "deprecated", "*"))
+    # For each deprecated model, remove its modeling files from the list of all models (this also drops the symlinked path)
+ for deprecated_model in deprecated_models:
+ deprecated_model_name = "/" + deprecated_model.split("/")[-1] + "/"
+ models = [model for model in models if deprecated_model_name not in model]
+ # Remove deprecated models
+ models = [model for model in models if "/deprecated" not in model]
+ # Remove auto
+ models = [model for model in models if "/auto/" not in model]
+ return models
+
+
+def get_list_of_models_to_deprecate(
+ thresh_num_downloads=5_000,
+ thresh_date=None,
+ use_cache=False,
+ save_model_info=False,
+ max_num_models=-1,
+):
+ if thresh_date is None:
+ thresh_date = datetime.now(timezone.utc).replace(year=datetime.now(timezone.utc).year - 1)
+ else:
+ thresh_date = datetime.strptime(thresh_date, "%Y-%m-%d").replace(tzinfo=timezone.utc)
+
+ models_dir = PATH_TO_REPO / "src/transformers/models"
+ model_paths = get_list_of_repo_model_paths(models_dir=models_dir)
+
+ if use_cache and os.path.exists("models_info.json"):
+ with open("models_info.json", "r") as f:
+ models_info = json.load(f)
+ # Convert datetimes back to datetime objects
+ for model, info in models_info.items():
+ info["first_commit_datetime"] = datetime.fromisoformat(info["first_commit_datetime"])
+
+ else:
+ # Build a dictionary of model info: first commit datetime, commit hash, model path
+ models_info = defaultdict(dict)
+ for model_path in model_paths:
+ model = model_path.split("/")[-2]
+ if model in models_info:
+ continue
+ commits = repo.git.log("--diff-filter=A", "--", model_path).split("\n")
+ commit_hash = _extract_commit_hash(commits)
+ commit_obj = repo.commit(commit_hash)
+ committed_datetime = commit_obj.committed_datetime
+ models_info[model]["commit_hash"] = commit_hash
+ models_info[model]["first_commit_datetime"] = committed_datetime
+ models_info[model]["model_path"] = model_path
+ models_info[model]["downloads"] = 0
+
+ # Some tags on the hub are formatted differently than in the library
+ tags = [model]
+ if "_" in model:
+ tags.append(model.replace("_", "-"))
+ models_info[model]["tags"] = tags
+
+ # Filter out models which were added less than a year ago
+ models_info = {
+ model: info for model, info in models_info.items() if info["first_commit_datetime"] < thresh_date
+ }
+
+ # We make successive calls to the hub, filtering based on the model tags
+ n_seen = 0
+ for model, model_info in models_info.items():
+ for model_tag in model_info["tags"]:
+ model_list = HubModelLister(tags=model_tag)
+ for i, hub_model in enumerate(model_list):
+ n_seen += 1
+ if i % 100 == 0:
+ print(f"Processing model {i} for tag {model_tag}")
+                    if max_num_models != -1 and i > max_num_models:
+ break
+ if hub_model.private:
+ continue
+ model_info["downloads"] += hub_model.downloads
+
+ if save_model_info and not (use_cache and os.path.exists("models_info.json")):
+ # Make datetimes serializable
+ for model, info in models_info.items():
+ info["first_commit_datetime"] = info["first_commit_datetime"].isoformat()
+ with open("models_info.json", "w") as f:
+ json.dump(models_info, f, indent=4)
+
+ print("\nFinding models to deprecate:")
+ n_models_to_deprecate = 0
+ models_to_deprecate = {}
+ for model, info in models_info.items():
+ n_downloads = info["downloads"]
+ if n_downloads < thresh_num_downloads:
+ n_models_to_deprecate += 1
+ models_to_deprecate[model] = info
+ print(f"\nModel: {model}")
+ print(f"Downloads: {n_downloads}")
+ print(f"Date: {info['first_commit_datetime']}")
+ print("\nModels to deprecate: ", "\n" + "\n".join(models_to_deprecate.keys()))
+ print(f"\nNumber of models to deprecate: {n_models_to_deprecate}")
+    print("Before deprecating make sure to verify the models, including if they're used as a module in other models.")
+
+    return models_to_deprecate
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--save_model_info", action="store_true", help="Save the retrieved model info to a json file.")
+ parser.add_argument(
+ "--use_cache", action="store_true", help="Use the cached model info instead of calling the hub."
+ )
+ parser.add_argument(
+ "--thresh_num_downloads",
+ type=int,
+ default=5_000,
+ help="Threshold number of downloads below which a model should be deprecated. Default is 5,000.",
+ )
+ parser.add_argument(
+ "--thresh_date",
+ type=str,
+ default=None,
+ help="Date to consider the first commit from. Format: YYYY-MM-DD. If unset, defaults to one year ago from today.",
+ )
+ parser.add_argument(
+ "--max_num_models",
+ type=int,
+ default=-1,
+ help="Maximum number of models to consider from the hub. -1 means all models. Useful for testing.",
+ )
+ args = parser.parse_args()
+
+ models_to_deprecate = get_list_of_models_to_deprecate(
+ thresh_num_downloads=args.thresh_num_downloads,
+ thresh_date=args.thresh_date,
+ use_cache=args.use_cache,
+ save_model_info=args.save_model_info,
+ max_num_models=args.max_num_models,
+ )
diff --git a/utils/not_doctested.txt b/utils/not_doctested.txt
index 1869836909e6..3ffc2740606e 100644
--- a/utils/not_doctested.txt
+++ b/utils/not_doctested.txt
@@ -2,7 +2,7 @@ docs/source/en/_config.py
docs/source/en/accelerate.md
docs/source/en/add_new_model.md
docs/source/en/add_new_pipeline.md
-docs/source/en/add_tensorflow_model.md
+docs/source/en/agents.md
docs/source/en/attention.md
docs/source/en/benchmarks.md
docs/source/en/bertology.md
@@ -11,7 +12,6 @@ docs/source/en/community.md
docs/source/en/contributing.md
docs/source/en/create_a_model.md
docs/source/en/custom_models.md
-docs/source/en/custom_tools.md
docs/source/en/debugging.md
docs/source/en/fast_tokenizers.md
docs/source/en/glossary.md
@@ -325,10 +325,20 @@ docs/source/en/tflite.md
docs/source/en/tokenizer_summary.md
docs/source/en/torchscript.md
docs/source/en/training.md
-docs/source/en/transformers_agents.md
docs/source/en/troubleshooting.md
src/transformers/activations.py
src/transformers/activations_tf.py
+src/transformers/agents/agent_types.py
+src/transformers/agents/agents.py
+src/transformers/agents/document_question_answering.py
+src/transformers/agents/evaluate_agent.py
+src/transformers/agents/image_question_answering.py
+src/transformers/agents/prompts.py
+src/transformers/agents/python_interpreter.py
+src/transformers/agents/speech_to_text.py
+src/transformers/agents/text_to_speech.py
+src/transformers/agents/tools.py
+src/transformers/agents/translation.py
src/transformers/audio_utils.py
src/transformers/benchmark/benchmark.py
src/transformers/benchmark/benchmark_args.py
@@ -336,7 +346,6 @@ src/transformers/benchmark/benchmark_args_tf.py
src/transformers/benchmark/benchmark_args_utils.py
src/transformers/benchmark/benchmark_tf.py
src/transformers/benchmark/benchmark_utils.py
-src/transformers/commands/add_new_model.py
src/transformers/commands/add_new_model_like.py
src/transformers/commands/convert.py
src/transformers/commands/download.py
@@ -976,22 +985,6 @@ src/transformers/time_series_utils.py
src/transformers/tokenization_utils.py
src/transformers/tokenization_utils_base.py
src/transformers/tokenization_utils_fast.py
-src/transformers/tools/agent_types.py
-src/transformers/tools/agents.py
-src/transformers/tools/base.py
-src/transformers/tools/document_question_answering.py
-src/transformers/tools/evaluate_agent.py
-src/transformers/tools/image_captioning.py
-src/transformers/tools/image_question_answering.py
-src/transformers/tools/image_segmentation.py
-src/transformers/tools/prompts.py
-src/transformers/tools/python_interpreter.py
-src/transformers/tools/speech_to_text.py
-src/transformers/tools/text_classification.py
-src/transformers/tools/text_question_answering.py
-src/transformers/tools/text_summarization.py
-src/transformers/tools/text_to_speech.py
-src/transformers/tools/translation.py
src/transformers/trainer.py
src/transformers/trainer_callback.py
src/transformers/trainer_pt_utils.py
diff --git a/utils/notification_service.py b/utils/notification_service.py
index 158e01942b81..cf126cd68a33 100644
--- a/utils/notification_service.py
+++ b/utils/notification_service.py
@@ -416,7 +416,7 @@ def per_model_sum(model_category_dict):
reports=sorted_model_reports,
to_truncate=False,
)
- file_path = os.path.join(os.getcwd(), "prev_ci_results/model_failures_report.txt")
+ file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/model_failures_report.txt")
with open(file_path, "w", encoding="UTF-8") as fp:
fp.write(model_failures_report)
@@ -426,18 +426,18 @@ def per_model_sum(model_category_dict):
reports=sorted_module_reports,
to_truncate=False,
)
- file_path = os.path.join(os.getcwd(), "prev_ci_results/module_failures_report.txt")
+ file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/module_failures_report.txt")
with open(file_path, "w", encoding="UTF-8") as fp:
fp.write(module_failures_report)
if self.prev_ci_artifacts is not None:
- # if the last run produces artifact named `prev_ci_results`
+ # if the last run produces artifact named `ci_results_{job_name}`
if (
- "prev_ci_results" in self.prev_ci_artifacts
- and "model_failures_report.txt" in self.prev_ci_artifacts["prev_ci_results"]
+ f"ci_results_{job_name}" in self.prev_ci_artifacts
+ and "model_failures_report.txt" in self.prev_ci_artifacts[f"ci_results_{job_name}"]
):
# Compute the difference of the previous/current (model failure) table
- prev_model_failures = self.prev_ci_artifacts["prev_ci_results"]["model_failures_report.txt"]
+ prev_model_failures = self.prev_ci_artifacts[f"ci_results_{job_name}"]["model_failures_report.txt"]
entries_changed = self.compute_diff_for_failure_reports(model_failures_report, prev_model_failures)
if len(entries_changed) > 0:
# Save the complete difference
@@ -447,7 +447,7 @@ def per_model_sum(model_category_dict):
reports=entries_changed,
to_truncate=False,
)
- file_path = os.path.join(os.getcwd(), "prev_ci_results/changed_model_failures_report.txt")
+ file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/changed_model_failures_report.txt")
with open(file_path, "w", encoding="UTF-8") as fp:
fp.write(diff_report)
@@ -644,10 +644,10 @@ def get_new_model_failure_blocks(self, with_header=True):
prev_model_results = {}
if (
- "prev_ci_results" in self.prev_ci_artifacts
- and "model_results.json" in self.prev_ci_artifacts["prev_ci_results"]
+ f"ci_results_{job_name}" in self.prev_ci_artifacts
+ and "model_results.json" in self.prev_ci_artifacts[f"ci_results_{job_name}"]
):
- prev_model_results = json.loads(self.prev_ci_artifacts["prev_ci_results"]["model_results.json"])
+ prev_model_results = json.loads(self.prev_ci_artifacts[f"ci_results_{job_name}"]["model_results.json"])
all_failure_lines = {}
for job, job_result in sorted_dict:
@@ -992,13 +992,13 @@ def prepare_reports(title, header, reports, to_truncate=True):
"job_link": {},
}
for model in models
- if f"run_all_tests_gpu_{model}_test_reports" in available_artifacts
+ if f"run_models_gpu_{model}_test_reports" in available_artifacts
}
unclassified_model_failures = []
for model in model_results.keys():
- for artifact_path in available_artifacts[f"run_all_tests_gpu_{model}_test_reports"].paths:
+ for artifact_path in available_artifacts[f"run_models_gpu_{model}_test_reports"].paths:
artifact = retrieve_artifact(artifact_path["path"], artifact_path["gpu"])
if "stats" in artifact:
# Link to the GitHub Action job
@@ -1052,10 +1052,10 @@ def prepare_reports(title, header, reports, to_truncate=True):
# Additional runs
additional_files = {
- "PyTorch pipelines": "run_tests_torch_pipeline_gpu",
- "TensorFlow pipelines": "run_tests_tf_pipeline_gpu",
- "Examples directory": "run_examples_gpu",
- "Torch CUDA extension tests": "run_tests_torch_cuda_extensions_gpu_test_reports",
+ "PyTorch pipelines": "run_pipelines_torch_gpu_test_reports",
+ "TensorFlow pipelines": "run_pipelines_tf_gpu_test_reports",
+ "Examples directory": "run_examples_gpu_test_reports",
+ "Torch CUDA extension tests": "run_torch_cuda_extensions_gpu_test_reports",
}
if ci_event in ["push", "Nightly CI"] or ci_event.startswith("Past CI"):
@@ -1075,7 +1075,7 @@ def prepare_reports(title, header, reports, to_truncate=True):
"run_pipelines_torch_gpu": "PyTorch pipelines",
"run_pipelines_tf_gpu": "TensorFlow pipelines",
"run_examples_gpu": "Examples directory",
- "run_all_tests_torch_cuda_extensions_gpu": "Torch CUDA extension tests",
+ "run_torch_cuda_extensions_gpu": "Torch CUDA extension tests",
}
# Remove some entries in `additional_files` if they are not concerned.
@@ -1133,29 +1133,41 @@ def prepare_reports(title, header, reports, to_truncate=True):
)
# Let's only check the warning for the model testing job. Currently, the job `run_extract_warnings` is only run
- # when `inputs.job` (in the workflow file) is `run_tests_gpu`. The reason is: otherwise we need to save several
+ # when `inputs.job` (in the workflow file) is `run_models_gpu`. The reason is: otherwise we need to save several
# artifacts with different names which complicates the logic for an insignificant part of the CI workflow reporting.
selected_warnings = []
- if job_name == "run_tests_gpu":
+ if job_name == "run_models_gpu":
if "warnings_in_ci" in available_artifacts:
directory = available_artifacts["warnings_in_ci"].paths[0]["path"]
with open(os.path.join(directory, "selected_warnings.json")) as fp:
selected_warnings = json.load(fp)
- if not os.path.isdir(os.path.join(os.getcwd(), "prev_ci_results")):
- os.makedirs(os.path.join(os.getcwd(), "prev_ci_results"))
+ if not os.path.isdir(os.path.join(os.getcwd(), f"ci_results_{job_name}")):
+ os.makedirs(os.path.join(os.getcwd(), f"ci_results_{job_name}"))
# Only the model testing job is concerned: this condition is to avoid other jobs to upload the empty list as
# results.
- if job_name == "run_tests_gpu":
- with open("prev_ci_results/model_results.json", "w", encoding="UTF-8") as fp:
+ if job_name == "run_models_gpu":
+ with open(f"ci_results_{job_name}/model_results.json", "w", encoding="UTF-8") as fp:
json.dump(model_results, fp, indent=4, ensure_ascii=False)
+ # Must have the same keys as in `additional_results`.
+ # The values are used as the file names where to save the corresponding CI job results.
+ test_to_result_name = {
+ "PyTorch pipelines": "torch_pipeline",
+ "TensorFlow pipelines": "tf_pipeline",
+ "Examples directory": "example",
+ "Torch CUDA extension tests": "deepspeed",
+ }
+ for job, job_result in additional_results.items():
+ with open(f"ci_results_{job_name}/{test_to_result_name[job]}_results.json", "w", encoding="UTF-8") as fp:
+ json.dump(job_result, fp, indent=4, ensure_ascii=False)
+
prev_ci_artifacts = None
target_workflow = "huggingface/transformers/.github/workflows/self-scheduled.yml@refs/heads/main"
if os.environ.get("CI_WORKFLOW_REF") == target_workflow:
# Get the last previously completed CI's failure tables
- artifact_names = ["prev_ci_results"]
+ artifact_names = [f"ci_results_{job_name}"]
output_dir = os.path.join(os.getcwd(), "previous_reports")
os.makedirs(output_dir, exist_ok=True)
prev_ci_artifacts = get_last_daily_ci_reports(
diff --git a/utils/notification_service_quantization.py b/utils/notification_service_quantization.py
index 11bc57e618a7..6d026bc0d053 100644
--- a/utils/notification_service_quantization.py
+++ b/utils/notification_service_quantization.py
@@ -200,7 +200,7 @@ def post_reply(self):
"job_link": {},
}
for quant in quantization_matrix
- if f"run_tests_quantization_torch_gpu_{quant}" in available_artifacts
+        if f"run_quantization_torch_gpu_{quant}_test_reports" in available_artifacts
}
github_actions_jobs = get_jobs(
@@ -217,7 +217,7 @@ def post_reply(self):
break
for quant in quantization_results.keys():
- for artifact_path in available_artifacts[f"run_tests_quantization_torch_gpu_{quant}"].paths:
+        for artifact_path in available_artifacts[f"run_quantization_torch_gpu_{quant}_test_reports"].paths:
artifact = retrieve_artifact(artifact_path["path"], artifact_path["gpu"])
if "stats" in artifact:
# Link to the GitHub Action job
@@ -242,6 +242,13 @@ def post_reply(self):
{"line": line, "trace": stacktraces.pop(0)}
)
+ job_name = os.getenv("CI_TEST_JOB")
+ if not os.path.isdir(os.path.join(os.getcwd(), f"ci_results_{job_name}")):
+ os.makedirs(os.path.join(os.getcwd(), f"ci_results_{job_name}"))
+
+ with open(f"ci_results_{job_name}/quantization_results.json", "w", encoding="UTF-8") as fp:
+ json.dump(quantization_results, fp, indent=4, ensure_ascii=False)
+
message = QuantizationMessage(
title,
results=quantization_results,
diff --git a/utils/pr_slow_ci_models.py b/utils/pr_slow_ci_models.py
new file mode 100644
index 000000000000..391e99fc2276
--- /dev/null
+++ b/utils/pr_slow_ci_models.py
@@ -0,0 +1,145 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This script is used to get the models for which to run slow CI.
+
+A new model added in a pull request will be included, as well as models specified in a commit message with a prefix
+`[run-slow]`, `[run_slow]` or `[run slow]`. For example, the commit message `[run_slow]bert, gpt2` will give `bert` and
+`gpt2`.
+
+Usage:
+
+```bash
+python utils/pr_slow_ci_models.py
+```
+"""
+
+import argparse
+import re
+from pathlib import Path
+from typing import List
+
+from git import Repo
+
+
+PATH_TO_REPO = Path(__file__).parent.parent.resolve()
+
+
+def get_new_python_files_between_commits(base_commit: str, commits: List[str]) -> List[str]:
+ """
+ Get the list of added python files between a base commit and one or several commits.
+
+ Args:
+ base_commit (`str`):
+ The commit reference of where to compare for the diff. This is the current commit, not the branching point!
+ commits (`List[str]`):
+ The list of commits with which to compare the repo at `base_commit` (so the branching point).
+
+ Returns:
+ `List[str]`: The list of python files added between a base commit and one or several commits.
+ """
+ code_diff = []
+ for commit in commits:
+ for diff_obj in commit.diff(base_commit):
+ # We always add new python files
+ if diff_obj.change_type == "A" and diff_obj.b_path.endswith(".py"):
+ code_diff.append(diff_obj.b_path)
+
+ return code_diff
+
+
+def get_new_python_files() -> List[str]:
+ """
+ Return a list of python files that have been added between the current head and the main branch.
+
+ Returns:
+ `List[str]`: The list of python files added.
+ """
+ repo = Repo(PATH_TO_REPO)
+
+ try:
+ # For the cases where the main branch exists locally
+ main = repo.refs.main
+ except AttributeError:
+ # On GitHub Actions runners, it doesn't have local main branch
+ main = repo.remotes.origin.refs.main
+
+ print(f"main is at {main.commit}")
+ print(f"Current head is at {repo.head.commit}")
+
+ branching_commits = repo.merge_base(main, repo.head)
+ for commit in branching_commits:
+ print(f"Branching commit: {commit}")
+ return get_new_python_files_between_commits(repo.head.commit, branching_commits)
+
+
+def get_new_model():
+ new_files = get_new_python_files()
+ reg = re.compile(r"src/transformers/(models/.*)/modeling_.*\.py")
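+    # e.g. "src/transformers/models/my_new_model/modeling_my_new_model.py" -> "models/my_new_model" (hypothetical name)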
+
+ new_model = ""
+ for x in new_files:
+ find_new_model = reg.findall(x)
+ if len(find_new_model) > 0:
+ new_model = find_new_model[0]
+ # It's unlikely we have 2 new modeling files in a pull request.
+ break
+ return new_model
+
+
+def parse_commit_message(commit_message: str) -> str:
+ """
+ Parses the commit message to find the models specified in it to run slow CI.
+
+ Args:
+ commit_message (`str`): The commit message of the current commit.
+
+ Returns:
+        `str`: The substring in `commit_message` after `[run-slow]`, `[run_slow]` or `[run slow]`. If no such prefix is
+ found, the empty string is returned.
+ """
+ if commit_message is None:
+ return ""
+
+ command_search = re.search(r"\[([^\]]*)\](.*)", commit_message)
+ if command_search is None:
+ return ""
+
+ command = command_search.groups()[0]
+ command = command.lower().replace("-", " ").replace("_", " ")
+ run_slow = command == "run slow"
+ if run_slow:
+ models = command_search.groups()[1].strip()
+ return models
+ else:
+ return ""
+
+
+def get_models(commit_message: str):
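+    """
+    Turn a commit message into the list of model sub-directories to test. Following the module docstring example,
+    "[run_slow]bert, gpt2" gives ["models/bert", "models/gpt2"].
+    """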
+ models = parse_commit_message(commit_message)
+ return [f"models/{x}" for x in models.replace(",", " ").split()]
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--commit_message", type=str, default="", help="The commit message.")
+ args = parser.parse_args()
+
+ new_model = get_new_model()
+ specified_models = get_models(args.commit_message)
+ models = ([] if new_model == "" else [new_model]) + specified_models
+ print(sorted(set(models)))
diff --git a/utils/slow_documentation_tests.txt b/utils/slow_documentation_tests.txt
index dc5a6b5c30b8..71b0f27839f7 100644
--- a/utils/slow_documentation_tests.txt
+++ b/utils/slow_documentation_tests.txt
@@ -6,6 +6,7 @@ docs/source/en/model_doc/seamless_m4t.md
docs/source/en/model_doc/seamless_m4t_v2.md
docs/source/en/task_summary.md
docs/source/en/tasks/prompting.md
+docs/source/ja/model_doc/code_llama.md
src/transformers/models/blip_2/modeling_blip_2.py
src/transformers/models/ctrl/modeling_ctrl.py
src/transformers/models/fuyu/modeling_fuyu.py
diff --git a/utils/split_model_tests.py b/utils/split_model_tests.py
index fc8800ffcf1c..e5083aaeb46f 100644
--- a/utils/split_model_tests.py
+++ b/utils/split_model_tests.py
@@ -18,7 +18,7 @@
to split the list of jobs to run into multiple slices each containing a smaller number of jobs. This way, we can bypass
the maximum of 256 jobs in a matrix.
-See the `setup` and `run_tests_gpu` jobs defined in the workflow file `.github/workflows/self-scheduled.yml` for more
+See the `setup` and `run_models_gpu` jobs defined in the workflow file `.github/workflows/self-scheduled.yml` for more
details.
Usage: