diff --git a/docs/source/en/main_classes/pipelines.md b/docs/source/en/main_classes/pipelines.md index d7a701700d13..d5d132aaaba5 100644 --- a/docs/source/en/main_classes/pipelines.md +++ b/docs/source/en/main_classes/pipelines.md @@ -270,6 +270,11 @@ This is a simplified view, since the pipeline can handle automatically the batch about how many forward passes you inputs are actually going to trigger, you can optimize the `batch_size` independently of the inputs. The caveats from the previous section still apply. +## Pipeline FP16 inference +Models can be run in FP16, which can be significantly faster on GPU while saving memory. Most models will not suffer a noticeable loss in output quality from this, and the larger the model, the less likely such a loss becomes. + +To enable FP16 inference, pass `torch_dtype=torch.float16` or `torch_dtype='float16'` to the pipeline constructor. Note that this only works for models with a PyTorch backend. Your inputs will be converted to FP16 internally. + ## Pipeline custom code If you want to override a specific pipeline. diff --git a/docs/source/en/pipeline_tutorial.md b/docs/source/en/pipeline_tutorial.md index 8518f639ab9d..838b89432b4a 100644 --- a/docs/source/en/pipeline_tutorial.md +++ b/docs/source/en/pipeline_tutorial.md @@ -113,7 +113,9 @@ This will work regardless of whether you are using PyTorch or Tensorflow. transcriber = pipeline(model="openai/whisper-large-v2", device=0) ``` -If the model is too large for a single GPU and you are using PyTorch, you can set `device_map="auto"` to automatically +If the model is too large for a single GPU and you are using PyTorch, you can set `torch_dtype='float16'` to enable FP16 inference. This usually does not cause a significant drop in output quality, but make sure to evaluate it on your own models! + +Alternatively, you can set `device_map="auto"` to automatically determine how to load and store the model weights. Using the `device_map` argument requires the 🤗 [Accelerate](https://huggingface.co/docs/accelerate) package: @@ -342,4 +344,3 @@ gr.Interface.from_pipeline(pipe).launch() By default, the web demo runs on a local server. If you'd like to share it with others, you can generate a temporary public link by setting `share=True` in `launch()`. You can also host your demo on [Hugging Face Spaces](https://huggingface.co/spaces) for a permanent link.
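For illustration, a minimal sketch of the FP16 usage described in the documentation changes above, assuming a GPU is available and reusing `openai/whisper-large-v2` from the tutorial; the audio path is a placeholder:

```python
import torch
from transformers import pipeline

# Build the pipeline in FP16; floating-point inputs are cast to the same dtype during preprocessing.
transcriber = pipeline(
    model="openai/whisper-large-v2",
    device=0,                    # first GPU
    torch_dtype=torch.float16,   # equivalently: torch_dtype="float16"
)

# "sample.flac" is a placeholder path to a local audio file.
print(transcriber("sample.flac")["text"])
```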
- diff --git a/src/transformers/pipelines/depth_estimation.py b/src/transformers/pipelines/depth_estimation.py index c6431a499717..79a85008e7cf 100644 --- a/src/transformers/pipelines/depth_estimation.py +++ b/src/transformers/pipelines/depth_estimation.py @@ -91,6 +91,8 @@ def preprocess(self, image, timeout=None): image = load_image(image, timeout) self.image_size = image.size model_inputs = self.image_processor(images=image, return_tensors=self.framework) + if self.framework == "pt": + model_inputs = model_inputs.to(self.torch_dtype) return model_inputs def _forward(self, model_inputs): diff --git a/src/transformers/pipelines/document_question_answering.py b/src/transformers/pipelines/document_question_answering.py index 64714390b04f..c840c14a7191 100644 --- a/src/transformers/pipelines/document_question_answering.py +++ b/src/transformers/pipelines/document_question_answering.py @@ -294,7 +294,10 @@ def preprocess( if input.get("image", None) is not None: image = load_image(input["image"], timeout=timeout) if self.image_processor is not None: - image_features.update(self.image_processor(images=image, return_tensors=self.framework)) + image_inputs = self.image_processor(images=image, return_tensors=self.framework) + if self.framework == "pt": + image_inputs = image_inputs.to(self.torch_dtype) + image_features.update(image_inputs) elif self.feature_extractor is not None: image_features.update(self.feature_extractor(images=image, return_tensors=self.framework)) elif self.model_type == ModelType.VisionEncoderDecoder: diff --git a/src/transformers/pipelines/image_classification.py b/src/transformers/pipelines/image_classification.py index bfa005f06bab..c54f372baa9d 100644 --- a/src/transformers/pipelines/image_classification.py +++ b/src/transformers/pipelines/image_classification.py @@ -161,6 +161,8 @@ def __call__(self, images: Union[str, List[str], "Image.Image", List["Image.Imag def preprocess(self, image, timeout=None): image = load_image(image, timeout=timeout) model_inputs = self.image_processor(images=image, return_tensors=self.framework) + if self.framework == "pt": + model_inputs = model_inputs.to(self.torch_dtype) return model_inputs def _forward(self, model_inputs): diff --git a/src/transformers/pipelines/image_feature_extraction.py b/src/transformers/pipelines/image_feature_extraction.py index 3a361deabd79..391eb2b3aec7 100644 --- a/src/transformers/pipelines/image_feature_extraction.py +++ b/src/transformers/pipelines/image_feature_extraction.py @@ -60,6 +60,8 @@ def _sanitize_parameters(self, image_processor_kwargs=None, return_tensors=None, def preprocess(self, image, timeout=None, **image_processor_kwargs) -> Dict[str, GenericTensor]: image = load_image(image, timeout=timeout) model_inputs = self.image_processor(image, return_tensors=self.framework, **image_processor_kwargs) + if self.framework == "pt": + model_inputs = model_inputs.to(self.torch_dtype) return model_inputs def _forward(self, model_inputs): diff --git a/src/transformers/pipelines/image_segmentation.py b/src/transformers/pipelines/image_segmentation.py index 23fbd4fb79b1..e0fd3b7d85ab 100644 --- a/src/transformers/pipelines/image_segmentation.py +++ b/src/transformers/pipelines/image_segmentation.py @@ -147,6 +147,8 @@ def preprocess(self, image, subtask=None, timeout=None): else: kwargs = {"task_inputs": [subtask]} inputs = self.image_processor(images=[image], return_tensors="pt", **kwargs) + if self.framework == "pt": + inputs = inputs.to(self.torch_dtype) inputs["task_inputs"] = self.tokenizer( 
inputs["task_inputs"], padding="max_length", @@ -155,6 +157,8 @@ def preprocess(self, image, subtask=None, timeout=None): )["input_ids"] else: inputs = self.image_processor(images=[image], return_tensors="pt") + if self.framework == "pt": + inputs = inputs.to(self.torch_dtype) inputs["target_size"] = target_size return inputs diff --git a/src/transformers/pipelines/image_to_image.py b/src/transformers/pipelines/image_to_image.py index 8c34ee8dd3c8..cb66359a4ddd 100644 --- a/src/transformers/pipelines/image_to_image.py +++ b/src/transformers/pipelines/image_to_image.py @@ -119,6 +119,8 @@ def _forward(self, model_inputs): def preprocess(self, image, timeout=None): image = load_image(image, timeout=timeout) inputs = self.image_processor(images=[image], return_tensors="pt") + if self.framework == "pt": + inputs = inputs.to(self.torch_dtype) return inputs def postprocess(self, model_outputs): diff --git a/src/transformers/pipelines/image_to_text.py b/src/transformers/pipelines/image_to_text.py index 4a9a3744d841..88dce8e591ae 100644 --- a/src/transformers/pipelines/image_to_text.py +++ b/src/transformers/pipelines/image_to_text.py @@ -138,6 +138,8 @@ def preprocess(self, image, prompt=None, timeout=None): if model_type == "git": model_inputs = self.image_processor(images=image, return_tensors=self.framework) + if self.framework == "pt": + model_inputs = model_inputs.to(self.torch_dtype) input_ids = self.tokenizer(text=prompt, add_special_tokens=False).input_ids input_ids = [self.tokenizer.cls_token_id] + input_ids input_ids = torch.tensor(input_ids).unsqueeze(0) @@ -145,10 +147,14 @@ def preprocess(self, image, prompt=None, timeout=None): elif model_type == "pix2struct": model_inputs = self.image_processor(images=image, header_text=prompt, return_tensors=self.framework) + if self.framework == "pt": + model_inputs = model_inputs.to(self.torch_dtype) elif model_type != "vision-encoder-decoder": # vision-encoder-decoder does not support conditional generation model_inputs = self.image_processor(images=image, return_tensors=self.framework) + if self.framework == "pt": + model_inputs = model_inputs.to(self.torch_dtype) text_inputs = self.tokenizer(prompt, return_tensors=self.framework) model_inputs.update(text_inputs) @@ -157,6 +163,8 @@ def preprocess(self, image, prompt=None, timeout=None): else: model_inputs = self.image_processor(images=image, return_tensors=self.framework) + if self.framework == "pt": + model_inputs = model_inputs.to(self.torch_dtype) if self.model.config.model_type == "git" and prompt is None: model_inputs["input_ids"] = None diff --git a/src/transformers/pipelines/mask_generation.py b/src/transformers/pipelines/mask_generation.py index 68d407aff2d4..f87e45b7f8ec 100644 --- a/src/transformers/pipelines/mask_generation.py +++ b/src/transformers/pipelines/mask_generation.py @@ -181,6 +181,8 @@ def preprocess( image, target_size, crops_n_layers, crop_overlap_ratio, points_per_crop, crop_n_points_downscale_factor ) model_inputs = self.image_processor(images=cropped_images, return_tensors="pt") + if self.framework == "pt": + model_inputs = model_inputs.to(self.torch_dtype) with self.device_placement(): if self.framework == "pt": diff --git a/src/transformers/pipelines/object_detection.py b/src/transformers/pipelines/object_detection.py index 36946cbf8a45..d3e2135790ff 100644 --- a/src/transformers/pipelines/object_detection.py +++ b/src/transformers/pipelines/object_detection.py @@ -107,6 +107,8 @@ def preprocess(self, image, timeout=None): image = load_image(image, 
timeout=timeout) target_size = torch.IntTensor([[image.height, image.width]]) inputs = self.image_processor(images=[image], return_tensors="pt") + if self.framework == "pt": + inputs = inputs.to(self.torch_dtype) if self.tokenizer is not None: inputs = self.tokenizer(text=inputs["words"], boxes=inputs["boxes"], return_tensors="pt") inputs["target_size"] = target_size diff --git a/src/transformers/pipelines/video_classification.py b/src/transformers/pipelines/video_classification.py index 5702f23c5f60..68ea928bce56 100644 --- a/src/transformers/pipelines/video_classification.py +++ b/src/transformers/pipelines/video_classification.py @@ -106,6 +106,8 @@ def preprocess(self, video, num_frames=None, frame_sampling_rate=1): video = list(video) model_inputs = self.image_processor(video, return_tensors=self.framework) + if self.framework == "pt": + model_inputs = model_inputs.to(self.torch_dtype) return model_inputs def _forward(self, model_inputs): diff --git a/src/transformers/pipelines/visual_question_answering.py b/src/transformers/pipelines/visual_question_answering.py index 9455b0d85928..e5849cbdec19 100644 --- a/src/transformers/pipelines/visual_question_answering.py +++ b/src/transformers/pipelines/visual_question_answering.py @@ -155,6 +155,8 @@ def preprocess(self, inputs, padding=False, truncation=False, timeout=None): truncation=truncation, ) image_features = self.image_processor(images=image, return_tensors=self.framework) + if self.framework == "pt": + image_features = image_features.to(self.torch_dtype) model_inputs.update(image_features) return model_inputs diff --git a/src/transformers/pipelines/zero_shot_audio_classification.py b/src/transformers/pipelines/zero_shot_audio_classification.py index c3606e3c2b83..d9109aebd9c5 100644 --- a/src/transformers/pipelines/zero_shot_audio_classification.py +++ b/src/transformers/pipelines/zero_shot_audio_classification.py @@ -121,6 +121,8 @@ def preprocess(self, audio, candidate_labels=None, hypothesis_template="This is inputs = self.feature_extractor( [audio], sampling_rate=self.feature_extractor.sampling_rate, return_tensors="pt" ) + if self.framework == "pt": + inputs = inputs.to(self.torch_dtype) inputs["candidate_labels"] = candidate_labels sequences = [hypothesis_template.format(x) for x in candidate_labels] text_inputs = self.tokenizer(sequences, return_tensors=self.framework, padding=True) diff --git a/src/transformers/pipelines/zero_shot_image_classification.py b/src/transformers/pipelines/zero_shot_image_classification.py index 8e40d0e6a5cb..b0ceba8cbe67 100644 --- a/src/transformers/pipelines/zero_shot_image_classification.py +++ b/src/transformers/pipelines/zero_shot_image_classification.py @@ -120,6 +120,8 @@ def _sanitize_parameters(self, **kwargs): def preprocess(self, image, candidate_labels=None, hypothesis_template="This is a photo of {}.", timeout=None): image = load_image(image, timeout=timeout) inputs = self.image_processor(images=[image], return_tensors=self.framework) + if self.framework == "pt": + inputs = inputs.to(self.torch_dtype) inputs["candidate_labels"] = candidate_labels sequences = [hypothesis_template.format(x) for x in candidate_labels] padding = "max_length" if self.model.config.model_type == "siglip" else True diff --git a/src/transformers/pipelines/zero_shot_object_detection.py b/src/transformers/pipelines/zero_shot_object_detection.py index 5be89332cbd9..9ad575202266 100644 --- a/src/transformers/pipelines/zero_shot_object_detection.py +++ b/src/transformers/pipelines/zero_shot_object_detection.py @@ 
-156,6 +156,8 @@ def preprocess(self, inputs, timeout=None): for i, candidate_label in enumerate(candidate_labels): text_inputs = self.tokenizer(candidate_label, return_tensors=self.framework) image_features = self.image_processor(image, return_tensors=self.framework) + if self.framework == "pt": + image_features = image_features.to(self.torch_dtype) yield { "is_last": i == len(candidate_labels) - 1, "target_size": target_size, diff --git a/tests/pipelines/test_pipelines_audio_classification.py b/tests/pipelines/test_pipelines_audio_classification.py index cdedf94be180..a8c5deb22844 100644 --- a/tests/pipelines/test_pipelines_audio_classification.py +++ b/tests/pipelines/test_pipelines_audio_classification.py @@ -35,8 +35,10 @@ class AudioClassificationPipelineTests(unittest.TestCase): model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING tf_model_mapping = TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING - def get_test_pipeline(self, model, tokenizer, processor): - audio_classifier = AudioClassificationPipeline(model=model, feature_extractor=processor) + def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"): + audio_classifier = AudioClassificationPipeline( + model=model, feature_extractor=processor, torch_dtype=torch_dtype + ) # test with a raw waveform audio = np.zeros((34000,)) diff --git a/tests/pipelines/test_pipelines_automatic_speech_recognition.py b/tests/pipelines/test_pipelines_automatic_speech_recognition.py index 35c36aa0e660..11bbde4143f7 100644 --- a/tests/pipelines/test_pipelines_automatic_speech_recognition.py +++ b/tests/pipelines/test_pipelines_automatic_speech_recognition.py @@ -66,14 +66,14 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase): + (MODEL_FOR_CTC_MAPPING.items() if MODEL_FOR_CTC_MAPPING else []) ) - def get_test_pipeline(self, model, tokenizer, processor): + def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"): if tokenizer is None: # Side effect of no Fast Tokenizer class for these model, so skipping # But the slow tokenizer test should still run as they're quite small self.skipTest(reason="No tokenizer available") speech_recognizer = AutomaticSpeechRecognitionPipeline( - model=model, tokenizer=tokenizer, feature_extractor=processor + model=model, tokenizer=tokenizer, feature_extractor=processor, torch_dtype=torch_dtype ) # test with a raw waveform diff --git a/tests/pipelines/test_pipelines_depth_estimation.py b/tests/pipelines/test_pipelines_depth_estimation.py index 259ab5ef4c39..1f2700fa747c 100644 --- a/tests/pipelines/test_pipelines_depth_estimation.py +++ b/tests/pipelines/test_pipelines_depth_estimation.py @@ -56,8 +56,8 @@ def hashimage(image: Image) -> str: class DepthEstimationPipelineTests(unittest.TestCase): model_mapping = MODEL_FOR_DEPTH_ESTIMATION_MAPPING - def get_test_pipeline(self, model, tokenizer, processor): - depth_estimator = DepthEstimationPipeline(model=model, image_processor=processor) + def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"): + depth_estimator = DepthEstimationPipeline(model=model, image_processor=processor, torch_dtype=torch_dtype) return depth_estimator, [ "./tests/fixtures/tests_samples/COCO/000000039769.png", "./tests/fixtures/tests_samples/COCO/000000039769.png", diff --git a/tests/pipelines/test_pipelines_document_question_answering.py b/tests/pipelines/test_pipelines_document_question_answering.py index d1fd87e18e37..41a6a0c383f9 100644 --- a/tests/pipelines/test_pipelines_document_question_answering.py +++ 
b/tests/pipelines/test_pipelines_document_question_answering.py @@ -61,9 +61,13 @@ class DocumentQuestionAnsweringPipelineTests(unittest.TestCase): @require_pytesseract @require_vision - def get_test_pipeline(self, model, tokenizer, processor): + def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"): dqa_pipeline = pipeline( - "document-question-answering", model=model, tokenizer=tokenizer, image_processor=processor + "document-question-answering", + model=model, + tokenizer=tokenizer, + image_processor=processor, + torch_dtype=torch_dtype, ) image = INVOICE_URL diff --git a/tests/pipelines/test_pipelines_feature_extraction.py b/tests/pipelines/test_pipelines_feature_extraction.py index ff5f8314b65c..4d25941c3f0f 100644 --- a/tests/pipelines/test_pipelines_feature_extraction.py +++ b/tests/pipelines/test_pipelines_feature_extraction.py @@ -174,7 +174,7 @@ def get_shape(self, input_, shape=None): raise ValueError("We expect lists of floats, nothing else") return shape - def get_test_pipeline(self, model, tokenizer, processor): + def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"): if tokenizer is None: self.skipTest(reason="No tokenizer") elif ( @@ -193,7 +193,9 @@ def get_test_pipeline(self, model, tokenizer, processor): For now ignore those. """ ) - feature_extractor = FeatureExtractionPipeline(model=model, tokenizer=tokenizer, feature_extractor=processor) + feature_extractor = FeatureExtractionPipeline( + model=model, tokenizer=tokenizer, feature_extractor=processor, torch_dtype=torch_dtype + ) return feature_extractor, ["This is a test", "This is another test"] def run_pipeline_test(self, feature_extractor, examples): diff --git a/tests/pipelines/test_pipelines_fill_mask.py b/tests/pipelines/test_pipelines_fill_mask.py index 93dacbd15bf4..81aa23563710 100644 --- a/tests/pipelines/test_pipelines_fill_mask.py +++ b/tests/pipelines/test_pipelines_fill_mask.py @@ -251,11 +251,11 @@ def test_model_no_pad_tf(self): unmasker.tokenizer.pad_token = None self.run_pipeline_test(unmasker, []) - def get_test_pipeline(self, model, tokenizer, processor): + def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"): if tokenizer is None or tokenizer.mask_token_id is None: self.skipTest(reason="The provided tokenizer has no mask token, (probably reformer or wav2vec2)") - fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer) + fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype) examples = [ f"This is another {tokenizer.mask_token} test", ] diff --git a/tests/pipelines/test_pipelines_image_classification.py b/tests/pipelines/test_pipelines_image_classification.py index 3e93f31d1880..823c66c16f32 100644 --- a/tests/pipelines/test_pipelines_image_classification.py +++ b/tests/pipelines/test_pipelines_image_classification.py @@ -55,8 +55,10 @@ class ImageClassificationPipelineTests(unittest.TestCase): model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING tf_model_mapping = TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING - def get_test_pipeline(self, model, tokenizer, processor): - image_classifier = ImageClassificationPipeline(model=model, image_processor=processor, top_k=2) + def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"): + image_classifier = ImageClassificationPipeline( + model=model, image_processor=processor, top_k=2, torch_dtype=torch_dtype + ) examples = [ Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"), 
"http://images.cocodataset.org/val2017/000000039769.jpg", diff --git a/tests/pipelines/test_pipelines_image_feature_extraction.py b/tests/pipelines/test_pipelines_image_feature_extraction.py index 53af000d6de9..07b27e7b6465 100644 --- a/tests/pipelines/test_pipelines_image_feature_extraction.py +++ b/tests/pipelines/test_pipelines_image_feature_extraction.py @@ -157,7 +157,7 @@ def test_return_tensors_tf(self): outputs = feature_extractor(img, return_tensors=True) self.assertTrue(tf.is_tensor(outputs)) - def get_test_pipeline(self, model, tokenizer, processor): + def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"): if processor is None: self.skipTest(reason="No image processor") @@ -175,7 +175,9 @@ def get_test_pipeline(self, model, tokenizer, processor): """ ) - feature_extractor = ImageFeatureExtractionPipeline(model=model, image_processor=processor) + feature_extractor = ImageFeatureExtractionPipeline( + model=model, image_processor=processor, torch_dtype=torch_dtype + ) img = prepare_img() return feature_extractor, [img, img] diff --git a/tests/pipelines/test_pipelines_image_segmentation.py b/tests/pipelines/test_pipelines_image_segmentation.py index 8f2ae47f14ba..523bd0b52b68 100644 --- a/tests/pipelines/test_pipelines_image_segmentation.py +++ b/tests/pipelines/test_pipelines_image_segmentation.py @@ -87,8 +87,8 @@ class ImageSegmentationPipelineTests(unittest.TestCase): + (MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING.items() if MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING else []) ) - def get_test_pipeline(self, model, tokenizer, processor): - image_segmenter = ImageSegmentationPipeline(model=model, image_processor=processor) + def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"): + image_segmenter = ImageSegmentationPipeline(model=model, image_processor=processor, torch_dtype=torch_dtype) return image_segmenter, [ "./tests/fixtures/tests_samples/COCO/000000039769.png", "./tests/fixtures/tests_samples/COCO/000000039769.png", diff --git a/tests/pipelines/test_pipelines_image_to_image.py b/tests/pipelines/test_pipelines_image_to_image.py index e9110bb69295..29d590a8e34c 100644 --- a/tests/pipelines/test_pipelines_image_to_image.py +++ b/tests/pipelines/test_pipelines_image_to_image.py @@ -54,9 +54,9 @@ class ImageToImagePipelineTests(unittest.TestCase): @require_torch @require_vision @slow - def test_pipeline(self): + def test_pipeline(self, torch_dtype="float32"): model_id = "caidas/swin2SR-classical-sr-x2-64" - upscaler = pipeline("image-to-image", model=model_id) + upscaler = pipeline("image-to-image", model=model_id, torch_dtype=torch_dtype) upscaled_list = upscaler(self.examples) self.assertEqual(len(upscaled_list), len(self.examples)) @@ -66,6 +66,12 @@ def test_pipeline(self): self.assertEqual(upscaled_list[0].size, (1296, 976)) self.assertEqual(upscaled_list[1].size, (1296, 976)) + @require_torch + @require_vision + @slow + def test_pipeline_fp16(self): + self.test_pipeline(torch_dtype="float16") + @require_torch @require_vision @slow diff --git a/tests/pipelines/test_pipelines_image_to_text.py b/tests/pipelines/test_pipelines_image_to_text.py index c77353a261f9..6d6c11a59c10 100644 --- a/tests/pipelines/test_pipelines_image_to_text.py +++ b/tests/pipelines/test_pipelines_image_to_text.py @@ -45,8 +45,10 @@ class ImageToTextPipelineTests(unittest.TestCase): model_mapping = MODEL_FOR_VISION_2_SEQ_MAPPING tf_model_mapping = TF_MODEL_FOR_VISION_2_SEQ_MAPPING - def get_test_pipeline(self, model, tokenizer, processor): - pipe = 
pipeline("image-to-text", model=model, tokenizer=tokenizer, image_processor=processor) + def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"): + pipe = pipeline( + "image-to-text", model=model, tokenizer=tokenizer, image_processor=processor, torch_dtype=torch_dtype + ) examples = [ Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"), "./tests/fixtures/tests_samples/COCO/000000039769.png", diff --git a/tests/pipelines/test_pipelines_mask_generation.py b/tests/pipelines/test_pipelines_mask_generation.py index 643ee84e6837..50fcd676da50 100644 --- a/tests/pipelines/test_pipelines_mask_generation.py +++ b/tests/pipelines/test_pipelines_mask_generation.py @@ -67,8 +67,8 @@ class MaskGenerationPipelineTests(unittest.TestCase): (list(TF_MODEL_FOR_MASK_GENERATION_MAPPING.items()) if TF_MODEL_FOR_MASK_GENERATION_MAPPING else []) ) - def get_test_pipeline(self, model, tokenizer, processor): - image_segmenter = MaskGenerationPipeline(model=model, image_processor=processor) + def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"): + image_segmenter = MaskGenerationPipeline(model=model, image_processor=processor, torch_dtype=torch_dtype) return image_segmenter, [ "./tests/fixtures/tests_samples/COCO/000000039769.png", "./tests/fixtures/tests_samples/COCO/000000039769.png", diff --git a/tests/pipelines/test_pipelines_object_detection.py b/tests/pipelines/test_pipelines_object_detection.py index 76a6ab807cd9..f14e5e6b68d7 100644 --- a/tests/pipelines/test_pipelines_object_detection.py +++ b/tests/pipelines/test_pipelines_object_detection.py @@ -53,8 +53,8 @@ def open(*args, **kwargs): class ObjectDetectionPipelineTests(unittest.TestCase): model_mapping = MODEL_FOR_OBJECT_DETECTION_MAPPING - def get_test_pipeline(self, model, tokenizer, processor): - object_detector = ObjectDetectionPipeline(model=model, image_processor=processor) + def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"): + object_detector = ObjectDetectionPipeline(model=model, image_processor=processor, torch_dtype=torch_dtype) return object_detector, ["./tests/fixtures/tests_samples/COCO/000000039769.png"] def run_pipeline_test(self, object_detector, examples): diff --git a/tests/pipelines/test_pipelines_question_answering.py b/tests/pipelines/test_pipelines_question_answering.py index f7683aec15c3..8b68989600ee 100644 --- a/tests/pipelines/test_pipelines_question_answering.py +++ b/tests/pipelines/test_pipelines_question_answering.py @@ -50,12 +50,12 @@ class QAPipelineTests(unittest.TestCase): config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP } - def get_test_pipeline(self, model, tokenizer, processor): + def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"): if isinstance(model.config, LxmertConfig): # This is an bimodal model, we need to find a more consistent way # to switch on those models. 
return None, None - question_answerer = QuestionAnsweringPipeline(model, tokenizer) + question_answerer = QuestionAnsweringPipeline(model, tokenizer, torch_dtype=torch_dtype) examples = [ {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."}, diff --git a/tests/pipelines/test_pipelines_summarization.py b/tests/pipelines/test_pipelines_summarization.py index 8d745c376d84..fb1dce0ca384 100644 --- a/tests/pipelines/test_pipelines_summarization.py +++ b/tests/pipelines/test_pipelines_summarization.py @@ -32,8 +32,8 @@ class SummarizationPipelineTests(unittest.TestCase): model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING - def get_test_pipeline(self, model, tokenizer, processor): - summarizer = SummarizationPipeline(model=model, tokenizer=tokenizer) + def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"): + summarizer = SummarizationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype) return summarizer, ["(CNN)The Palestinian Authority officially became", "Some other text"] def run_pipeline_test(self, summarizer, _): diff --git a/tests/pipelines/test_pipelines_table_question_answering.py b/tests/pipelines/test_pipelines_table_question_answering.py index a30763fc096d..9481ab200063 100644 --- a/tests/pipelines/test_pipelines_table_question_answering.py +++ b/tests/pipelines/test_pipelines_table_question_answering.py @@ -152,9 +152,9 @@ def test_small_model_tf(self): @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+") @require_torch - def test_small_model_pt(self): + def test_small_model_pt(self, torch_dtype="float32"): model_id = "lysandre/tiny-tapas-random-wtq" - model = AutoModelForTableQuestionAnswering.from_pretrained(model_id) + model = AutoModelForTableQuestionAnswering.from_pretrained(model_id, torch_dtype=torch_dtype) tokenizer = AutoTokenizer.from_pretrained(model_id) self.assertIsInstance(model.config.aggregation_labels, dict) self.assertIsInstance(model.config.no_aggregation_label_index, int) @@ -255,9 +255,14 @@ def test_small_model_pt(self): @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+") @require_torch - def test_slow_tokenizer_sqa_pt(self): + def test_small_model_pt_fp16(self): + self.test_small_model_pt(torch_dtype="float16") + + @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+") + @require_torch + def test_slow_tokenizer_sqa_pt(self, torch_dtype="float32"): model_id = "lysandre/tiny-tapas-random-sqa" - model = AutoModelForTableQuestionAnswering.from_pretrained(model_id) + model = AutoModelForTableQuestionAnswering.from_pretrained(model_id, torch_dtype=torch_dtype) tokenizer = AutoTokenizer.from_pretrained(model_id) table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer) @@ -373,6 +378,11 @@ def test_slow_tokenizer_sqa_pt(self): }, ) + @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+") + @require_torch + def test_slow_tokenizer_sqa_pt_fp16(self): + self.test_slow_tokenizer_sqa_pt(torch_dtype="float16") + @require_tf @require_tensorflow_probability @require_pandas @@ -498,8 +508,8 @@ def test_slow_tokenizer_sqa_tf(self): @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+") @slow @require_torch - def test_integration_wtq_pt(self): - 
table_querier = pipeline("table-question-answering") + def test_integration_wtq_pt(self, torch_dtype="float32"): + table_querier = pipeline("table-question-answering", torch_dtype=torch_dtype) data = { "Repository": ["Transformers", "Datasets", "Tokenizers"], @@ -541,6 +551,12 @@ def test_integration_wtq_pt(self): ] self.assertListEqual(results, expected_results) + @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+") + @slow + @require_torch + def test_integration_wtq_pt_fp16(self): + self.test_integration_wtq_pt(torch_dtype="float16") + @slow @require_tensorflow_probability @require_pandas @@ -593,11 +609,12 @@ def test_integration_wtq_tf(self): @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+") @slow @require_torch - def test_integration_sqa_pt(self): + def test_integration_sqa_pt(self, torch_dtype="float32"): table_querier = pipeline( "table-question-answering", model="google/tapas-base-finetuned-sqa", tokenizer="google/tapas-base-finetuned-sqa", + torch_dtype=torch_dtype, ) data = { "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"], @@ -615,6 +632,12 @@ def test_integration_sqa_pt(self): ] self.assertListEqual(results, expected_results) + @unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+") + @slow + @require_torch + def test_integration_sqa_pt_fp16(self): + self.test_integration_sqa_pt(torch_dtype="float16") + @slow @require_tensorflow_probability @require_pandas @@ -645,11 +668,12 @@ def test_integration_sqa_tf(self): @slow @require_torch - def test_large_model_pt_tapex(self): + def test_large_model_pt_tapex(self, torch_dtype="float32"): model_id = "microsoft/tapex-large-finetuned-wtq" table_querier = pipeline( "table-question-answering", model=model_id, + torch_dtype=torch_dtype, ) data = { "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"], diff --git a/tests/pipelines/test_pipelines_text2text_generation.py b/tests/pipelines/test_pipelines_text2text_generation.py index eccae9850b3b..52fb59edd364 100644 --- a/tests/pipelines/test_pipelines_text2text_generation.py +++ b/tests/pipelines/test_pipelines_text2text_generation.py @@ -35,8 +35,8 @@ class Text2TextGenerationPipelineTests(unittest.TestCase): model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING - def get_test_pipeline(self, model, tokenizer, processor): - generator = Text2TextGenerationPipeline(model=model, tokenizer=tokenizer) + def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"): + generator = Text2TextGenerationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype) return generator, ["Something to write", "Something else"] def run_pipeline_test(self, generator, _): diff --git a/tests/pipelines/test_pipelines_text_classification.py b/tests/pipelines/test_pipelines_text_classification.py index 63adfc45a029..4956cb8aed13 100644 --- a/tests/pipelines/test_pipelines_text_classification.py +++ b/tests/pipelines/test_pipelines_text_classification.py @@ -179,8 +179,8 @@ def test_tf_bert(self): outputs = text_classifier("Birds are a type of animal") self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 0.988}]) - def get_test_pipeline(self, model, tokenizer, processor): - text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer) + def get_test_pipeline(self, model, tokenizer, processor, 
torch_dtype="float32"): + text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype) return text_classifier, ["HuggingFace is in", "This is another test"] def run_pipeline_test(self, text_classifier, _): diff --git a/tests/pipelines/test_pipelines_text_generation.py b/tests/pipelines/test_pipelines_text_generation.py index 695befe32928..94132b5f5597 100644 --- a/tests/pipelines/test_pipelines_text_generation.py +++ b/tests/pipelines/test_pipelines_text_generation.py @@ -320,8 +320,8 @@ def test_small_chat_model_tf(self): ], ) - def get_test_pipeline(self, model, tokenizer, processor): - text_generator = TextGenerationPipeline(model=model, tokenizer=tokenizer) + def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"): + text_generator = TextGenerationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype) return text_generator, ["This is a test", "Another test"] def test_stop_sequence_stopping_criteria(self): diff --git a/tests/pipelines/test_pipelines_text_to_audio.py b/tests/pipelines/test_pipelines_text_to_audio.py index b780d26d79a4..655fe5961b52 100644 --- a/tests/pipelines/test_pipelines_text_to_audio.py +++ b/tests/pipelines/test_pipelines_text_to_audio.py @@ -250,8 +250,8 @@ def test_generative_model_kwargs(self): outputs = music_generator("This is a test", forward_params=forward_params, generate_kwargs=generate_kwargs) self.assertListEqual(outputs["audio"].tolist(), audio.tolist()) - def get_test_pipeline(self, model, tokenizer, processor): - speech_generator = TextToAudioPipeline(model=model, tokenizer=tokenizer) + def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"): + speech_generator = TextToAudioPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype) return speech_generator, ["This is a test", "Another test"] def run_pipeline_test(self, speech_generator, _): diff --git a/tests/pipelines/test_pipelines_token_classification.py b/tests/pipelines/test_pipelines_token_classification.py index eda9ac014bf7..41415c8c3458 100644 --- a/tests/pipelines/test_pipelines_token_classification.py +++ b/tests/pipelines/test_pipelines_token_classification.py @@ -56,8 +56,8 @@ class TokenClassificationPipelineTests(unittest.TestCase): config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP } - def get_test_pipeline(self, model, tokenizer, processor): - token_classifier = TokenClassificationPipeline(model=model, tokenizer=tokenizer) + def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"): + token_classifier = TokenClassificationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype) return token_classifier, ["A simple string", "A simple string that is quite a bit longer"] def run_pipeline_test(self, token_classifier, _): diff --git a/tests/pipelines/test_pipelines_translation.py b/tests/pipelines/test_pipelines_translation.py index 61d390fe76eb..c31ba49e7660 100644 --- a/tests/pipelines/test_pipelines_translation.py +++ b/tests/pipelines/test_pipelines_translation.py @@ -35,12 +35,14 @@ class TranslationPipelineTests(unittest.TestCase): model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING - def get_test_pipeline(self, model, tokenizer, processor): + def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"): if isinstance(model.config, MBartConfig): src_lang, tgt_lang = list(tokenizer.lang_code_to_id.keys())[:2] - 
translator = TranslationPipeline(model=model, tokenizer=tokenizer, src_lang=src_lang, tgt_lang=tgt_lang) + translator = TranslationPipeline( + model=model, tokenizer=tokenizer, src_lang=src_lang, tgt_lang=tgt_lang, torch_dtype=torch_dtype + ) else: - translator = TranslationPipeline(model=model, tokenizer=tokenizer) + translator = TranslationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch_dtype) return translator, ["Some string", "Some other text"] def run_pipeline_test(self, translator, _): diff --git a/tests/pipelines/test_pipelines_video_classification.py b/tests/pipelines/test_pipelines_video_classification.py index 392d3b31b4c9..280d6990788e 100644 --- a/tests/pipelines/test_pipelines_video_classification.py +++ b/tests/pipelines/test_pipelines_video_classification.py @@ -38,11 +38,13 @@ class VideoClassificationPipelineTests(unittest.TestCase): model_mapping = MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING - def get_test_pipeline(self, model, tokenizer, processor): + def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"): example_video_filepath = hf_hub_download( repo_id="nateraw/video-demo", filename="archery.mp4", repo_type="dataset" ) - video_classifier = VideoClassificationPipeline(model=model, image_processor=processor, top_k=2) + video_classifier = VideoClassificationPipeline( + model=model, image_processor=processor, top_k=2, torch_dtype=torch_dtype + ) examples = [ example_video_filepath, "https://huggingface.co/datasets/nateraw/video-demo/resolve/main/archery.mp4", diff --git a/tests/pipelines/test_pipelines_visual_question_answering.py b/tests/pipelines/test_pipelines_visual_question_answering.py index e056adee2331..45f935a62aaf 100644 --- a/tests/pipelines/test_pipelines_visual_question_answering.py +++ b/tests/pipelines/test_pipelines_visual_question_answering.py @@ -55,8 +55,10 @@ def open(*args, **kwargs): class VisualQuestionAnsweringPipelineTests(unittest.TestCase): model_mapping = MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING - def get_test_pipeline(self, model, tokenizer, processor): - vqa_pipeline = pipeline("visual-question-answering", model="hf-internal-testing/tiny-vilt-random-vqa") + def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"): + vqa_pipeline = pipeline( + "visual-question-answering", model="hf-internal-testing/tiny-vilt-random-vqa", torch_dtype=torch_dtype + ) examples = [ { "image": Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"), diff --git a/tests/pipelines/test_pipelines_zero_shot.py b/tests/pipelines/test_pipelines_zero_shot.py index 2e61d97c1dc8..1003898df6c9 100644 --- a/tests/pipelines/test_pipelines_zero_shot.py +++ b/tests/pipelines/test_pipelines_zero_shot.py @@ -42,9 +42,9 @@ class ZeroShotClassificationPipelineTests(unittest.TestCase): config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP } - def get_test_pipeline(self, model, tokenizer, processor): + def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"): classifier = ZeroShotClassificationPipeline( - model=model, tokenizer=tokenizer, candidate_labels=["polics", "health"] + model=model, tokenizer=tokenizer, candidate_labels=["polics", "health"], torch_dtype=torch_dtype ) return classifier, ["Who are you voting for in 2020?", "My stomach hurts."] diff --git a/tests/pipelines/test_pipelines_zero_shot_audio_classification.py b/tests/pipelines/test_pipelines_zero_shot_audio_classification.py index 60562fe7aa11..c0894fb394b7 100644 --- 
a/tests/pipelines/test_pipelines_zero_shot_audio_classification.py +++ b/tests/pipelines/test_pipelines_zero_shot_audio_classification.py @@ -28,9 +28,11 @@ class ZeroShotAudioClassificationPipelineTests(unittest.TestCase): # model_mapping = {CLAPConfig: CLAPModel} @require_torch - def test_small_model_pt(self): + def test_small_model_pt(self, torch_dtype="float32"): audio_classifier = pipeline( - task="zero-shot-audio-classification", model="hf-internal-testing/tiny-clap-htsat-unfused" + task="zero-shot-audio-classification", + model="hf-internal-testing/tiny-clap-htsat-unfused", + torch_dtype=torch_dtype, ) dataset = load_dataset("hf-internal-testing/ashraq-esc50-1-dog-example") audio = dataset["train"]["audio"][-1]["array"] @@ -40,6 +42,10 @@ def test_small_model_pt(self): [{"score": 0.501, "label": "Sound of a dog"}, {"score": 0.499, "label": "Sound of vaccum cleaner"}], ) + @require_torch + def test_small_model_pt_fp16(self): + self.test_small_model_pt(torch_dtype="float16") + @unittest.skip(reason="No models are available in TF") def test_small_model_tf(self): pass diff --git a/tests/pipelines/test_pipelines_zero_shot_image_classification.py b/tests/pipelines/test_pipelines_zero_shot_image_classification.py index 5c3208866ee2..b4501e437335 100644 --- a/tests/pipelines/test_pipelines_zero_shot_image_classification.py +++ b/tests/pipelines/test_pipelines_zero_shot_image_classification.py @@ -71,9 +71,9 @@ class ZeroShotImageClassificationPipelineTests(unittest.TestCase): # outputs = pipe([image] * 3, batch_size=2, candidate_labels=["A", "B"]) @require_torch - def test_small_model_pt(self): + def test_small_model_pt(self, torch_dtype="float32"): image_classifier = pipeline( - model="hf-internal-testing/tiny-random-clip-zero-shot-image-classification", + model="hf-internal-testing/tiny-random-clip-zero-shot-image-classification", torch_dtype=torch_dtype ) image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png") output = image_classifier(image, candidate_labels=["a", "b", "c"]) @@ -127,6 +127,10 @@ def test_small_model_pt(self): ], ) + @require_torch + def test_small_model_pt_fp16(self): + self.test_small_model_pt(torch_dtype="float16") + @require_tf def test_small_model_tf(self): image_classifier = pipeline( diff --git a/tests/pipelines/test_pipelines_zero_shot_object_detection.py b/tests/pipelines/test_pipelines_zero_shot_object_detection.py index 065e5c211e67..799c54dfbb87 100644 --- a/tests/pipelines/test_pipelines_zero_shot_object_detection.py +++ b/tests/pipelines/test_pipelines_zero_shot_object_detection.py @@ -43,9 +43,11 @@ def open(*args, **kwargs): class ZeroShotObjectDetectionPipelineTests(unittest.TestCase): model_mapping = MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING - def get_test_pipeline(self, model, tokenizer, processor): + def get_test_pipeline(self, model, tokenizer, processor, torch_dtype="float32"): object_detector = pipeline( - "zero-shot-object-detection", model="hf-internal-testing/tiny-random-owlvit-object-detection" + "zero-shot-object-detection", + model="hf-internal-testing/tiny-random-owlvit-object-detection", + torch_dtype=torch_dtype, ) examples = [ diff --git a/tests/test_pipeline_mixin.py b/tests/test_pipeline_mixin.py index f2292510f711..6ca7ea0681db 100644 --- a/tests/test_pipeline_mixin.py +++ b/tests/test_pipeline_mixin.py @@ -126,16 +126,18 @@ class PipelineTesterMixin: pipeline_model_mapping = None supported_frameworks = ["pt", "tf"] - def run_task_tests(self, task): + def run_task_tests(self, task, torch_dtype="float32"): """Run 
pipeline tests for a specific `task` Args: task (`str`): A task name. This should be a key in the mapping `pipeline_test_mapping`. + torch_dtype (`str`, `optional`, defaults to `'float32'`): + The torch dtype to use for the model. Can be used for FP16/other precision inference. """ if task not in self.pipeline_model_mapping: self.skipTest( - f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: `{task}` is not in " + f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')}_{torch_dtype} is skipped: `{task}` is not in " f"`self.pipeline_model_mapping` for `{self.__class__.__name__}`." ) @@ -171,10 +173,12 @@ def run_task_tests(self, task): repo_name = model_arch_name self.run_model_pipeline_tests( - task, repo_name, model_architecture, tokenizer_names, processor_names, commit + task, repo_name, model_architecture, tokenizer_names, processor_names, commit, torch_dtype ) - def run_model_pipeline_tests(self, task, repo_name, model_architecture, tokenizer_names, processor_names, commit): + def run_model_pipeline_tests( + self, task, repo_name, model_architecture, tokenizer_names, processor_names, commit, torch_dtype="float32" + ): """Run pipeline tests for a specific `task` with the give model class and tokenizer/processor class names Args: @@ -188,6 +192,10 @@ def run_model_pipeline_tests(self, task, repo_name, model_architecture, tokenize A list of names of a subclasses of `PreTrainedTokenizerFast` or `PreTrainedTokenizer`. processor_names (`List[str]`): A list of names of subclasses of `BaseImageProcessor` or `FeatureExtractionMixin`. + commit (`str`): + The commit hash of the model repository on the Hub. + torch_dtype (`str`, `optional`, defaults to `'float32'`): + The torch dtype to use for the model. Can be used for FP16/other precision inference. """ # Get an instance of the corresponding class `XXXPipelineTests` in order to use `get_test_pipeline` and # `run_pipeline_test`. @@ -203,14 +211,18 @@ def run_model_pipeline_tests(self, task, repo_name, model_architecture, tokenize processor_name, ): logger.warning( - f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: test is " + f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')}_{torch_dtype} is skipped: test is " f"currently known to fail for: model `{model_architecture.__name__}` | tokenizer " f"`{tokenizer_name}` | processor `{processor_name}`." ) continue - self.run_pipeline_test(task, repo_name, model_architecture, tokenizer_name, processor_name, commit) + self.run_pipeline_test( + task, repo_name, model_architecture, tokenizer_name, processor_name, commit, torch_dtype + ) - def run_pipeline_test(self, task, repo_name, model_architecture, tokenizer_name, processor_name, commit): + def run_pipeline_test( + self, task, repo_name, model_architecture, tokenizer_name, processor_name, commit, torch_dtype="float32" + ): """Run pipeline tests for a specific `task` with the give model class and tokenizer/processor class name The model will be loaded from a model repository on the Hub. @@ -226,6 +238,10 @@ def run_pipeline_test(self, task, repo_name, model_architecture, tokenizer_name, The name of a subclass of `PreTrainedTokenizerFast` or `PreTrainedTokenizer`. processor_name (`str`): The name of a subclass of `BaseImageProcessor` or `FeatureExtractionMixin`. + commit (`str`): + The commit hash of the model repository on the Hub. + torch_dtype (`str`, `optional`, defaults to `'float32'`): + The torch dtype to use for the model. 
Can be used for FP16/other precision inference. """ repo_id = f"{TRANSFORMERS_TINY_MODEL_PATH}/{repo_name}" if TRANSFORMERS_TINY_MODEL_PATH != "hf-internal-testing": @@ -245,7 +261,7 @@ def run_pipeline_test(self, task, repo_name, model_architecture, tokenizer_name, processor = processor_class.from_pretrained(repo_id, revision=commit) except Exception: logger.warning( - f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: Could not load the " + f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')}_{torch_dtype} is skipped: Could not load the " f"processor from `{repo_id}` with `{processor_name}`." ) self.skipTest(f"Could not load the processor from {repo_id} with {processor_name}.") @@ -253,7 +269,7 @@ def run_pipeline_test(self, task, repo_name, model_architecture, tokenizer_name, # TODO: Maybe not upload such problematic tiny models to Hub. if tokenizer is None and processor is None: logger.warning( - f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: Could not find or load " + f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')}_{torch_dtype} is skipped: Could not find or load " f"any tokenizer / processor from `{repo_id}`." ) self.skipTest(f"Could not find or load any tokenizer / processor from {repo_id}.") @@ -263,7 +279,7 @@ def run_pipeline_test(self, task, repo_name, model_architecture, tokenizer_name, model = model_architecture.from_pretrained(repo_id, revision=commit) except Exception: logger.warning( - f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: Could not find or load " + f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')}_{torch_dtype} is skipped: Could not find or load " f"the model from `{repo_id}` with `{model_architecture}`." ) self.skipTest(f"Could not find or load the model from {repo_id} with {model_architecture}.") @@ -271,7 +287,7 @@ def run_pipeline_test(self, task, repo_name, model_architecture, tokenizer_name, pipeline_test_class_name = pipeline_test_mapping[task]["test"].__name__ if self.is_pipeline_test_to_skip_more(pipeline_test_class_name, model.config, model, tokenizer, processor): logger.warning( - f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: test is " + f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')}_{torch_dtype} is skipped: test is " f"currently known to fail for: model `{model_architecture.__name__}` | tokenizer " f"`{tokenizer_name}` | processor `{processor_name}`." ) @@ -289,12 +305,12 @@ def run_pipeline_test(self, task, repo_name, model_architecture, tokenizer_name, # `run_pipeline_test`. task_test = pipeline_test_mapping[task]["test"]() - pipeline, examples = task_test.get_test_pipeline(model, tokenizer, processor) + pipeline, examples = task_test.get_test_pipeline(model, tokenizer, processor, torch_dtype=torch_dtype) if pipeline is None: # The test can disable itself, but it should be very marginal # Concerns: Wav2Vec2ForCTC without tokenizer test (FastTokenizer don't exist) logger.warning( - f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: Could not get the " + f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')}_{torch_dtype} is skipped: Could not get the " "pipeline for testing." 
) self.skipTest(reason="Could not get the pipeline for testing.") @@ -324,10 +340,20 @@ def data(n): def test_pipeline_audio_classification(self): self.run_task_tests(task="audio-classification") + @is_pipeline_test + @require_torch + def test_pipeline_audio_classification_fp16(self): + self.run_task_tests(task="audio-classification", torch_dtype="float16") + @is_pipeline_test def test_pipeline_automatic_speech_recognition(self): self.run_task_tests(task="automatic-speech-recognition") + @is_pipeline_test + @require_torch + def test_pipeline_automatic_speech_recognition_fp16(self): + self.run_task_tests(task="automatic-speech-recognition", torch_dtype="float16") + @is_pipeline_test @require_vision @require_timm @@ -335,6 +361,13 @@ def test_pipeline_automatic_speech_recognition(self): def test_pipeline_depth_estimation(self): self.run_task_tests(task="depth-estimation") + @is_pipeline_test + @require_vision + @require_timm + @require_torch + def test_pipeline_depth_estimation_fp16(self): + self.run_task_tests(task="depth-estimation", torch_dtype="float16") + @is_pipeline_test @require_pytesseract @require_torch @@ -342,20 +375,43 @@ def test_pipeline_depth_estimation(self): def test_pipeline_document_question_answering(self): self.run_task_tests(task="document-question-answering") + @is_pipeline_test + @require_pytesseract + @require_torch + @require_vision + def test_pipeline_document_question_answering_fp16(self): + self.run_task_tests(task="document-question-answering", torch_dtype="float16") + @is_pipeline_test def test_pipeline_feature_extraction(self): self.run_task_tests(task="feature-extraction") + @is_pipeline_test + @require_torch + def test_pipeline_feature_extraction_fp16(self): + self.run_task_tests(task="feature-extraction", torch_dtype="float16") + @is_pipeline_test def test_pipeline_fill_mask(self): self.run_task_tests(task="fill-mask") + @is_pipeline_test + @require_torch + def test_pipeline_fill_mask_fp16(self): + self.run_task_tests(task="fill-mask", torch_dtype="float16") + @is_pipeline_test @require_torch_or_tf @require_vision def test_pipeline_image_classification(self): self.run_task_tests(task="image-classification") + @is_pipeline_test + @require_vision + @require_torch + def test_pipeline_image_classification_fp16(self): + self.run_task_tests(task="image-classification", torch_dtype="float16") + @is_pipeline_test @require_vision @require_timm @@ -363,11 +419,24 @@ def test_pipeline_image_classification(self): def test_pipeline_image_segmentation(self): self.run_task_tests(task="image-segmentation") + @is_pipeline_test + @require_vision + @require_timm + @require_torch + def test_pipeline_image_segmentation_fp16(self): + self.run_task_tests(task="image-segmentation", torch_dtype="float16") + @is_pipeline_test @require_vision def test_pipeline_image_to_text(self): self.run_task_tests(task="image-to-text") + @is_pipeline_test + @require_vision + @require_torch + def test_pipeline_image_to_text_fp16(self): + self.run_task_tests(task="image-to-text", torch_dtype="float16") + @is_pipeline_test @require_timm @require_vision @@ -375,6 +444,13 @@ def test_pipeline_image_to_text(self): def test_pipeline_image_feature_extraction(self): self.run_task_tests(task="image-feature-extraction") + @is_pipeline_test + @require_timm + @require_vision + @require_torch + def test_pipeline_image_feature_extraction_fp16(self): + self.run_task_tests(task="image-feature-extraction", torch_dtype="float16") + @unittest.skip(reason="`run_pipeline_test` is currently not implemented.") 
@is_pipeline_test @require_vision @@ -382,6 +458,13 @@ def test_pipeline_image_feature_extraction(self): def test_pipeline_mask_generation(self): self.run_task_tests(task="mask-generation") + @unittest.skip(reason="`run_pipeline_test` is currently not implemented.") + @is_pipeline_test + @require_vision + @require_torch + def test_pipeline_mask_generation_fp16(self): + self.run_task_tests(task="mask-generation", torch_dtype="float16") + @is_pipeline_test @require_vision @require_timm @@ -389,44 +472,96 @@ def test_pipeline_mask_generation(self): def test_pipeline_object_detection(self): self.run_task_tests(task="object-detection") + @is_pipeline_test + @require_vision + @require_timm + @require_torch + def test_pipeline_object_detection_fp16(self): + self.run_task_tests(task="object-detection", torch_dtype="float16") + @is_pipeline_test def test_pipeline_question_answering(self): self.run_task_tests(task="question-answering") + @is_pipeline_test + @require_torch + def test_pipeline_question_answering_fp16(self): + self.run_task_tests(task="question-answering", torch_dtype="float16") + @is_pipeline_test def test_pipeline_summarization(self): self.run_task_tests(task="summarization") + @is_pipeline_test + @require_torch + def test_pipeline_summarization_fp16(self): + self.run_task_tests(task="summarization", torch_dtype="float16") + @is_pipeline_test def test_pipeline_table_question_answering(self): self.run_task_tests(task="table-question-answering") + @is_pipeline_test + @require_torch + def test_pipeline_table_question_answering_fp16(self): + self.run_task_tests(task="table-question-answering", torch_dtype="float16") + @is_pipeline_test def test_pipeline_text2text_generation(self): self.run_task_tests(task="text2text-generation") + @is_pipeline_test + @require_torch + def test_pipeline_text2text_generation_fp16(self): + self.run_task_tests(task="text2text-generation", torch_dtype="float16") + @is_pipeline_test def test_pipeline_text_classification(self): self.run_task_tests(task="text-classification") + @is_pipeline_test + @require_torch + def test_pipeline_text_classification_fp16(self): + self.run_task_tests(task="text-classification", torch_dtype="float16") + @is_pipeline_test @require_torch_or_tf def test_pipeline_text_generation(self): self.run_task_tests(task="text-generation") + @is_pipeline_test + @require_torch + def test_pipeline_text_generation_fp16(self): + self.run_task_tests(task="text-generation", torch_dtype="float16") + @is_pipeline_test @require_torch def test_pipeline_text_to_audio(self): self.run_task_tests(task="text-to-audio") + @is_pipeline_test + @require_torch + def test_pipeline_text_to_audio_fp16(self): + self.run_task_tests(task="text-to-audio", torch_dtype="float16") + @is_pipeline_test def test_pipeline_token_classification(self): self.run_task_tests(task="token-classification") + @is_pipeline_test + @require_torch + def test_pipeline_token_classification_fp16(self): + self.run_task_tests(task="token-classification", torch_dtype="float16") + @is_pipeline_test def test_pipeline_translation(self): self.run_task_tests(task="translation") + @is_pipeline_test + @require_torch + def test_pipeline_translation_fp16(self): + self.run_task_tests(task="translation", torch_dtype="float16") + @is_pipeline_test @require_torch_or_tf @require_vision @@ -434,32 +569,67 @@ def test_pipeline_translation(self): def test_pipeline_video_classification(self): self.run_task_tests(task="video-classification") + @is_pipeline_test + @require_vision + @require_decord + @require_torch + 
def test_pipeline_video_classification_fp16(self): + self.run_task_tests(task="video-classification", torch_dtype="float16") + @is_pipeline_test @require_torch @require_vision def test_pipeline_visual_question_answering(self): self.run_task_tests(task="visual-question-answering") + @is_pipeline_test + @require_torch + @require_vision + def test_pipeline_visual_question_answering_fp16(self): + self.run_task_tests(task="visual-question-answering", torch_dtype="float16") + @is_pipeline_test def test_pipeline_zero_shot(self): self.run_task_tests(task="zero-shot") + @is_pipeline_test + @require_torch + def test_pipeline_zero_shot_fp16(self): + self.run_task_tests(task="zero-shot", torch_dtype="float16") + @is_pipeline_test @require_torch def test_pipeline_zero_shot_audio_classification(self): self.run_task_tests(task="zero-shot-audio-classification") + @is_pipeline_test + @require_torch + def test_pipeline_zero_shot_audio_classification_fp16(self): + self.run_task_tests(task="zero-shot-audio-classification", torch_dtype="float16") + @is_pipeline_test @require_vision def test_pipeline_zero_shot_image_classification(self): self.run_task_tests(task="zero-shot-image-classification") + @is_pipeline_test + @require_vision + @require_torch + def test_pipeline_zero_shot_image_classification_fp16(self): + self.run_task_tests(task="zero-shot-image-classification", torch_dtype="float16") + @is_pipeline_test @require_vision @require_torch def test_pipeline_zero_shot_object_detection(self): self.run_task_tests(task="zero-shot-object-detection") + @is_pipeline_test + @require_vision + @require_torch + def test_pipeline_zero_shot_object_detection_fp16(self): + self.run_task_tests(task="zero-shot-object-detection", torch_dtype="float16") + # This contains the test cases to be skipped without model architecture being involved. def is_pipeline_test_to_skip( self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
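The `preprocess` changes in this diff all rely on the same mechanism: image processors and feature extractors return a `BatchFeature`, and calling `.to(dtype)` on it casts its floating-point tensors to the requested dtype while leaving integer tensors (such as token ids) untouched. A minimal sketch of that behavior outside of a pipeline, using `google/vit-base-patch16-224` purely as an example checkpoint:

```python
import torch
from PIL import Image
from transformers import AutoImageProcessor

# Example checkpoint; any PyTorch image processor returns a BatchFeature.
processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224")
image = Image.new("RGB", (224, 224))  # dummy image

inputs = processor(images=image, return_tensors="pt")
print(inputs["pixel_values"].dtype)   # torch.float32

# The cast the pipelines apply when self.framework == "pt":
inputs = inputs.to(torch.float16)
print(inputs["pixel_values"].dtype)   # torch.float16
```

Since the model itself is already loaded in FP16 via the pipeline's `torch_dtype`, casting the preprocessed inputs keeps the two dtypes consistent and avoids dtype-mismatch errors in the forward pass.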