Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix output data type of image classification #31444

Merged
merged 9 commits into from
Jun 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/transformers/pipelines/image_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
from ..models.auto.modeling_tf_auto import TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES

if is_torch_available():
import torch

from ..models.auto.modeling_auto import MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES

logger = logging.get_logger(__name__)
Expand Down Expand Up @@ -180,7 +182,10 @@ def postprocess(self, model_outputs, function_to_apply=None, top_k=5):
top_k = self.model.config.num_labels

outputs = model_outputs["logits"][0]
outputs = outputs.numpy()
if self.framework == "pt" and outputs.dtype in (torch.bfloat16, torch.float16):
outputs = outputs.to(torch.float32).numpy()
else:
outputs = outputs.numpy()

if function_to_apply == ClassificationFunction.SIGMOID:
scores = sigmoid(outputs)
Expand Down
28 changes: 28 additions & 0 deletions tests/pipelines/test_pipelines_image_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
PreTrainedTokenizerBase,
is_torch_available,
is_vision_available,
)
from transformers.pipelines import ImageClassificationPipeline, pipeline
Expand All @@ -34,6 +35,9 @@
from .test_pipelines_common import ANY


if is_torch_available():
import torch

if is_vision_available():
from PIL import Image
else:
Expand Down Expand Up @@ -177,6 +181,30 @@ def test_custom_tokenizer(self):

self.assertIs(image_classifier.tokenizer, tokenizer)

@require_torch
def test_torch_float16_pipeline(self):
    """Run the image-classification pipeline with `torch_dtype=torch.float16`.

    Builds the pipeline from the `hf-internal-testing/tiny-random-vit`
    checkpoint, classifies a single image given by URL, and compares the
    simplified output (3 decimals) against the expected label/score pairs.
    """
    half_precision_classifier = pipeline(
        "image-classification",
        model="hf-internal-testing/tiny-random-vit",
        torch_dtype=torch.float16,
    )
    predictions = half_precision_classifier("http://images.cocodataset.org/val2017/000000039769.jpg")

    expected_scores = [
        {"label": "LABEL_1", "score": 0.574},
        {"label": "LABEL_0", "score": 0.426},
    ]
    self.assertEqual(nested_simplify(predictions, decimals=3), expected_scores)

@require_torch
def test_torch_bfloat16_pipeline(self):
    """Run the image-classification pipeline with `torch_dtype=torch.bfloat16`.

    Builds the pipeline from the `hf-internal-testing/tiny-random-vit`
    checkpoint, classifies a single image given by URL, and compares the
    simplified output (3 decimals) against the expected label/score pairs.
    """
    bf16_classifier = pipeline(
        "image-classification",
        model="hf-internal-testing/tiny-random-vit",
        torch_dtype=torch.bfloat16,
    )
    predictions = bf16_classifier("http://images.cocodataset.org/val2017/000000039769.jpg")

    expected_scores = [
        {"label": "LABEL_1", "score": 0.574},
        {"label": "LABEL_0", "score": 0.426},
    ]
    self.assertEqual(nested_simplify(predictions, decimals=3), expected_scores)

@slow
@require_torch
def test_perceiver(self):
Expand Down