Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor Datapoint dispatch mechanism #7747

Merged
merged 26 commits into from
Aug 2, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
d9e1379
[PoC] refactor Datapoint dispatch mechanism
pmeier Jul 19, 2023
36b9d36
fix test
pmeier Jul 19, 2023
f36c64c
Merge branch 'main' into kernel-registration
pmeier Jul 26, 2023
bbaa35c
add dispatch to adjust_brightness
pmeier Jul 27, 2023
ca4ad32
enforce no register overwrite
pmeier Jul 27, 2023
d23a80e
[PoC] make wrapping internal kernel more convenient
pmeier Jul 27, 2023
bf47188
[PoC] enforce explicit no-ops
pmeier Jul 27, 2023
74d5054
fix adjust_brightness tests and remove methods
pmeier Jul 27, 2023
e88be5e
Merge branch 'main' into kernel-registration
pmeier Jul 27, 2023
f178373
address minor comments
pmeier Jul 27, 2023
65e80d0
make no-op registration a decorator
pmeier Jul 28, 2023
9614477
Merge branch 'main'
pmeier Aug 1, 2023
6ac08e4
explicit metadata
pmeier Aug 1, 2023
cac079b
implement dispatchers for erase five/ten_crop and temporal_subsample
pmeier Aug 1, 2023
c7256b4
make shape getters proper dispatchers
pmeier Aug 1, 2023
bf78cd6
fix
pmeier Aug 1, 2023
f86f89b
port normalize and to_dtype
pmeier Aug 2, 2023
d90daf6
address comments
pmeier Aug 2, 2023
09eec9a
address comments and cleanup
pmeier Aug 2, 2023
3730811
more cleanup
pmeier Aug 2, 2023
7203453
Merge branch 'main' into kernel-registration
pmeier Aug 2, 2023
31bee5f
port all remaining dispatchers to the new mechanism
pmeier Jul 28, 2023
a924013
put back legacy test_dispatch_datapoint
pmeier Aug 2, 2023
b3c2c88
minor test fixes
pmeier Aug 2, 2023
a1f5ea4
Update torchvision/transforms/v2/functional/_utils.py
pmeier Aug 2, 2023
d29d95b
reinstate antialias tests
pmeier Aug 2, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions test/test_transforms_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -1419,8 +1419,6 @@ def test_antialias_warning():
with pytest.warns(UserWarning, match=match):
datapoints.Image(tensor_img).resized_crop(0, 0, 10, 10, (20, 20))

with pytest.warns(UserWarning, match=match):
datapoints.Video(tensor_video).resize((20, 20))
with pytest.warns(UserWarning, match=match):
datapoints.Video(tensor_video).resized_crop(0, 0, 10, 10, (20, 20))

Expand Down
55 changes: 44 additions & 11 deletions test/test_transforms_v2_refactored.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from torchvision.transforms._functional_tensor import _max_value as get_max_value
from torchvision.transforms.functional import pil_modes_mapping
from torchvision.transforms.v2 import functional as F
from torchvision.transforms.v2.functional._utils import _KERNEL_REGISTRY


@pytest.fixture(autouse=True)
Expand Down Expand Up @@ -167,17 +168,20 @@ def _check_dispatcher_dispatch(dispatcher, kernel, input, *args, **kwargs):
preserved in doing so. For bounding boxes also checks that the format is preserved.
"""
if isinstance(input, datapoints._datapoint.Datapoint):
# Due to our complex dispatch architecture for datapoints, we cannot spy on the kernel directly,
# but rather have to patch the `Datapoint.__F` attribute to contain the spied on kernel.
spy = mock.MagicMock(wraps=kernel, name=kernel.__name__)
with mock.patch.object(F, kernel.__name__, spy):
# Due to Python's name mangling, the `Datapoint.__F` attribute is only accessible from inside the class.
# Since that is not the case here, we need to prefix f"_{cls.__name__}"
# See https://docs.python.org/3/tutorial/classes.html#private-variables for details
with mock.patch.object(datapoints._datapoint.Datapoint, "_Datapoint__F", new=F):
output = dispatcher(input, *args, **kwargs)

spy.assert_called_once()
if dispatcher in {F.resize, F.adjust_brightness}:
output = dispatcher(input, *args, **kwargs)
else:
# Due to our complex dispatch architecture for datapoints, we cannot spy on the kernel directly,
# but rather have to patch the `Datapoint.__F` attribute to contain the spied on kernel.
spy = mock.MagicMock(wraps=kernel, name=kernel.__name__)
with mock.patch.object(F, kernel.__name__, spy):
# Due to Python's name mangling, the `Datapoint.__F` attribute is only accessible from inside the class.
# Since that is not the case here, we need to prefix f"_{cls.__name__}"
# See https://docs.python.org/3/tutorial/classes.html#private-variables for details
with mock.patch.object(datapoints._datapoint.Datapoint, "_Datapoint__F", new=F):
output = dispatcher(input, *args, **kwargs)

spy.assert_called_once()
else:
with mock.patch(f"{dispatcher.__module__}.{kernel.__name__}", wraps=kernel) as spy:
output = dispatcher(input, *args, **kwargs)
Expand Down Expand Up @@ -251,6 +255,8 @@ def _check_dispatcher_kernel_signature_match(dispatcher, *, kernel, input_type):

def _check_dispatcher_datapoint_signature_match(dispatcher):
"""Checks if the signature of the dispatcher matches the corresponding method signature on the Datapoint class."""
if dispatcher in {F.resize, F.adjust_brightness}:
return
dispatcher_signature = inspect.signature(dispatcher)
dispatcher_params = list(dispatcher_signature.parameters.values())[1:]

Expand Down Expand Up @@ -423,6 +429,33 @@ def transform(bbox):
return torch.stack([transform(b) for b in bounding_box.reshape(-1, 4).unbind()]).reshape(bounding_box.shape)


@pytest.mark.parametrize(
    ("dispatcher", "registered_datapoint_clss"),
    [(dispatcher, set(registry.keys())) for dispatcher, registry in _KERNEL_REGISTRY.items()],
)
def test_exhaustive_kernel_registration(dispatcher, registered_datapoint_clss):
    """Check that every dispatcher in the kernel registry has an entry for all four datapoint types.

    A dispatcher must either have a real kernel registered per datapoint class or an explicit
    no-op; a missing entry means inputs of that type would silently fall through the dispatch.
    The failure message lists the missing classes and how to register them.
    """
    missing = {
        datapoints.Image,
        datapoints.BoundingBox,
        datapoints.Mask,
        datapoints.Video,
    } - registered_datapoint_clss
    if missing:
        names = sorted(f"datapoints.{cls.__name__}" for cls in missing)
        raise AssertionError(
            "\n".join(
                [
                    # Fixed typo in the failure message: "hs" -> "has".
                    f"The dispatcher '{dispatcher.__name__}' has no kernels registered for",
                    "",
                    *[f"- {name}" for name in names],
                    "",
                    f"If available, register the kernels with @_register_kernel_internal({dispatcher.__name__}, ...).",
                    f"If not, register explicit no-ops with _register_explicit_noops({dispatcher.__name__}, {', '.join(names)})",
                ]
            )
        )


class TestResize:
INPUT_SIZE = (17, 11)
OUTPUT_SIZES = [17, [17], (17,), [12, 13], (12, 13)]
Expand Down
15 changes: 0 additions & 15 deletions torchvision/datapoints/_bounding_box.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,21 +110,6 @@ def vertical_flip(self) -> BoundingBox:
)
return BoundingBox.wrap_like(self, output)

def resize(  # type: ignore[override]
    self,
    size: List[int],
    interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR,
    max_size: Optional[int] = None,
    antialias: Optional[Union[str, bool]] = "warn",
) -> BoundingBox:
    """Rescale the box coordinates to match an image resized to ``size``.

    ``interpolation`` and ``antialias`` are accepted for signature compatibility with the other
    datapoints but are not forwarded to the kernel — they cannot affect box coordinates.
    """
    # The kernel returns both the transformed coordinates and the new spatial size;
    # the latter must be stored on the wrapped result so downstream ops see the resized canvas.
    output, spatial_size = self._F.resize_bounding_box(
        self.as_subclass(torch.Tensor),
        spatial_size=self.spatial_size,
        size=size,
        max_size=max_size,
    )
    return BoundingBox.wrap_like(self, output, spatial_size=spatial_size)

def crop(self, top: int, left: int, height: int, width: int) -> BoundingBox:
output, spatial_size = self._F.crop_bounding_box(
self.as_subclass(torch.Tensor), self.format, top=top, left=left, height=height, width=width
Expand Down
11 changes: 0 additions & 11 deletions torchvision/datapoints/_datapoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,17 +148,6 @@ def horizontal_flip(self) -> Datapoint:
def vertical_flip(self) -> Datapoint:
return self

# TODO: We have to ignore override mypy error as there is torch.Tensor built-in deprecated op: Tensor.resize
# https://github.com/pytorch/pytorch/blob/e8727994eb7cdb2ab642749d6549bc497563aa06/torch/_tensor.py#L588-L593
def resize(  # type: ignore[override]
    self,
    size: List[int],
    interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR,
    max_size: Optional[int] = None,
    antialias: Optional[Union[str, bool]] = "warn",
) -> Datapoint:
    """Base-class passthrough: a plain ``Datapoint`` is returned unchanged; subclasses override.

    All parameters are ignored here — they exist only so the override signature matches.
    """
    return self

def crop(self, top: int, left: int, height: int, width: int) -> Datapoint:
return self

Expand Down
12 changes: 0 additions & 12 deletions torchvision/datapoints/_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,18 +72,6 @@ def vertical_flip(self) -> Image:
output = self._F.vertical_flip_image_tensor(self.as_subclass(torch.Tensor))
return Image.wrap_like(self, output)

def resize(  # type: ignore[override]
    self,
    size: List[int],
    interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR,
    max_size: Optional[int] = None,
    antialias: Optional[Union[str, bool]] = "warn",
) -> Image:
    """Resize this image via the tensor kernel and re-wrap the result as an ``Image``."""
    resized = self._F.resize_image_tensor(
        self.as_subclass(torch.Tensor),
        size,
        interpolation=interpolation,
        max_size=max_size,
        antialias=antialias,
    )
    return Image.wrap_like(self, resized)

def crop(self, top: int, left: int, height: int, width: int) -> Image:
output = self._F.crop_image_tensor(self.as_subclass(torch.Tensor), top, left, height, width)
return Image.wrap_like(self, output)
Expand Down
10 changes: 0 additions & 10 deletions torchvision/datapoints/_mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,16 +63,6 @@ def vertical_flip(self) -> Mask:
output = self._F.vertical_flip_mask(self.as_subclass(torch.Tensor))
return Mask.wrap_like(self, output)

def resize(  # type: ignore[override]
    self,
    size: List[int],
    interpolation: Union[InterpolationMode, int] = InterpolationMode.NEAREST,
    max_size: Optional[int] = None,
    antialias: Optional[Union[str, bool]] = "warn",
) -> Mask:
    """Resize the mask to ``size``.

    ``interpolation`` and ``antialias`` are accepted for API symmetry but are not forwarded to
    the kernel — presumably ``resize_mask`` uses nearest-neighbor internally; confirm in kernel.
    """
    output = self._F.resize_mask(self.as_subclass(torch.Tensor), size, max_size=max_size)
    return Mask.wrap_like(self, output)

def crop(self, top: int, left: int, height: int, width: int) -> Mask:
output = self._F.crop_mask(self.as_subclass(torch.Tensor), top, left, height, width)
return Mask.wrap_like(self, output)
Expand Down
16 changes: 0 additions & 16 deletions torchvision/datapoints/_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,22 +66,6 @@ def vertical_flip(self) -> Video:
output = self._F.vertical_flip_video(self.as_subclass(torch.Tensor))
return Video.wrap_like(self, output)

def resize(  # type: ignore[override]
    self,
    size: List[int],
    interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR,
    max_size: Optional[int] = None,
    antialias: Optional[Union[str, bool]] = "warn",
) -> Video:
    """Resize every frame of this video and re-wrap the result as a ``Video``."""
    resized = self._F.resize_video(
        self.as_subclass(torch.Tensor), size, interpolation=interpolation, max_size=max_size, antialias=antialias
    )
    return Video.wrap_like(self, resized)

def crop(self, top: int, left: int, height: int, width: int) -> Video:
output = self._F.crop_video(self.as_subclass(torch.Tensor), top, left, height, width)
return Video.wrap_like(self, output)
Expand Down
2 changes: 1 addition & 1 deletion torchvision/transforms/v2/functional/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from torchvision.transforms import InterpolationMode # usort: skip

from ._utils import is_simple_tensor # usort: skip
from ._utils import is_simple_tensor, register_kernel # usort: skip

from ._meta import (
clamp_bounding_box,
Expand Down
42 changes: 24 additions & 18 deletions torchvision/transforms/v2/functional/_color.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from torchvision.utils import _log_api_usage_once

from ._meta import _num_value_bits, convert_dtype_image_tensor
from ._utils import is_simple_tensor
from ._utils import _get_kernel, _register_explicit_noops, _register_kernel_internal, is_simple_tensor


def _rgb_to_grayscale_image_tensor(
Expand Down Expand Up @@ -69,6 +69,28 @@ def _blend(image1: torch.Tensor, image2: torch.Tensor, ratio: float) -> torch.Te
return output if fp else output.to(image1.dtype)


def adjust_brightness(inpt: datapoints._InputTypeJIT, brightness_factor: float) -> datapoints._InputTypeJIT:
    """Dispatch ``adjust_brightness`` to the kernel matching the input type.

    Accepts plain tensors (including under torchscript), any TorchVision datapoint, or a
    PIL image; any other type raises ``TypeError``.
    """
    if not torch.jit.is_scripting():
        _log_api_usage_once(adjust_brightness)

    if torch.jit.is_scripting() or is_simple_tensor(inpt):
        # NOTE(review): the kernel is looked up in the registry via the runtime type —
        # presumably torch.Tensor maps to the image kernel here; confirm against _get_kernel.
        kernel = _get_kernel(adjust_brightness, type(inpt))
        return kernel(inpt, brightness_factor=brightness_factor)
    elif isinstance(inpt, datapoints._datapoint.Datapoint):
        # Datapoints still route through their own method in this intermediate design.
        return inpt.adjust_brightness(brightness_factor=brightness_factor)
    elif isinstance(inpt, PIL.Image.Image):
        return adjust_brightness_image_pil(inpt, brightness_factor=brightness_factor)
    else:
        raise TypeError(
            f"Input can either be a plain tensor, any TorchVision datapoint, or a PIL image, "
            f"but got {type(inpt)} instead."
        )


_register_explicit_noops(adjust_brightness, datapoints.BoundingBox, datapoints.Mask)


@_register_kernel_internal(adjust_brightness, datapoints.Image)
def adjust_brightness_image_tensor(image: torch.Tensor, brightness_factor: float) -> torch.Tensor:
if brightness_factor < 0:
raise ValueError(f"brightness_factor ({brightness_factor}) is not non-negative.")
Expand All @@ -86,27 +108,11 @@ def adjust_brightness_image_tensor(image: torch.Tensor, brightness_factor: float
adjust_brightness_image_pil = _FP.adjust_brightness


@_register_kernel_internal(adjust_brightness, datapoints.Video)
def adjust_brightness_video(video: torch.Tensor, brightness_factor: float) -> torch.Tensor:
return adjust_brightness_image_tensor(video, brightness_factor=brightness_factor)


def adjust_brightness(inpt: datapoints._InputTypeJIT, brightness_factor: float) -> datapoints._InputTypeJIT:
    """Adjust the brightness of the input, dispatching on its type.

    Supports plain tensors, any TorchVision datapoint, and PIL images; anything else raises
    ``TypeError``.
    """
    if not torch.jit.is_scripting():
        _log_api_usage_once(adjust_brightness)

    # Branch order matters: datapoints subclass torch.Tensor, so the simple-tensor check
    # (which excludes datapoints) must come first.
    if torch.jit.is_scripting() or is_simple_tensor(inpt):
        return adjust_brightness_image_tensor(inpt, brightness_factor=brightness_factor)
    elif isinstance(inpt, datapoints._datapoint.Datapoint):
        # Datapoints dispatch through their own method.
        return inpt.adjust_brightness(brightness_factor=brightness_factor)
    elif isinstance(inpt, PIL.Image.Image):
        return adjust_brightness_image_pil(inpt, brightness_factor=brightness_factor)
    else:
        raise TypeError(
            f"Input can either be a plain tensor, any TorchVision datapoint, or a PIL image, "
            f"but got {type(inpt)} instead."
        )


def adjust_saturation_image_tensor(image: torch.Tensor, saturation_factor: float) -> torch.Tensor:
if saturation_factor < 0:
raise ValueError(f"saturation_factor ({saturation_factor}) is not non-negative.")
Expand Down
55 changes: 30 additions & 25 deletions torchvision/transforms/v2/functional/_geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

from ._meta import clamp_bounding_box, convert_format_bounding_box, get_spatial_size_image_pil

from ._utils import is_simple_tensor
from ._utils import _get_kernel, _register_kernel_internal, is_simple_tensor


def _check_interpolation(interpolation: Union[InterpolationMode, int]) -> InterpolationMode:
Expand Down Expand Up @@ -158,6 +158,32 @@ def _compute_resized_output_size(
return __compute_resized_output_size(spatial_size, size=size, max_size=max_size)


def resize(
    inpt: datapoints._InputTypeJIT,
    size: List[int],
    interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR,
    max_size: Optional[int] = None,
    antialias: Optional[Union[str, bool]] = "warn",
) -> datapoints._InputTypeJIT:
    """Dispatch ``resize`` to the kernel matching the type of ``inpt``.

    Plain tensors go straight to the image kernel; datapoints are resolved through the
    kernel registry by their class; PIL images use the PIL kernel (``antialias`` is always
    applied there, so passing ``antialias=False`` emits a warning). Any other type raises
    ``TypeError``.
    """
    if not torch.jit.is_scripting():
        _log_api_usage_once(resize)
    # Branch order matters: datapoints subclass torch.Tensor, so the simple-tensor check
    # (which excludes datapoints) must come first.
    if torch.jit.is_scripting() or is_simple_tensor(inpt):
        return resize_image_tensor(inpt, size, interpolation=interpolation, max_size=max_size, antialias=antialias)
    elif isinstance(inpt, datapoints._datapoint.Datapoint):
        # New mechanism: look the kernel up in the registry instead of calling the method.
        kernel = _get_kernel(resize, type(inpt))
        return kernel(inpt, size, interpolation=interpolation, max_size=max_size, antialias=antialias)
    elif isinstance(inpt, PIL.Image.Image):
        if antialias is False:
            warnings.warn("Anti-alias option is always applied for PIL Image input. Argument antialias is ignored.")
        return resize_image_pil(inpt, size, interpolation=interpolation, max_size=max_size)
    else:
        raise TypeError(
            f"Input can either be a plain tensor, any TorchVision datapoint, or a PIL image, "
            f"but got {type(inpt)} instead."
        )


@_register_kernel_internal(resize, datapoints.Image)
def resize_image_tensor(
image: torch.Tensor,
size: List[int],
Expand Down Expand Up @@ -259,6 +285,7 @@ def resize_image_pil(
return image.resize((new_width, new_height), resample=pil_modes_mapping[interpolation])


@_register_kernel_internal(resize, datapoints.Mask)
def resize_mask(mask: torch.Tensor, size: List[int], max_size: Optional[int] = None) -> torch.Tensor:
if mask.ndim < 3:
mask = mask.unsqueeze(0)
Expand All @@ -274,6 +301,7 @@ def resize_mask(mask: torch.Tensor, size: List[int], max_size: Optional[int] = N
return output


@_register_kernel_internal(resize, datapoints.BoundingBox)
def resize_bounding_box(
bounding_box: torch.Tensor, spatial_size: Tuple[int, int], size: List[int], max_size: Optional[int] = None
) -> Tuple[torch.Tensor, Tuple[int, int]]:
Expand All @@ -292,6 +320,7 @@ def resize_bounding_box(
)


@_register_kernel_internal(resize, datapoints.Video)
def resize_video(
video: torch.Tensor,
size: List[int],
Expand All @@ -302,30 +331,6 @@ def resize_video(
return resize_image_tensor(video, size=size, interpolation=interpolation, max_size=max_size, antialias=antialias)


def resize(
    inpt: datapoints._InputTypeJIT,
    size: List[int],
    interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR,
    max_size: Optional[int] = None,
    antialias: Optional[Union[str, bool]] = "warn",
) -> datapoints._InputTypeJIT:
    """Resize the input to ``size``, dispatching on its type.

    Supports plain tensors, any TorchVision datapoint, and PIL images; anything else raises
    ``TypeError``.
    """
    if not torch.jit.is_scripting():
        _log_api_usage_once(resize)
    # Branch order matters: datapoints subclass torch.Tensor, so the simple-tensor check
    # (which excludes datapoints) must come first.
    if torch.jit.is_scripting() or is_simple_tensor(inpt):
        return resize_image_tensor(inpt, size, interpolation=interpolation, max_size=max_size, antialias=antialias)
    elif isinstance(inpt, datapoints._datapoint.Datapoint):
        # Datapoints dispatch through their own method.
        return inpt.resize(size, interpolation=interpolation, max_size=max_size, antialias=antialias)
    elif isinstance(inpt, PIL.Image.Image):
        # PIL always applies anti-aliasing, so an explicit antialias=False cannot be honored.
        if antialias is False:
            warnings.warn("Anti-alias option is always applied for PIL Image input. Argument antialias is ignored.")
        return resize_image_pil(inpt, size, interpolation=interpolation, max_size=max_size)
    else:
        raise TypeError(
            f"Input can either be a plain tensor, any TorchVision datapoint, or a PIL image, "
            f"but got {type(inpt)} instead."
        )


def _affine_parse_args(
angle: Union[int, float],
translate: List[float],
Expand Down
Loading