pytorch · NicolasHug · Jul 31, 2023 · Jul 31, 2023 · Jul 31, 2023 · Jul 31, 2023
diff --git a/docs/source/datapoints.rst b/docs/source/datapoints.rst
@@ -14,6 +14,6 @@ see e.g. :ref:`sphx_glr_auto_examples_plot_transforms_v2_e2e.py`.
 
     Image
     Video
-    BoundingBoxFormat
-    BoundingBox
+    BBoxFormat
+    BBoxes
     Mask
diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst
@@ -206,8 +206,8 @@ Miscellaneous
     v2.RandomErasing
     Lambda
     v2.Lambda
-    v2.SanitizeBoundingBox
-    v2.ClampBoundingBox
+    v2.SanitizeBBoxes
+    v2.ClampBBoxes
     v2.UniformTemporalSubsample
 
 .. _conversion_transforms:
@@ -236,7 +236,7 @@ Conversion
     ConvertImageDtype
     v2.ConvertImageDtype
     v2.ToDtype
-    v2.ConvertBoundingBoxFormat
+    v2.ConvertBBoxFormat
 
 Auto-Augmentation
 -----------------

diff --git a/references/detection/presets.py b/references/detection/presets.py
@@ -77,8 +77,8 @@ def __init__(
 
         if use_v2:
             transforms += [
-                T.ConvertBoundingBoxFormat(datapoints.BoundingBoxFormat.XYXY),
-                T.SanitizeBoundingBox(),
+                T.ConvertBBoxFormat(datapoints.BBoxFormat.XYXY),
+                T.SanitizeBBoxes(),
             ]
 
         self.transforms = T.Compose(transforms)

diff --git a/test/common_utils.py b/test/common_utils.py
@@ -620,15 +620,15 @@ def make_image_loaders_for_interpolation(
 
 
 @dataclasses.dataclass
-class BoundingBoxLoader(TensorLoader):
-    format: datapoints.BoundingBoxFormat
+class BBoxesLoader(TensorLoader):
+    format: datapoints.BBoxFormat
     spatial_size: Tuple[int, int]
 
 
 def make_bounding_box(
     size=None,
     *,
-    format=datapoints.BoundingBoxFormat.XYXY,
+    format=datapoints.BBoxFormat.XYXY,
     spatial_size=None,
     batch_dims=(),
     dtype=None,
@@ -639,7 +639,7 @@ def make_bounding_box(
         - (box[3] - box[1], box[2] - box[0]) for XYXY
         - (H, W) for XYWH and CXCYWH
     spatial_size: Size of the reference object, e.g. an image. Corresponds to the .spatial_size attribute on
-        returned datapoints.BoundingBox
+        returned datapoints.BBoxes
 
     To generate a valid joint sample, you need to set spatial_size here to the same value as size on the other maker
     functions, e.g.
@@ -666,7 +666,7 @@ def sample_position(values, max_value):
         return torch.stack([torch.randint(max_value - v, ()) for v in values.flatten().tolist()]).reshape(values.shape)
 
     if isinstance(format, str):
-        format = datapoints.BoundingBoxFormat[format]
+        format = datapoints.BBoxFormat[format]
 
     if spatial_size is None:
         if size is None:
@@ -679,7 +679,7 @@ def sample_position(values, max_value):
     dtype = dtype or torch.float32
 
     if any(dim == 0 for dim in batch_dims):
-        return datapoints.BoundingBox(
+        return datapoints.BBoxes(
             torch.empty(*batch_dims, 4, dtype=dtype, device=device), format=format, spatial_size=spatial_size
         )
 
@@ -691,28 +691,28 @@ def sample_position(values, max_value):
     y = sample_position(h, spatial_size[0])
     x = sample_position(w, spatial_size[1])
 
-    if format is datapoints.BoundingBoxFormat.XYWH:
+    if format is datapoints.BBoxFormat.XYWH:
         parts = (x, y, w, h)
-    elif format is datapoints.BoundingBoxFormat.XYXY:
+    elif format is datapoints.BBoxFormat.XYXY:
         x1, y1 = x, y
         x2 = x1 + w
         y2 = y1 + h
         parts = (x1, y1, x2, y2)
-    elif format is datapoints.BoundingBoxFormat.CXCYWH:
+    elif format is datapoints.BBoxFormat.CXCYWH:
         cx = x + w / 2
         cy = y + h / 2
         parts = (cx, cy, w, h)
     else:
         raise ValueError(f"Format {format} is not supported")
 
-    return datapoints.BoundingBox(
+    return datapoints.BBoxes(
         torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, spatial_size=spatial_size
     )
 
 
 def make_bounding_box_loader(*, extra_dims=(), format, spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.float32):
     if isinstance(format, str):
-        format = datapoints.BoundingBoxFormat[format]
+        format = datapoints.BBoxFormat[format]
 
     spatial_size = _parse_spatial_size(spatial_size, name="spatial_size")
 
@@ -725,13 +725,13 @@ def fn(shape, dtype, device):
             format=format, spatial_size=spatial_size, batch_dims=batch_dims, dtype=dtype, device=device
         )
 
-    return BoundingBoxLoader(fn, shape=(*extra_dims, 4), dtype=dtype, format=format, spatial_size=spatial_size)
+    return BBoxesLoader(fn, shape=(*extra_dims, 4), dtype=dtype, format=format, spatial_size=spatial_size)
 
 
 def make_bounding_box_loaders(
     *,
     extra_dims=DEFAULT_EXTRA_DIMS,
-    formats=tuple(datapoints.BoundingBoxFormat),
+    formats=tuple(datapoints.BBoxFormat),
     spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE,
     dtypes=(torch.float32, torch.float64, torch.int64),
 ):

diff --git a/test/test_datapoints.py b/test/test_datapoints.py
@@ -24,14 +24,14 @@ def test_mask_instance(data):
 
 @pytest.mark.parametrize("data", [torch.randint(0, 32, size=(5, 4)), [[0, 0, 5, 5], [2, 2, 7, 7]]])
 @pytest.mark.parametrize(
-    "format", ["XYXY", "CXCYWH", datapoints.BoundingBoxFormat.XYXY, datapoints.BoundingBoxFormat.XYWH]
+    "format", ["XYXY", "CXCYWH", datapoints.BBoxFormat.XYXY, datapoints.BBoxFormat.XYWH]
 )
 def test_bbox_instance(data, format):
-    bboxes = datapoints.BoundingBox(data, format=format, spatial_size=(32, 32))
+    bboxes = datapoints.BBoxes(data, format=format, spatial_size=(32, 32))
     assert isinstance(bboxes, torch.Tensor)
     assert bboxes.ndim == 2 and bboxes.shape[1] == 4
     if isinstance(format, str):
-        format = datapoints.BoundingBoxFormat[(format.upper())]
+        format = datapoints.BBoxFormat[(format.upper())]
     assert bboxes.format == format
 
 
@@ -164,7 +164,7 @@ def test_wrap_like():
     [
         datapoints.Image(torch.rand(3, 16, 16)),
         datapoints.Video(torch.rand(2, 3, 16, 16)),
-        datapoints.BoundingBox([0.0, 1.0, 2.0, 3.0], format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(10, 10)),
+        datapoints.BBoxes([0.0, 1.0, 2.0, 3.0], format=datapoints.BBoxFormat.XYXY, spatial_size=(10, 10)),
         datapoints.Mask(torch.randint(0, 256, (16, 16), dtype=torch.uint8)),
     ],
 )

diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py
@@ -20,7 +20,7 @@
 
 from prototype_common_utils import make_label, make_one_hot_labels
 
-from torchvision.datapoints import BoundingBox, BoundingBoxFormat, Image, Mask, Video
+from torchvision.datapoints import BBoxes, BBoxFormat, Image, Mask, Video
 from torchvision.prototype import datapoints, transforms
 from torchvision.transforms.v2._utils import _convert_fill_arg
 from torchvision.transforms.v2.functional import InterpolationMode, pil_to_tensor, to_image_pil
@@ -101,10 +101,10 @@ def test__extract_image_targets_assertion(self, mocker):
             self.create_fake_image(mocker, Image),
             # labels, bboxes, masks
             mocker.MagicMock(spec=datapoints.Label),
-            mocker.MagicMock(spec=BoundingBox),
+            mocker.MagicMock(spec=BBoxes),
             mocker.MagicMock(spec=Mask),
             # labels, bboxes, masks
-            mocker.MagicMock(spec=BoundingBox),
+            mocker.MagicMock(spec=BBoxes),
             mocker.MagicMock(spec=Mask),
         ]
 
@@ -122,11 +122,11 @@ def test__extract_image_targets(self, image_type, label_type, mocker):
             self.create_fake_image(mocker, image_type),
             # labels, bboxes, masks
             mocker.MagicMock(spec=label_type),
-            mocker.MagicMock(spec=BoundingBox),
+            mocker.MagicMock(spec=BBoxes),
             mocker.MagicMock(spec=Mask),
             # labels, bboxes, masks
             mocker.MagicMock(spec=label_type),
-            mocker.MagicMock(spec=BoundingBox),
+            mocker.MagicMock(spec=BBoxes),
             mocker.MagicMock(spec=Mask),
         ]
 
@@ -142,7 +142,7 @@ def test__extract_image_targets(self, image_type, label_type, mocker):
 
         for target in targets:
             for key, type_ in [
-                ("boxes", BoundingBox),
+                ("boxes", BBoxes),
                 ("masks", Mask),
                 ("labels", label_type),
             ]:
@@ -163,7 +163,7 @@ def test__copy_paste(self, label_type):
         if label_type == datapoints.OneHotLabel:
             labels = torch.nn.functional.one_hot(labels, num_classes=5)
         target = {
-            "boxes": BoundingBox(
+            "boxes": BBoxes(
                 torch.tensor([[2.0, 3.0, 8.0, 9.0], [20.0, 20.0, 30.0, 30.0]]), format="XYXY", spatial_size=(32, 32)
             ),
             "masks": Mask(masks),
@@ -178,7 +178,7 @@ def test__copy_paste(self, label_type):
         if label_type == datapoints.OneHotLabel:
             paste_labels = torch.nn.functional.one_hot(paste_labels, num_classes=5)
         paste_target = {
-            "boxes": BoundingBox(
+            "boxes": BBoxes(
                 torch.tensor([[12.0, 13.0, 19.0, 18.0], [1.0, 15.0, 8.0, 19.0]]), format="XYXY", spatial_size=(32, 32)
             ),
             "masks": Mask(paste_masks),
@@ -216,7 +216,7 @@ def test__get_params(self, mocker):
 
         flat_inputs = [
             make_image(size=spatial_size, color_space="RGB"),
-            make_bounding_box(format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=batch_shape),
+            make_bounding_box(format=BBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=batch_shape),
         ]
         params = transform._get_params(flat_inputs)
 
@@ -312,7 +312,7 @@ def test__transform_culling(self, mocker):
         )
 
         bounding_boxes = make_bounding_box(
-            format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(batch_size,)
+            format=BBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(batch_size,)
         )
         masks = make_detection_mask(size=spatial_size, batch_dims=(batch_size,))
         labels = make_label(extra_dims=(batch_size,))
@@ -350,7 +350,7 @@ def test__transform_bounding_box_clamping(self, mocker):
         )
 
         bounding_box = make_bounding_box(
-            format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(batch_size,)
+            format=BBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(batch_size,)
         )
         mock = mocker.patch("torchvision.prototype.transforms._geometry.F.clamp_bounding_box")
 
@@ -390,7 +390,7 @@ class TestPermuteDimensions:
     def test_call(self, dims, inverse_dims):
         sample = dict(
             image=make_image(),
-            bounding_box=make_bounding_box(format=BoundingBoxFormat.XYXY),
+            bounding_box=make_bounding_box(format=BBoxFormat.XYXY),
             video=make_video(),
             str="str",
             int=0,
@@ -434,7 +434,7 @@ class TestTransposeDimensions:
     def test_call(self, dims):
         sample = dict(
             image=make_image(),
-            bounding_box=make_bounding_box(format=BoundingBoxFormat.XYXY),
+            bounding_box=make_bounding_box(format=BBoxFormat.XYXY),
             video=make_video(),
             str="str",
             int=0,