port vertical flip #7712

Merged · 1 commit · Jun 30, 2023
55 changes: 1 addition & 54 deletions test/test_transforms_v2.py
@@ -29,7 +29,7 @@
from torch.utils._pytree import tree_flatten, tree_unflatten
from torchvision import datapoints
from torchvision.ops.boxes import box_iou
from torchvision.transforms.functional import InterpolationMode, pil_to_tensor, to_pil_image
from torchvision.transforms.functional import InterpolationMode, to_pil_image
from torchvision.transforms.v2 import functional as F
from torchvision.transforms.v2.utils import check_type, is_simple_tensor, query_chw

@@ -406,59 +406,6 @@ def was_applied(output, inpt):
assert transform.was_applied(output, input)


@pytest.mark.parametrize("p", [0.0, 1.0])
class TestRandomVerticalFlip:
def input_expected_image_tensor(self, p, dtype=torch.float32):
input = torch.tensor([[[1, 1], [0, 0]], [[1, 1], [0, 0]]], dtype=dtype)
expected = torch.tensor([[[0, 0], [1, 1]], [[0, 0], [1, 1]]], dtype=dtype)

return input, expected if p == 1 else input

def test_simple_tensor(self, p):
input, expected = self.input_expected_image_tensor(p)
transform = transforms.RandomVerticalFlip(p=p)

actual = transform(input)

assert_equal(expected, actual)

def test_pil_image(self, p):
input, expected = self.input_expected_image_tensor(p, dtype=torch.uint8)
transform = transforms.RandomVerticalFlip(p=p)

actual = transform(to_pil_image(input))

assert_equal(expected, pil_to_tensor(actual))

def test_datapoints_image(self, p):
input, expected = self.input_expected_image_tensor(p)
transform = transforms.RandomVerticalFlip(p=p)

actual = transform(datapoints.Image(input))

assert_equal(datapoints.Image(expected), actual)

def test_datapoints_mask(self, p):
input, expected = self.input_expected_image_tensor(p)
transform = transforms.RandomVerticalFlip(p=p)

actual = transform(datapoints.Mask(input))

assert_equal(datapoints.Mask(expected), actual)

def test_datapoints_bounding_box(self, p):
input = datapoints.BoundingBox([0, 0, 5, 5], format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(10, 10))
transform = transforms.RandomVerticalFlip(p=p)

actual = transform(input)

expected_image_tensor = torch.tensor([0, 5, 5, 10]) if p == 1.0 else input
expected = datapoints.BoundingBox.wrap_like(input, expected_image_tensor)
assert_equal(expected, actual)
assert actual.format == expected.format
assert actual.spatial_size == expected.spatial_size


class TestPad:
def test_assertions(self):
with pytest.raises(TypeError, match="Got inappropriate padding arg"):
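Note: the removed `TestRandomVerticalFlip` cases are superseded by the consolidated `TestVerticalFlip` class added to `test_transforms_v2_refactored.py` below. For reference, a minimal sketch (not part of the diff) of the image behavior those removed tests asserted — a vertical flip reverses the rows, i.e. flips along the height dimension:

```python
import torch

# Input/expected pair from the removed test: two identical 2x2 channels.
image = torch.tensor([[[1, 1], [0, 0]], [[1, 1], [0, 0]]])

# Vertical flip reverses the height dimension (dim=-2), matching
# vertical_flip_image_tensor's `image.flip(-2)`.
flipped = image.flip(-2)
print(flipped)  # tensor([[[0, 0], [1, 1]], [[0, 0], [1, 1]]])
```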
148 changes: 143 additions & 5 deletions test/test_transforms_v2_refactored.py
@@ -842,7 +842,7 @@ def _reference_horizontal_flip_bounding_box(self, bounding_box):
"fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)]
)
def test_bounding_box_correctness(self, format, fn):
bounding_box = self._make_input(datapoints.BoundingBox)
bounding_box = self._make_input(datapoints.BoundingBox, format=format)

actual = fn(bounding_box)
expected = self._reference_horizontal_flip_bounding_box(bounding_box)
@@ -1025,12 +1025,10 @@ def test_kernel_bounding_box(self, param, value, format, dtype, device):

@pytest.mark.parametrize("mask_type", ["segmentation", "detection"])
def test_kernel_mask(self, mask_type):
check_kernel(
F.affine_mask, self._make_input(datapoints.Mask, mask_type=mask_type), **self._MINIMAL_AFFINE_KWARGS
)
self._check_kernel(F.affine_mask, self._make_input(datapoints.Mask, mask_type=mask_type))

def test_kernel_video(self):
check_kernel(F.affine_video, self._make_input(datapoints.Video), **self._MINIMAL_AFFINE_KWARGS)
self._check_kernel(F.affine_video, self._make_input(datapoints.Video))

@pytest.mark.parametrize(
("input_type", "kernel"),
@@ -1301,3 +1299,143 @@ def test_transform_negative_shear_error(self):
def test_transform_unknown_fill_error(self):
with pytest.raises(TypeError, match="Got inappropriate fill arg"):
transforms.RandomAffine(degrees=0, fill="fill")


class TestVerticalFlip:
def _make_input(self, input_type, *, dtype=None, device="cpu", spatial_size=(17, 11), **kwargs):
if input_type in {torch.Tensor, PIL.Image.Image, datapoints.Image}:
input = make_image(size=spatial_size, dtype=dtype or torch.uint8, device=device, **kwargs)
if input_type is torch.Tensor:
input = input.as_subclass(torch.Tensor)
elif input_type is PIL.Image.Image:
input = F.to_image_pil(input)
elif input_type is datapoints.BoundingBox:
kwargs.setdefault("format", datapoints.BoundingBoxFormat.XYXY)
input = make_bounding_box(
dtype=dtype or torch.float32,
device=device,
spatial_size=spatial_size,
**kwargs,
)
elif input_type is datapoints.Mask:
input = make_segmentation_mask(size=spatial_size, dtype=dtype or torch.uint8, device=device, **kwargs)
elif input_type is datapoints.Video:
input = make_video(size=spatial_size, dtype=dtype or torch.uint8, device=device, **kwargs)

return input

@pytest.mark.parametrize("dtype", [torch.float32, torch.uint8])
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_kernel_image_tensor(self, dtype, device):
check_kernel(F.vertical_flip_image_tensor, self._make_input(torch.Tensor, dtype=dtype, device=device))

@pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat))
@pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_kernel_bounding_box(self, format, dtype, device):
bounding_box = self._make_input(datapoints.BoundingBox, dtype=dtype, device=device, format=format)
check_kernel(
F.vertical_flip_bounding_box,
bounding_box,
format=format,
spatial_size=bounding_box.spatial_size,
)

@pytest.mark.parametrize(
"dtype_and_make_mask", [(torch.uint8, make_segmentation_mask), (torch.bool, make_detection_mask)]
)
def test_kernel_mask(self, dtype_and_make_mask):
dtype, make_mask = dtype_and_make_mask
check_kernel(F.vertical_flip_mask, make_mask(dtype=dtype))

def test_kernel_video(self):
check_kernel(F.vertical_flip_video, self._make_input(datapoints.Video))

@pytest.mark.parametrize(
("input_type", "kernel"),
[
(torch.Tensor, F.vertical_flip_image_tensor),
(PIL.Image.Image, F.vertical_flip_image_pil),
(datapoints.Image, F.vertical_flip_image_tensor),
(datapoints.BoundingBox, F.vertical_flip_bounding_box),
(datapoints.Mask, F.vertical_flip_mask),
(datapoints.Video, F.vertical_flip_video),
],
)
def test_dispatcher(self, kernel, input_type):
check_dispatcher(F.vertical_flip, kernel, self._make_input(input_type))

@pytest.mark.parametrize(
("input_type", "kernel"),
[
(torch.Tensor, F.vertical_flip_image_tensor),
(PIL.Image.Image, F.vertical_flip_image_pil),
(datapoints.Image, F.vertical_flip_image_tensor),
(datapoints.BoundingBox, F.vertical_flip_bounding_box),
(datapoints.Mask, F.vertical_flip_mask),
(datapoints.Video, F.vertical_flip_video),
],
)
def test_dispatcher_signature(self, kernel, input_type):
check_dispatcher_signatures_match(F.vertical_flip, kernel=kernel, input_type=input_type)

@pytest.mark.parametrize(
"input_type",
[torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video],
)
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_transform(self, input_type, device):
input = self._make_input(input_type, device=device)

check_transform(transforms.RandomVerticalFlip, input, p=1)

@pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)])
def test_image_correctness(self, fn):
image = self._make_input(torch.Tensor, dtype=torch.uint8, device="cpu")

actual = fn(image)
expected = F.to_image_tensor(F.vertical_flip(F.to_image_pil(image)))

torch.testing.assert_close(actual, expected)

def _reference_vertical_flip_bounding_box(self, bounding_box):
affine_matrix = np.array(
[
[1, 0, 0],
[0, -1, bounding_box.spatial_size[0]],
],
dtype="float64" if bounding_box.dtype == torch.float64 else "float32",
)

expected_bboxes = reference_affine_bounding_box_helper(
bounding_box,
format=bounding_box.format,
spatial_size=bounding_box.spatial_size,
affine_matrix=affine_matrix,
)

return datapoints.BoundingBox.wrap_like(bounding_box, expected_bboxes)

@pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat))
@pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)])
def test_bounding_box_correctness(self, format, fn):
bounding_box = self._make_input(datapoints.BoundingBox, format=format)

actual = fn(bounding_box)
expected = self._reference_vertical_flip_bounding_box(bounding_box)

torch.testing.assert_close(actual, expected)

@pytest.mark.parametrize(
"input_type",
[torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video],
)
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_transform_noop(self, input_type, device):
input = self._make_input(input_type, device=device)

transform = transforms.RandomVerticalFlip(p=0)

output = transform(input)

assert_equal(output, input)
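Note on the reference above: `_reference_vertical_flip_bounding_box` encodes the vertical flip as the affine matrix `[[1, 0, 0], [0, -1, H]]`, i.e. x is unchanged and y maps to `H - y`, where `H = spatial_size[0]`. A minimal standalone sketch (not the test helper itself) of what that means for an XYXY box:

```python
# Vertically flipping an XYXY box in an image of height H swaps the roles of
# the top and bottom edges: the old bottom edge (y2) becomes the new top edge.
def vflip_xyxy(box, height):
    x1, y1, x2, y2 = box
    return [x1, height - y2, x2, height - y1]

# Matches the expectation from the removed v2 test: a [0, 0, 5, 5] box in a
# (10, 10) image flips to [0, 5, 5, 10].
print(vflip_xyxy([0, 0, 5, 5], height=10))  # [0, 5, 5, 10]
```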
10 changes: 0 additions & 10 deletions test/transforms_v2_dispatcher_infos.py
@@ -138,16 +138,6 @@ def fill_sequence_needs_broadcast(args_kwargs):


DISPATCHER_INFOS = [
DispatcherInfo(
F.vertical_flip,
kernels={
datapoints.Image: F.vertical_flip_image_tensor,
datapoints.Video: F.vertical_flip_video,
datapoints.BoundingBox: F.vertical_flip_bounding_box,
datapoints.Mask: F.vertical_flip_mask,
},
pil_kernel_info=PILKernelInfo(F.vertical_flip_image_pil, kernel_name="vertical_flip_image_pil"),
),
DispatcherInfo(
F.rotate,
kernels={
81 changes: 0 additions & 81 deletions test/transforms_v2_kernel_infos.py
@@ -264,87 +264,6 @@ def reference_inputs_convert_format_bounding_box():
)


def sample_inputs_vertical_flip_image_tensor():
for image_loader in make_image_loaders(sizes=["random"], dtypes=[torch.float32]):
yield ArgsKwargs(image_loader)


def reference_inputs_vertical_flip_image_tensor():
for image_loader in make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]):
yield ArgsKwargs(image_loader)


def sample_inputs_vertical_flip_bounding_box():
for bounding_box_loader in make_bounding_box_loaders(
formats=[datapoints.BoundingBoxFormat.XYXY], dtypes=[torch.float32]
):
yield ArgsKwargs(
bounding_box_loader, format=bounding_box_loader.format, spatial_size=bounding_box_loader.spatial_size
)


def sample_inputs_vertical_flip_mask():
for image_loader in make_mask_loaders(sizes=["random"], dtypes=[torch.uint8]):
yield ArgsKwargs(image_loader)


def sample_inputs_vertical_flip_video():
for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
yield ArgsKwargs(video_loader)


def reference_vertical_flip_bounding_box(bounding_box, *, format, spatial_size):
affine_matrix = np.array(
[
[1, 0, 0],
[0, -1, spatial_size[0]],
],
dtype="float64" if bounding_box.dtype == torch.float64 else "float32",
)

expected_bboxes = reference_affine_bounding_box_helper(
bounding_box, format=format, spatial_size=spatial_size, affine_matrix=affine_matrix
)

return expected_bboxes


def reference_inputs_vertical_flip_bounding_box():
for bounding_box_loader in make_bounding_box_loaders(extra_dims=[()]):
yield ArgsKwargs(
bounding_box_loader,
format=bounding_box_loader.format,
spatial_size=bounding_box_loader.spatial_size,
)


KERNEL_INFOS.extend(
[
KernelInfo(
F.vertical_flip_image_tensor,
kernel_name="vertical_flip_image_tensor",
sample_inputs_fn=sample_inputs_vertical_flip_image_tensor,
reference_fn=pil_reference_wrapper(F.vertical_flip_image_pil),
reference_inputs_fn=reference_inputs_vertical_flip_image_tensor,
float32_vs_uint8=True,
),
KernelInfo(
F.vertical_flip_bounding_box,
sample_inputs_fn=sample_inputs_vertical_flip_bounding_box,
reference_fn=reference_vertical_flip_bounding_box,
reference_inputs_fn=reference_inputs_vertical_flip_bounding_box,
),
KernelInfo(
F.vertical_flip_mask,
sample_inputs_fn=sample_inputs_vertical_flip_mask,
),
KernelInfo(
F.vertical_flip_video,
sample_inputs_fn=sample_inputs_vertical_flip_video,
),
]
)

_ROTATE_ANGLES = [-87, 15, 90]


3 changes: 2 additions & 1 deletion torchvision/transforms/v2/functional/_geometry.py
@@ -93,7 +93,8 @@ def vertical_flip_image_tensor(image: torch.Tensor) -> torch.Tensor:
return image.flip(-2)


vertical_flip_image_pil = _FP.vflip
def vertical_flip_image_pil(image: PIL.Image) -> PIL.Image:
(Collaborator author comment on this line: Same as #7703 (comment))

return _FP.vflip(image)


def vertical_flip_mask(mask: torch.Tensor) -> torch.Tensor:
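Note on the last change: replacing the bare alias `vertical_flip_image_pil = _FP.vflip` with an explicit `def` gives the public PIL kernel its own signature and annotations. One likely motivation (the linked comment in #7703 carries the authoritative rationale) is that the new signature-consistency checks, such as `check_dispatcher_signatures_match`, can then compare the kernel's parameters against `F.vertical_flip`. A minimal sketch of the difference, with `_vflip` as a hypothetical stand-in for the underlying PIL helper:

```python
import inspect

def _vflip(img):  # hypothetical stand-in for the underlying PIL helper
    return img

# Plain alias: exposes the helper's own signature, "(img)".
vertical_flip_alias = _vflip

# Thin wrapper: the public kernel declares its own parameter name, "(image)".
def vertical_flip_wrapper(image):
    return _vflip(image)

print(inspect.signature(vertical_flip_alias))    # (img)
print(inspect.signature(vertical_flip_wrapper))  # (image)
```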