Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SDK/CLI: improve mask support in the auto-annotation functionality #8724

Merged
merged 6 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
# SPDX-License-Identifier: MIT

import math
from collections.abc import Iterator

import numpy as np
import PIL.Image
from skimage import measure
from torch import Tensor
Expand All @@ -15,6 +17,38 @@
from ._torchvision import TorchvisionFunction


def _is_positively_oriented(contour: np.ndarray) -> bool:
ys, xs = contour.T

# This is the shoelace formula, except we only need the sign of the result,
# so we compare instead of subtracting. Compared to the typical formula,
# the sign is inverted, because the Y axis points downwards.
return np.sum(xs * np.roll(ys, -1)) < np.sum(ys * np.roll(xs, -1))


def _generate_shapes(
    context: cvataa.DetectionFunctionContext, box: Tensor, mask: Tensor, label: Tensor
) -> Iterator[models.LabeledShapeRequest]:
    """Yield CVAT shapes for one detected instance.

    Depending on ``context.conv_mask_to_poly``, emits either polygon shapes
    traced around the instance mask or a single encoded mask shape.
    Pixels with probability >= 0.5 are treated as part of the instance.
    """
    LEVEL = 0.5

    if not context.conv_mask_to_poly:
        yield cvataa.mask(label.item(), encode_mask(mask[0] >= LEVEL, box.tolist()))
        return

    # Since we treat mask values of exactly LEVEL as true, we'd like them
    # to also be considered high by find_contours. And for that, the level
    # parameter must be slightly less than LEVEL.
    contour_level = math.nextafter(LEVEL, 0)
    probability_map = mask[0].detach().numpy()

    for contour in measure.find_contours(probability_map, level=contour_level):
        # Skip degenerate contours and positively-oriented ones (holes).
        if len(contour) >= 3 and not _is_positively_oriented(contour):
            simplified = measure.approximate_polygon(contour, tolerance=2.5)
            # find_contours yields (row, col); CVAT polygons expect (x, y).
            yield cvataa.polygon(label.item(), simplified[:, ::-1].ravel().tolist())


class _TorchvisionInstanceSegmentationFunction(TorchvisionFunction):
def detect(
self, context: cvataa.DetectionFunctionContext, image: PIL.Image.Image
Expand All @@ -29,40 +63,8 @@ def detect(
result["boxes"], result["masks"], result["labels"], result["scores"]
)
if score >= conf_threshold
for shape in self._generate_shapes(context, box, mask, label)
for shape in _generate_shapes(context, box, mask, label)
]

def _generate_shapes(
self, context: cvataa.DetectionFunctionContext, box: Tensor, mask: Tensor, label: Tensor
) -> list[models.LabeledShapeRequest]:
LEVEL = 0.5

if context.conv_mask_to_poly:
# Since we treat mask values of exactly LEVEL as true, we'd like them
# to also be considered high by find_contours. And for that, the level
# parameter must be slightly less than LEVEL.
contours = measure.find_contours(
mask[0].detach().numpy(), level=math.nextafter(LEVEL, 0)
)
if not contours:
return []

contour = contours[0]
if len(contour) < 3:
return []

contour = measure.approximate_polygon(contour, tolerance=2.5)

return [
cvataa.polygon(
label.item(),
contour[:, ::-1].ravel().tolist(),
)
]
else:
return [
cvataa.mask(label.item(), encode_mask((mask[0] >= LEVEL).numpy(), box.tolist()))
]


create = _TorchvisionInstanceSegmentationFunction
8 changes: 7 additions & 1 deletion tests/python/sdk/test_auto_annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,13 +673,18 @@ def make_box(im, a1, a2):
return [im.shape[2] * a1, im.shape[1] * a1, im.shape[2] * a2, im.shape[1] * a2]

def make_mask(im, a1, a2):
# creates a rectangular mask
# creates a rectangular mask with a hole
mask = torch.full((1, im.shape[1], im.shape[2]), 0.49)
mask[
0,
math.ceil(im.shape[1] * a1) : math.floor(im.shape[1] * a2),
math.ceil(im.shape[2] * a1) : math.floor(im.shape[2] * a2),
] = 0.5
mask[
0,
math.ceil(im.shape[1] * a1) + 3 : math.floor(im.shape[1] * a2) - 3,
math.ceil(im.shape[2] * a1) + 3 : math.floor(im.shape[2] * a2) - 3,
] = 0.49
return mask

return [
Expand Down Expand Up @@ -827,6 +832,7 @@ def test_torchvision_instance_segmentation(self, monkeypatch: pytest.MonkeyPatch

expected_bitmap = torch.zeros((100, 100), dtype=torch.bool)
expected_bitmap[17:33, 17:33] = True
expected_bitmap[20:30, 20:30] = False

assert annotations.shapes[0].type.value == "mask"
assert annotations.shapes[0].points == encode_mask(expected_bitmap, [16, 16, 34, 34])
Expand Down
Loading