Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SDK/CLI: improve mask support in the auto-annotation functionality #8724

Merged
merged 6 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
# SPDX-License-Identifier: MIT

import math
from collections.abc import Iterator

import numpy as np
import PIL.Image
from skimage import measure
from torch import Tensor
Expand All @@ -15,6 +17,38 @@
from ._torchvision import TorchvisionFunction


def _is_positively_oriented(contour: np.ndarray) -> bool:
ys, xs = contour.T

# This is the shoelace formula, except we only need the sign of the result,
# so we compare instead of subtracting. Compared to the typical formula,
# the sign is inverted, because the Y axis points downwards.
return np.sum(xs * np.roll(ys, -1)) < np.sum(ys * np.roll(xs, -1))


def _generate_shapes(
    context: cvataa.DetectionFunctionContext, box: Tensor, mask: Tensor, label: Tensor
) -> Iterator[models.LabeledShapeRequest]:
    """Yield CVAT shapes for one detected instance.

    Depending on ``context.conv_mask_to_poly``, emits either polygon shapes
    traced around the instance mask or a single encoded mask shape.
    Pixels with probability >= 0.5 are treated as part of the instance.
    """
    LEVEL = 0.5

    if not context.conv_mask_to_poly:
        yield cvataa.mask(label.item(), encode_mask(mask[0] >= LEVEL, box.tolist()))
        return

    # Since we treat mask values of exactly LEVEL as true, we'd like them
    # to also be considered high by find_contours. And for that, the level
    # parameter must be slightly less than LEVEL.
    contour_level = math.nextafter(LEVEL, 0)
    probability_map = mask[0].detach().numpy()

    for contour in measure.find_contours(probability_map, level=contour_level):
        # Skip degenerate contours and positively-oriented ones (holes).
        if len(contour) >= 3 and not _is_positively_oriented(contour):
            simplified = measure.approximate_polygon(contour, tolerance=2.5)
            # find_contours yields (row, col); CVAT polygons expect (x, y).
            yield cvataa.polygon(label.item(), simplified[:, ::-1].ravel().tolist())


class _TorchvisionInstanceSegmentationFunction(TorchvisionFunction):
def detect(
self, context: cvataa.DetectionFunctionContext, image: PIL.Image.Image
Expand All @@ -29,40 +63,8 @@ def detect(
result["boxes"], result["masks"], result["labels"], result["scores"]
)
if score >= conf_threshold
for shape in self._generate_shapes(context, box, mask, label)
for shape in _generate_shapes(context, box, mask, label)
]

def _generate_shapes(
self, context: cvataa.DetectionFunctionContext, box: Tensor, mask: Tensor, label: Tensor
) -> list[models.LabeledShapeRequest]:
LEVEL = 0.5

if context.conv_mask_to_poly:
# Since we treat mask values of exactly LEVEL as true, we'd like them
# to also be considered high by find_contours. And for that, the level
# parameter must be slightly less than LEVEL.
contours = measure.find_contours(
mask[0].detach().numpy(), level=math.nextafter(LEVEL, 0)
)
if not contours:
return []

contour = contours[0]
if len(contour) < 3:
return []

contour = measure.approximate_polygon(contour, tolerance=2.5)

return [
cvataa.polygon(
label.item(),
contour[:, ::-1].ravel().tolist(),
)
]
else:
return [
cvataa.mask(label.item(), encode_mask((mask[0] >= LEVEL).numpy(), box.tolist()))
]


create = _TorchvisionInstanceSegmentationFunction
8 changes: 7 additions & 1 deletion tests/python/sdk/test_auto_annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,13 +673,18 @@ def make_box(im, a1, a2):
return [im.shape[2] * a1, im.shape[1] * a1, im.shape[2] * a2, im.shape[1] * a2]

def make_mask(im, a1, a2):
# creates a rectangular mask
# creates a rectangular mask with a hole
mask = torch.full((1, im.shape[1], im.shape[2]), 0.49)
mask[
0,
math.ceil(im.shape[1] * a1) : math.floor(im.shape[1] * a2),
math.ceil(im.shape[2] * a1) : math.floor(im.shape[2] * a2),
] = 0.5
mask[
0,
math.ceil(im.shape[1] * a1) + 3 : math.floor(im.shape[1] * a2) - 3,
math.ceil(im.shape[2] * a1) + 3 : math.floor(im.shape[2] * a2) - 3,
] = 0.49
return mask

return [
Expand Down Expand Up @@ -827,6 +832,7 @@ def test_torchvision_instance_segmentation(self, monkeypatch: pytest.MonkeyPatch

expected_bitmap = torch.zeros((100, 100), dtype=torch.bool)
expected_bitmap[17:33, 17:33] = True
expected_bitmap[20:30, 20:30] = False

assert annotations.shapes[0].type.value == "mask"
assert annotations.shapes[0].points == encode_mask(expected_bitmap, [16, 16, 34, 34])
Expand Down
Loading