diff --git a/.github/workflows/full.yml b/.github/workflows/full.yml
index e587e26aa1b8..e42380de5ead 100644
--- a/.github/workflows/full.yml
+++ b/.github/workflows/full.yml
@@ -156,7 +156,7 @@ jobs:
       - name: Install SDK
         run: |
           pip3 install -r ./tests/python/requirements.txt \
-            -e './cvat-sdk[pytorch]' -e ./cvat-cli \
+            -e './cvat-sdk[masks,pytorch]' -e ./cvat-cli \
             --extra-index-url https://download.pytorch.org/whl/cpu
 
       - name: Running REST API and SDK tests
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index f4e3f11d1052..becca0218f94 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -166,7 +166,7 @@ jobs:
       - name: Install SDK
         run: |
           pip3 install -r ./tests/python/requirements.txt \
-            -e './cvat-sdk[pytorch]' -e ./cvat-cli \
+            -e './cvat-sdk[masks,pytorch]' -e ./cvat-cli \
             --extra-index-url https://download.pytorch.org/whl/cpu
 
       - name: Run REST API and SDK tests
diff --git a/changelog.d/20241120_143739_roman_aa_masks.md b/changelog.d/20241120_143739_roman_aa_masks.md
new file mode 100644
index 000000000000..97422dfe6060
--- /dev/null
+++ b/changelog.d/20241120_143739_roman_aa_masks.md
@@ -0,0 +1,13 @@
+### Added
+
+- \[SDK\] Added new auto-annotation helpers (`mask`, `polygon`, `encode_mask`)
+  to support AA functions that return masks or polygons
+  ()
+
+- \[SDK\] Added a new built-in auto-annotation function,
+  `torchvision_instance_segmentation`
+  ()
+
+- \[SDK, CLI\] Added a new auto-annotation parameter, `conv_mask_to_poly`
+  (`--conv-mask-to-poly` in the CLI)
+  ()
diff --git a/cvat-cli/src/cvat_cli/_internal/commands.py b/cvat-cli/src/cvat_cli/_internal/commands.py
index f49416c843e5..324d427a64b8 100644
--- a/cvat-cli/src/cvat_cli/_internal/commands.py
+++ b/cvat-cli/src/cvat_cli/_internal/commands.py
@@ -476,6 +476,12 @@ def configure_parser(self, parser: argparse.ArgumentParser) -> None:
             default=None,
         )
 
+        parser.add_argument(
+            "--conv-mask-to-poly",
+            action="store_true",
+            help="Convert mask shapes to polygon shapes",
+        )
+
     def execute(
         self,
         client: Client,
@@ -487,6 +493,7 @@ def execute(
         clear_existing: bool = False,
         allow_unmatched_labels: bool = False,
         conf_threshold: Optional[float],
+        conv_mask_to_poly: bool,
     ) -> None:
         if function_module is not None:
             function = importlib.import_module(function_module)
@@ -512,4 +519,5 @@ def execute(
             clear_existing=clear_existing,
             allow_unmatched_labels=allow_unmatched_labels,
             conf_threshold=conf_threshold,
+            conv_mask_to_poly=conv_mask_to_poly,
         )
diff --git a/cvat-sdk/README.md b/cvat-sdk/README.md
index fa68c0e5d40d..89702c02abd4 100644
--- a/cvat-sdk/README.md
+++ b/cvat-sdk/README.md
@@ -20,7 +20,14 @@ To install a prebuilt package, run the following command in the terminal:
 pip install cvat-sdk
 ```
 
-To use the PyTorch adapter, request the `pytorch` extra:
+To use the `cvat_sdk.masks` module, request the `masks` extra:
+
+```bash
+pip install "cvat-sdk[masks]"
+```
+
+To use the PyTorch adapter or the built-in PyTorch-based auto-annotation functions,
+request the `pytorch` extra:
 
 ```bash
 pip install "cvat-sdk[pytorch]"
"DetectionFunctionContext", + "DetectionFunctionSpec", + "keypoint_spec", + "keypoint", + "label_spec", + "mask", + "polygon", + "rectangle", + "shape", + "skeleton_label_spec", + "skeleton", +] diff --git a/cvat-sdk/cvat_sdk/auto_annotation/driver.py b/cvat-sdk/cvat_sdk/auto_annotation/driver.py index 175b96ab29b2..5ffdb36f5bee 100644 --- a/cvat-sdk/cvat_sdk/auto_annotation/driver.py +++ b/cvat-sdk/cvat_sdk/auto_annotation/driver.py @@ -99,9 +99,11 @@ def __init__( ds_labels: Sequence[models.ILabel], *, allow_unmatched_labels: bool, + conv_mask_to_poly: bool, ) -> None: self._logger = logger self._allow_unmatched_labels = allow_unmatched_labels + self._conv_mask_to_poly = conv_mask_to_poly ds_labels_by_name = {ds_label.name: ds_label for ds_label in ds_labels} @@ -217,6 +219,11 @@ def validate_and_remap(self, shapes: list[models.LabeledShapeRequest], ds_frame: if getattr(shape, "elements", None): raise BadFunctionError("function output non-skeleton shape with elements") + if shape.type.value == "mask" and self._conv_mask_to_poly: + raise BadFunctionError( + "function output mask shape despite conv_mask_to_poly=True" + ) + shapes[:] = new_shapes @@ -224,6 +231,7 @@ def validate_and_remap(self, shapes: list[models.LabeledShapeRequest], ds_frame: class _DetectionFunctionContextImpl(DetectionFunctionContext): frame_name: str conf_threshold: Optional[float] = None + conv_mask_to_poly: bool = False def annotate_task( @@ -235,6 +243,7 @@ def annotate_task( clear_existing: bool = False, allow_unmatched_labels: bool = False, conf_threshold: Optional[float] = None, + conv_mask_to_poly: bool = False, ) -> None: """ Downloads data for the task with the given ID, applies the given function to it @@ -268,7 +277,11 @@ def annotate_task( function that refer to this label are ignored. Otherwise, BadFunctionError is raised. The conf_threshold parameter must be None or a number between 0 and 1. It will be passed - to the function as the conf_threshold attribute of the context object. + to the AA function as the conf_threshold attribute of the context object. + + The conv_mask_to_poly parameter will be passed to the AA function as the conv_mask_to_poly + attribute of the context object. If it's true, and the AA function returns any mask shapes, + BadFunctionError will be raised. 
""" if pbar is None: @@ -286,6 +299,7 @@ def annotate_task( function.spec.labels, dataset.labels, allow_unmatched_labels=allow_unmatched_labels, + conv_mask_to_poly=conv_mask_to_poly, ) shapes = [] @@ -294,7 +308,9 @@ def annotate_task( for sample in pbar.iter(dataset.samples): frame_shapes = function.detect( _DetectionFunctionContextImpl( - frame_name=sample.frame_name, conf_threshold=conf_threshold + frame_name=sample.frame_name, + conf_threshold=conf_threshold, + conv_mask_to_poly=conv_mask_to_poly, ), sample.media.load_image(), ) diff --git a/cvat-sdk/cvat_sdk/auto_annotation/functions/_torchvision.py b/cvat-sdk/cvat_sdk/auto_annotation/functions/_torchvision.py new file mode 100644 index 000000000000..9fa88e0a7c07 --- /dev/null +++ b/cvat-sdk/cvat_sdk/auto_annotation/functions/_torchvision.py @@ -0,0 +1,26 @@ +# Copyright (C) 2024 CVAT.ai Corporation +# +# SPDX-License-Identifier: MIT + +from functools import cached_property + +import torchvision.models + +import cvat_sdk.auto_annotation as cvataa + + +class TorchvisionFunction: + def __init__(self, model_name: str, weights_name: str = "DEFAULT", **kwargs) -> None: + weights_enum = torchvision.models.get_model_weights(model_name) + self._weights = weights_enum[weights_name] + self._transforms = self._weights.transforms() + self._model = torchvision.models.get_model(model_name, weights=self._weights, **kwargs) + self._model.eval() + + @cached_property + def spec(self) -> cvataa.DetectionFunctionSpec: + return cvataa.DetectionFunctionSpec( + labels=[ + cvataa.label_spec(cat, i) for i, cat in enumerate(self._weights.meta["categories"]) + ] + ) diff --git a/cvat-sdk/cvat_sdk/auto_annotation/functions/torchvision_detection.py b/cvat-sdk/cvat_sdk/auto_annotation/functions/torchvision_detection.py index 423db05adbcb..b16e4d8874ae 100644 --- a/cvat-sdk/cvat_sdk/auto_annotation/functions/torchvision_detection.py +++ b/cvat-sdk/cvat_sdk/auto_annotation/functions/torchvision_detection.py @@ -2,31 +2,15 @@ # # SPDX-License-Identifier: MIT -from functools import cached_property - import PIL.Image -import torchvision.models import cvat_sdk.auto_annotation as cvataa import cvat_sdk.models as models +from ._torchvision import TorchvisionFunction -class _TorchvisionDetectionFunction: - def __init__(self, model_name: str, weights_name: str = "DEFAULT", **kwargs) -> None: - weights_enum = torchvision.models.get_model_weights(model_name) - self._weights = weights_enum[weights_name] - self._transforms = self._weights.transforms() - self._model = torchvision.models.get_model(model_name, weights=self._weights, **kwargs) - self._model.eval() - - @cached_property - def spec(self) -> cvataa.DetectionFunctionSpec: - return cvataa.DetectionFunctionSpec( - labels=[ - cvataa.label_spec(cat, i) for i, cat in enumerate(self._weights.meta["categories"]) - ] - ) +class _TorchvisionDetectionFunction(TorchvisionFunction): def detect( self, context: cvataa.DetectionFunctionContext, image: PIL.Image.Image ) -> list[models.LabeledShapeRequest]: diff --git a/cvat-sdk/cvat_sdk/auto_annotation/functions/torchvision_instance_segmentation.py b/cvat-sdk/cvat_sdk/auto_annotation/functions/torchvision_instance_segmentation.py new file mode 100644 index 000000000000..6aa891811f5b --- /dev/null +++ b/cvat-sdk/cvat_sdk/auto_annotation/functions/torchvision_instance_segmentation.py @@ -0,0 +1,70 @@ +# Copyright (C) 2024 CVAT.ai Corporation +# +# SPDX-License-Identifier: MIT + +import math +from collections.abc import Iterator + +import numpy as np +import PIL.Image +from skimage 
+from skimage import measure
+from torch import Tensor
+
+import cvat_sdk.auto_annotation as cvataa
+import cvat_sdk.models as models
+from cvat_sdk.masks import encode_mask
+
+from ._torchvision import TorchvisionFunction
+
+
+def _is_positively_oriented(contour: np.ndarray) -> bool:
+    ys, xs = contour.T
+
+    # This is the shoelace formula, except we only need the sign of the result,
+    # so we compare instead of subtracting. Compared to the typical formula,
+    # the sign is inverted, because the Y axis points downwards.
+    return np.sum(xs * np.roll(ys, -1)) < np.sum(ys * np.roll(xs, -1))
+
+
+def _generate_shapes(
+    context: cvataa.DetectionFunctionContext, box: Tensor, mask: Tensor, label: Tensor
+) -> Iterator[models.LabeledShapeRequest]:
+    LEVEL = 0.5
+
+    if context.conv_mask_to_poly:
+        # Since we treat mask values of exactly LEVEL as true, we'd like them
+        # to also be considered high by find_contours. And for that, the level
+        # parameter must be slightly less than LEVEL.
+        contours = measure.find_contours(mask[0].detach().numpy(), level=math.nextafter(LEVEL, 0))
+
+        for contour in contours:
+            if len(contour) < 3 or _is_positively_oriented(contour):
+                continue
+
+            contour = measure.approximate_polygon(contour, tolerance=2.5)
+
+            yield cvataa.polygon(label.item(), contour[:, ::-1].ravel().tolist())
+
+    else:
+        yield cvataa.mask(label.item(), encode_mask(mask[0] >= LEVEL, box.tolist()))
+
+
+class _TorchvisionInstanceSegmentationFunction(TorchvisionFunction):
+    def detect(
+        self, context: cvataa.DetectionFunctionContext, image: PIL.Image.Image
+    ) -> list[models.LabeledShapeRequest]:
+        conf_threshold = context.conf_threshold or 0
+        results = self._model([self._transforms(image)])
+
+        return [
+            shape
+            for result in results
+            for box, mask, label, score in zip(
+                result["boxes"], result["masks"], result["labels"], result["scores"]
+            )
+            if score >= conf_threshold
+            for shape in _generate_shapes(context, box, mask, label)
+        ]
+
+
+create = _TorchvisionInstanceSegmentationFunction
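The orientation check above is what lets the polygon path keep only one traversal direction of the contours that `find_contours` returns, so that hole boundaries (traversed in the opposite direction) are dropped. A standalone sketch of the same sign test on a toy square, with illustrative coordinates (contour rows are `(y, x)` pairs, as in scikit-image):

```python
import numpy as np


def is_positively_oriented(contour: np.ndarray) -> bool:
    # Same shoelace-sign comparison as _is_positively_oriented above.
    ys, xs = contour.T
    return np.sum(xs * np.roll(ys, -1)) < np.sum(ys * np.roll(xs, -1))


# A square traversed clockwise on screen (the Y axis points down):
# right along the top, down, left along the bottom, up.
square = np.array([[0, 0], [0, 2], [2, 2], [2, 0]], dtype=float)

print(is_positively_oriented(square))        # False
print(is_positively_oriented(square[::-1]))  # True: reversing the traversal flips the sign
```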
diff --git a/cvat-sdk/cvat_sdk/auto_annotation/functions/torchvision_keypoint_detection.py b/cvat-sdk/cvat_sdk/auto_annotation/functions/torchvision_keypoint_detection.py
index 0756b0b1738c..4d2250d61c35 100644
--- a/cvat-sdk/cvat_sdk/auto_annotation/functions/torchvision_keypoint_detection.py
+++ b/cvat-sdk/cvat_sdk/auto_annotation/functions/torchvision_keypoint_detection.py
@@ -5,20 +5,14 @@
 from functools import cached_property
 
 import PIL.Image
-import torchvision.models
 
 import cvat_sdk.auto_annotation as cvataa
 import cvat_sdk.models as models
 
+from ._torchvision import TorchvisionFunction
 
-class _TorchvisionKeypointDetectionFunction:
-    def __init__(self, model_name: str, weights_name: str = "DEFAULT", **kwargs) -> None:
-        weights_enum = torchvision.models.get_model_weights(model_name)
-        self._weights = weights_enum[weights_name]
-        self._transforms = self._weights.transforms()
-        self._model = torchvision.models.get_model(model_name, weights=self._weights, **kwargs)
-        self._model.eval()
 
+class _TorchvisionKeypointDetectionFunction(TorchvisionFunction):
     @cached_property
     def spec(self) -> cvataa.DetectionFunctionSpec:
         return cvataa.DetectionFunctionSpec(
diff --git a/cvat-sdk/cvat_sdk/auto_annotation/interface.py b/cvat-sdk/cvat_sdk/auto_annotation/interface.py
index 47e944a1de84..f95cb50b4f2d 100644
--- a/cvat-sdk/cvat_sdk/auto_annotation/interface.py
+++ b/cvat-sdk/cvat_sdk/auto_annotation/interface.py
@@ -68,6 +68,16 @@ def conf_threshold(self) -> Optional[float]:
         If the function is not able to estimate confidence levels,
         it can ignore this value.
         """
 
+    @property
+    @abc.abstractmethod
+    def conv_mask_to_poly(self) -> bool:
+        """
+        If this is true, the function must convert any mask shapes to polygon shapes
+        before returning them.
+
+        If the function does not return any mask shapes, then it can ignore this value.
+        """
+
 
 class DetectionFunction(Protocol):
     """
@@ -168,6 +178,21 @@ def rectangle(label_id: int, points: Sequence[float], **kwargs) -> models.Labele
     return shape(label_id, type="rectangle", points=points, **kwargs)
 
 
+def polygon(label_id: int, points: Sequence[float], **kwargs) -> models.LabeledShapeRequest:
+    """Helper factory function for LabeledShapeRequest with frame=0 and type="polygon"."""
+    return shape(label_id, type="polygon", points=points, **kwargs)
+
+
+def mask(label_id: int, points: Sequence[float], **kwargs) -> models.LabeledShapeRequest:
+    """
+    Helper factory function for LabeledShapeRequest with frame=0 and type="mask".
+
+    It's recommended to use the cvat_sdk.masks.encode_mask function to build the
+    points argument.
+    """
+    return shape(label_id, type="mask", points=points, **kwargs)
+
+
 def skeleton(
     label_id: int, elements: Sequence[models.SubLabeledShapeRequest], **kwargs
 ) -> models.LabeledShapeRequest:
diff --git a/cvat-sdk/cvat_sdk/masks.py b/cvat-sdk/cvat_sdk/masks.py
new file mode 100644
index 000000000000..f623aec7d043
--- /dev/null
+++ b/cvat-sdk/cvat_sdk/masks.py
@@ -0,0 +1,44 @@
+# Copyright (C) 2024 CVAT.ai Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import math
+from collections.abc import Sequence
+
+import numpy as np
+from numpy.typing import ArrayLike
+
+
+def encode_mask(bitmap: ArrayLike, /, bbox: Sequence[float]) -> list[float]:
+    """
+    Encodes an image mask into an array of numbers suitable for the "points"
+    attribute of a LabeledShapeRequest object of type "mask".
+
+    bitmap must be a boolean array of shape (H, W), where H is the height and
+    W is the width of the image that the mask applies to.
+
+    bbox must have the form [x1, y1, x2, y2], where (0, 0) <= (x1, y1) < (x2, y2) <= (W, H).
+    The mask will be limited to points between (x1, y1) and (x2, y2).
+    """
+
+    bitmap = np.asanyarray(bitmap)
+    if bitmap.ndim != 2:
+        raise ValueError("bitmap must have 2 dimensions")
+    if bitmap.dtype != np.bool_:
+        raise ValueError("bitmap must have boolean items")
+
+    x1, y1 = map(math.floor, bbox[0:2])
+    x2, y2 = map(math.ceil, bbox[2:4])
+
+    if not (0 <= x1 < x2 <= bitmap.shape[1] and 0 <= y1 < y2 <= bitmap.shape[0]):
+        raise ValueError("bbox has invalid coordinates")
+
+    flat = bitmap[y1:y2, x1:x2].ravel()
+
+    (run_indices,) = np.diff(flat, prepend=[not flat[0]], append=[not flat[-1]]).nonzero()
+    if flat[0]:
+        run_lengths = np.diff(run_indices, prepend=[0])
+    else:
+        run_lengths = np.diff(run_indices)
+
+    return run_lengths.tolist() + [x1, y1, x2 - 1, y2 - 1]
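To make the encoding concrete, here is a small worked example; the values follow the `test_masks.py` cases added at the end of this patch. The run lengths alternate between 0- and 1-valued pixels, starting with 0s, and the trailing four numbers are the inclusive bounding box.

```python
import numpy as np

from cvat_sdk.masks import encode_mask

bitmap = np.array(
    [
        [0, 0, 1, 1, 1, 0],
        [0, 1, 1, 0, 0, 0],
    ],
    dtype=np.bool_,
)

# The bbox is rounded outward to [2, 0, 5, 2]; the cropped region
#   111
#   100
# flattens to 111100: zero 0s, four 1s, two 0s -> runs [0, 4, 2],
# followed by the inclusive bbox [2, 0, 4, 1].
print(encode_mask(bitmap, [2.9, 0.9, 4.1, 1.1]))  # [0, 4, 2, 2, 0, 4, 1]
```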
diff --git a/cvat-sdk/gen/templates/openapi-generator/setup.mustache b/cvat-sdk/gen/templates/openapi-generator/setup.mustache
index eb89f5d20554..e0379cabd06e 100644
--- a/cvat-sdk/gen/templates/openapi-generator/setup.mustache
+++ b/cvat-sdk/gen/templates/openapi-generator/setup.mustache
@@ -77,7 +77,8 @@ setup(
     python_requires="{{{generatorLanguageVersion}}}",
     install_requires=BASE_REQUIREMENTS,
     extras_require={
-        "pytorch": ['torch', 'torchvision'],
+        "masks": ["numpy>=2"],
+        "pytorch": ['torch', 'torchvision', 'scikit-image>=0.24', 'cvat_sdk[masks]'],
     },
     package_dir={"": "."},
     packages=find_packages(include=["cvat_sdk*"]),
diff --git a/site/content/en/docs/api_sdk/sdk/_index.md b/site/content/en/docs/api_sdk/sdk/_index.md
index e9683583ab0e..e855dadd979f 100644
--- a/site/content/en/docs/api_sdk/sdk/_index.md
+++ b/site/content/en/docs/api_sdk/sdk/_index.md
@@ -42,7 +42,14 @@ To install an [official release of CVAT SDK](https://pypi.org/project/cvat-sdk/)
 pip install cvat-sdk
 ```
 
-To use the PyTorch adapter, request the `pytorch` extra:
+To use the `cvat_sdk.masks` module, request the `masks` extra:
+
+```bash
+pip install "cvat-sdk[masks]"
+```
+
+To use the PyTorch adapter or the built-in PyTorch-based auto-annotation functions,
+request the `pytorch` extra:
 
 ```bash
 pip install "cvat-sdk[pytorch]"
diff --git a/site/content/en/docs/api_sdk/sdk/auto-annotation.md b/site/content/en/docs/api_sdk/sdk/auto-annotation.md
index f97759efd175..d8401955da7f 100644
--- a/site/content/en/docs/api_sdk/sdk/auto-annotation.md
+++ b/site/content/en/docs/api_sdk/sdk/auto-annotation.md
@@ -181,10 +181,23 @@ The following helpers are available for use in `detect`:
 | Name        | Model type               | Fixed attributes              |
 |-------------|--------------------------|-------------------------------|
 | `shape`     | `LabeledShapeRequest`    | `frame=0`                     |
+| `mask`      | `LabeledShapeRequest`    | `frame=0`, `type="mask"`      |
+| `polygon`   | `LabeledShapeRequest`    | `frame=0`, `type="polygon"`   |
 | `rectangle` | `LabeledShapeRequest`    | `frame=0`, `type="rectangle"` |
 | `skeleton`  | `LabeledShapeRequest`    | `frame=0`, `type="skeleton"`  |
 | `keypoint`  | `SubLabeledShapeRequest` | `frame=0`, `type="points"`    |
 
+For `mask`, it is recommended to create the points list using
+the `cvat_sdk.masks.encode_mask` function, which will convert a bitmap into a
+list in the format that CVAT expects. For example:
+
+```python
+cvataa.mask(my_label, encode_mask(
+    my_mask,  # boolean 2D array, same size as the input image
+    [x1, y1, x2, y2],  # top left and bottom right coordinates of the mask
+))
+```
+
 ## Auto-annotation driver
 
 The `annotate_task` function uses an AA function to annotate a CVAT task.
@@ -257,10 +270,18 @@ The `create` function accepts the following parameters:
 
 It also accepts arbitrary additional parameters, which are passed directly to the model constructor.
 
+### `cvat_sdk.auto_annotation.functions.torchvision_instance_segmentation`
+
+This AA function is analogous to `torchvision_detection`,
+except it uses torchvision's instance segmentation models and produces mask
+or polygon annotations (depending on the value of `conv_mask_to_poly`).
+
+Refer to that function's description for usage instructions and parameter information.
+
 ### `cvat_sdk.auto_annotation.functions.torchvision_keypoint_detection`
 
 This AA function is analogous to `torchvision_detection`,
 except it uses torchvision's keypoint detection models and produces skeleton annotations.
 Keypoints which the model marks as invisible will be marked as occluded in CVAT.
 
-Refer to the previous section for usage instructions and parameter information.
+Refer to that function's description for usage instructions and parameter information.
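Putting the documented pieces together, annotating a task with the new built-in function might look like the following sketch; the host, credentials, task ID, and threshold are placeholders:

```python
import cvat_sdk.auto_annotation as cvataa
import cvat_sdk.auto_annotation.functions.torchvision_instance_segmentation as tis
from cvat_sdk import make_client

with make_client(host="app.cvat.ai", credentials=("user", "password")) as client:
    cvataa.annotate_task(
        client,
        12345,  # placeholder task ID
        tis.create("maskrcnn_resnet50_fpn_v2"),
        conf_threshold=0.75,
        conv_mask_to_poly=True,  # emit polygons instead of masks
    )
```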
diff --git a/tests/python/cli/cmtp_function.py b/tests/python/cli/cmtp_function.py
new file mode 100644
index 000000000000..2ae5cb26f663
--- /dev/null
+++ b/tests/python/cli/cmtp_function.py
@@ -0,0 +1,22 @@
+# Copyright (C) 2024 CVAT.ai Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import cvat_sdk.auto_annotation as cvataa
+import cvat_sdk.models as models
+import PIL.Image
+
+spec = cvataa.DetectionFunctionSpec(
+    labels=[
+        cvataa.label_spec("car", 0),
+    ],
+)
+
+
+def detect(
+    context: cvataa.DetectionFunctionContext, image: PIL.Image.Image
+) -> list[models.LabeledShapeRequest]:
+    if context.conv_mask_to_poly:
+        return [cvataa.polygon(0, [0, 0, 0, 1, 1, 1])]
+    else:
+        return [cvataa.mask(0, [1, 0, 0, 0, 0])]
diff --git a/tests/python/cli/test_cli.py b/tests/python/cli/test_cli.py
index a039fd3744bc..f57775ca67ab 100644
--- a/tests/python/cli/test_cli.py
+++ b/tests/python/cli/test_cli.py
@@ -361,3 +361,25 @@ def test_auto_annotate_with_threshold(self, fxt_new_task: Task):
         annotations = fxt_new_task.get_annotations()
 
         assert annotations.shapes[0].points[0] == 0.75
+
+    def test_auto_annotate_with_cmtp(self, fxt_new_task: Task):
+        self.run_cli(
+            "auto-annotate",
+            str(fxt_new_task.id),
+            f"--function-module={__package__}.cmtp_function",
+            "--clear-existing",
+        )
+
+        annotations = fxt_new_task.get_annotations()
+        assert annotations.shapes[0].type.value == "mask"
+
+        self.run_cli(
+            "auto-annotate",
+            str(fxt_new_task.id),
+            f"--function-module={__package__}.cmtp_function",
+            "--clear-existing",
+            "--conv-mask-to-poly",
+        )
+
+        annotations = fxt_new_task.get_annotations()
+        assert annotations.shapes[0].type.value == "polygon"
diff --git a/tests/python/requirements.txt b/tests/python/requirements.txt
index 6ef44c0f5edb..5dfad3d6f7fb 100644
--- a/tests/python/requirements.txt
+++ b/tests/python/requirements.txt
@@ -4,9 +4,9 @@ pytest-cases==3.6.13
 pytest-timeout==2.1.0
 pytest-cov==4.1.0
 requests==2.32.2
-deepdiff==5.6.0
+deepdiff==7.0.1
 boto3==1.17.61
 Pillow==10.3.0
 python-dateutil==2.8.2
 pyyaml==6.0.0
-numpy==1.22.0
\ No newline at end of file
+numpy==2.0.0
diff --git a/tests/python/sdk/test_auto_annotation.py b/tests/python/sdk/test_auto_annotation.py
index 6fa96a5843f4..ff7302c1d9c5 100644
--- a/tests/python/sdk/test_auto_annotation.py
+++ b/tests/python/sdk/test_auto_annotation.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: MIT
 
 import io
+import math
 from logging import Logger
 from pathlib import Path
 from types import SimpleNamespace as namespace
@@ -307,6 +308,39 @@ def detect(
             conf_threshold=bad_threshold,
         )
 
+    def test_conv_mask_to_poly(self):
+        spec = cvataa.DetectionFunctionSpec(
+            labels=[
+                cvataa.label_spec("car", 123),
+            ],
+        )
+
+        received_cmtp = None
+
+        def detect(context, image: PIL.Image.Image) -> list[models.LabeledShapeRequest]:
+            nonlocal received_cmtp
+            received_cmtp = context.conv_mask_to_poly
+            return [cvataa.mask(123, [1, 0, 0, 0, 0])]
+
+        cvataa.annotate_task(
+            self.client,
+            self.task.id,
+            namespace(spec=spec, detect=detect),
+            conv_mask_to_poly=False,
+        )
+
+        assert received_cmtp is False
+
+        with pytest.raises(cvataa.BadFunctionError, match=".*conv_mask_to_poly.*"):
+            cvataa.annotate_task(
+                self.client,
+                self.task.id,
+                namespace(spec=spec, detect=detect),
+                conv_mask_to_poly=True,
+            )
+
+        assert received_cmtp is True
+
     def _test_bad_function_spec(self, spec: cvataa.DetectionFunctionSpec, exc_match: str) -> None:
         def detect(context, image):
             assert False
@@ -626,6 +660,60 @@ def fake_get_detection_model(name: str, weights, test_param):
 
     return FakeTorchvisionDetector(label_id=car_label_id)
 
+class FakeTorchvisionInstanceSegmenter(nn.Module):
+    def __init__(self, label_id: int) -> None:
+        super().__init__()
+        self._label_id = label_id
+
+    def forward(self, images: list[torch.Tensor]) -> list[dict]:
+        assert isinstance(images, list)
+        assert all(isinstance(t, torch.Tensor) for t in images)
+
+        def make_box(im, a1, a2):
+            return [im.shape[2] * a1, im.shape[1] * a1, im.shape[2] * a2, im.shape[1] * a2]
+
+        def make_mask(im, a1, a2):
+            # creates a rectangular mask with a hole
+            mask = torch.full((1, im.shape[1], im.shape[2]), 0.49)
+            mask[
+                0,
+                math.ceil(im.shape[1] * a1) : math.floor(im.shape[1] * a2),
+                math.ceil(im.shape[2] * a1) : math.floor(im.shape[2] * a2),
+            ] = 0.5
+            mask[
+                0,
+                math.ceil(im.shape[1] * a1) + 3 : math.floor(im.shape[1] * a2) - 3,
+                math.ceil(im.shape[2] * a1) + 3 : math.floor(im.shape[2] * a2) - 3,
+            ] = 0.49
+            return mask
+
+        return [
+            {
+                "labels": torch.tensor([self._label_id, self._label_id]),
+                "boxes": torch.tensor(
+                    [
+                        make_box(im, 1 / 6, 1 / 3),
+                        make_box(im, 2 / 3, 5 / 6),
+                    ]
+                ),
+                "masks": torch.stack(
+                    [
+                        make_mask(im, 1 / 6, 1 / 3),
+                        make_mask(im, 2 / 3, 5 / 6),
+                    ]
+                ),
+                "scores": torch.tensor([0.75, 0.74]),
+            }
+            for im in images
+        ]
+
+def fake_get_instance_segmentation_model(name: str, weights, test_param):
+    assert test_param == "expected_value"
+
+    car_label_id = weights.meta["categories"].index("car")
+
+    return FakeTorchvisionInstanceSegmenter(label_id=car_label_id)
+
 class FakeTorchvisionKeypointDetector(nn.Module):
     def __init__(self, label_id: int, keypoint_names: list[str]) -> None:
         super().__init__()
@@ -723,6 +811,54 @@ def test_torchvision_detection(self, monkeypatch: pytest.MonkeyPatch):
         assert annotations.shapes[0].type.value == "rectangle"
         assert annotations.shapes[0].points == [1, 2, 3, 4]
 
+    def test_torchvision_instance_segmentation(self, monkeypatch: pytest.MonkeyPatch):
+        monkeypatch.setattr(torchvision_models, "get_model", fake_get_instance_segmentation_model)
+
+        import cvat_sdk.auto_annotation.functions.torchvision_instance_segmentation as tis
+        from cvat_sdk.masks import encode_mask
+
+        cvataa.annotate_task(
+            self.client,
+            self.task.id,
+            tis.create("maskrcnn_resnet50_fpn_v2", "COCO_V1", test_param="expected_value"),
+            allow_unmatched_labels=True,
+            conf_threshold=0.75,
+        )
+
+        annotations = self.task.get_annotations()
+
+        assert len(annotations.shapes) == 1
+        assert self.task_labels_by_id[annotations.shapes[0].label_id].name == "car"
+
+        expected_bitmap = torch.zeros((100, 100), dtype=torch.bool)
+        expected_bitmap[17:33, 17:33] = True
+        expected_bitmap[20:30, 20:30] = False
+
+        assert annotations.shapes[0].type.value == "mask"
+        assert annotations.shapes[0].points == encode_mask(expected_bitmap, [16, 16, 34, 34])
+
+        cvataa.annotate_task(
+            self.client,
+            self.task.id,
+            tis.create("maskrcnn_resnet50_fpn_v2", "COCO_V1", test_param="expected_value"),
+            allow_unmatched_labels=True,
+            conf_threshold=0.75,
+            conv_mask_to_poly=True,
+            clear_existing=True,
+        )
+
+        annotations = self.task.get_annotations()
+
+        assert len(annotations.shapes) == 1
+        assert self.task_labels_by_id[annotations.shapes[0].label_id].name == "car"
+        assert annotations.shapes[0].type.value == "polygon"
+
+        # We shouldn't rely on the exact result of polygon conversion,
+        # since it depends on a 3rd-party library. Instead, we'll just
+        # check that all points are within the expected area.
+        for x, y in zip(*[iter(annotations.shapes[0].points)] * 2):
+            assert expected_bitmap[round(y), round(x)]
+
     def test_torchvision_keypoint_detection(self, monkeypatch: pytest.MonkeyPatch):
         monkeypatch.setattr(torchvision_models, "get_model", fake_get_keypoint_detection_model)
 
diff --git a/tests/python/sdk/test_masks.py b/tests/python/sdk/test_masks.py
new file mode 100644
index 000000000000..46e8b9f214cc
--- /dev/null
+++ b/tests/python/sdk/test_masks.py
@@ -0,0 +1,71 @@
+# Copyright (C) 2024 CVAT.ai Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import pytest
+
+try:
+    import numpy as np
+    from cvat_sdk.masks import encode_mask
+
+except ModuleNotFoundError as e:
+    if e.name.split(".")[0] != "numpy":
+        raise
+
+    encode_mask = None
+
+
+@pytest.mark.skipif(encode_mask is None, reason="NumPy is not installed")
+class TestMasks:
+    def test_encode_mask(self):
+        bitmap = np.array(
+            [
+                np.fromstring("0 0 1 1 1 0", sep=" "),
+                np.fromstring("0 1 1 0 0 0", sep=" "),
+            ],
+            dtype=np.bool_,
+        )
+        bbox = [2.9, 0.9, 4.1, 1.1]  # will get rounded to [2, 0, 5, 2]
+
+        # There's slightly different logic for when the cropped mask starts with
+        # 0 and 1, so test both.
+        # This one starts with 1:
+        # 111
+        # 100
+
+        assert encode_mask(bitmap, bbox) == [0, 4, 2, 2, 0, 4, 1]
+
+        bbox = [1, 0, 5, 2]
+
+        # This one starts with 0:
+        # 0111
+        # 1100
+
+        assert encode_mask(bitmap, bbox) == [1, 5, 2, 1, 0, 4, 1]
+
+        # Edge case: full image
+        bbox = [0, 0, 6, 2]
+        assert encode_mask(bitmap, bbox) == [2, 3, 2, 2, 3, 0, 0, 5, 1]
+
+    def test_encode_mask_invalid_dim(self):
+        with pytest.raises(ValueError, match="bitmap must have 2 dimensions"):
+            encode_mask([True], [0, 0, 1, 1])
+
+    def test_encode_mask_invalid_dtype(self):
+        with pytest.raises(ValueError, match="bitmap must have boolean items"):
+            encode_mask([[1]], [0, 0, 1, 1])
+
+    @pytest.mark.parametrize(
+        "bbox",
+        [
+            [-0.1, 0, 1, 1],
+            [0, -0.1, 1, 1],
+            [0, 0, 1.1, 1],
+            [0, 0, 1, 1.1],
+            [1, 0, 0, 1],
+            [0, 1, 1, 0],
+        ],
+    )
+    def test_encode_mask_invalid_bbox(self, bbox):
+        with pytest.raises(ValueError, match="bbox has invalid coordinates"):
+            encode_mask([[True]], bbox)
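For readers who want to sanity-check the RLE format these tests exercise, a rough inverse of `encode_mask` could look like the sketch below. It is not part of the SDK, only an illustration of the format under the assumptions stated in the docstring.

```python
import numpy as np


def decode_mask(points: list[float], image_size: tuple[int, int]) -> np.ndarray:
    """Rough inverse of encode_mask, for illustration only.

    points is [run lengths..., x1, y1, x2, y2]: the runs alternate between
    0- and 1-valued pixels (starting with 0s) over the row-major flattening
    of the bbox region, and the bbox coordinates are inclusive.
    """
    *runs, x1, y1, x2, y2 = map(int, points)
    height, width = y2 - y1 + 1, x2 - x1 + 1

    flat = np.zeros(height * width, dtype=np.bool_)
    pos, value = 0, False
    for run in runs:
        flat[pos : pos + run] = value
        pos += run
        value = not value

    bitmap = np.zeros(image_size, dtype=np.bool_)
    bitmap[y1 : y2 + 1, x1 : x2 + 1] = flat.reshape(height, width)
    return bitmap


# For the first test case above, decode_mask([0, 4, 2, 2, 0, 4, 1], (2, 6))
# recovers the test bitmap's pixels inside the bbox; pixels outside the box
# are lost by design, since encode_mask crops to the bbox.
```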