
Merge pull request #115 from PUTvision/fix_114
Fix 114
bartoszptak authored Sep 28, 2023
2 parents 4747a84 + b6a7cf0 commit fbc99f0
Showing 7 changed files with 220 additions and 41 deletions.
10 changes: 9 additions & 1 deletion docs/source/creators/creators_description_classes.rst
@@ -43,7 +43,7 @@ Detection models allow to solve problem of objects detection, that is finding an
Example application is detection of oil and water tanks on satellite images.

The detection model output is a list of bounding boxes, each with an assigned class and a confidence value. This information is not standardized across different model architectures.
Currently the plugin supports :code:`YOLOv5` and :code:`YOLOv7` output types.
Currently the plugin supports :code:`YOLOv5`, :code:`YOLOv7` and :code:`ULTRALYTICS` output types.

For each object class, a separate vector layer can be created, with the information saved as rectangular polygons (so the output can easily be exported to a text format).
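
For illustration, a single detection can be built as below — a minimal sketch with hypothetical values (the :code:`Detection` and :code:`BoundingBox` classes and their import paths are assumed from the plugin sources):

.. code-block:: python

    from deepness.processing.models.detector import Detection
    from deepness.processing.processing_utils import BoundingBox

    # hypothetical example values, in pixel coordinates of the processed tile
    detection = Detection(
        bbox=BoundingBox(x_min=10, y_min=20, x_max=50, y_max=60),
        conf=0.92,  # detection confidence
        clss=0,     # index of the detected class
    )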

@@ -65,6 +65,14 @@ Usually, only one output map (class) is used, as the model usually tries to solv

The output report contains statistics for each class: the average, minimum, maximum and standard deviation of the values.

=====================
SuperResolution Model
=====================
SuperResolution models solve the problem of increasing the resolution of an image. The model takes a low-resolution image as input and outputs a high-resolution image.

An example application is increasing the resolution of satellite images.

The super-resolution model output is also an image covering the same area as the input tile, but with a higher resolution, that is, a smaller ground sampling distance (GSD).
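
For example (purely illustrative numbers): with a scale factor of 4, a 512×512 px input tile at 100 cm/px becomes a 2048×2048 px output covering the same ground area at 25 cm/px.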

================
Extending Models
1 change: 1 addition & 0 deletions docs/source/main/model_zoo/MODEL_ZOO.md
@@ -28,6 +28,7 @@ The [Model ZOO](https://chmura.put.poznan.pl/s/2pJk4izRurzQwu3) is a collection
| [Airbus Planes Detection](https://chmura.put.poznan.pl/s/bBIJ5FDPgyQvJ49) | 256 | 70 | YOLOv7 tiny model for object detection on satellite images. Based on the [Airbus Aircraft Detection dataset](https://www.kaggle.com/datasets/airbusgeo/airbus-aircrafts-sample-dataset). | [Image](https://chmura.put.poznan.pl/s/VfLmcWhvWf0UJfI) |
| [Airbus Oil Storage Detection](https://chmura.put.poznan.pl/s/gMundpKsYUC7sNb) | 512 | 150 | YOLOv5-m model for object detection on satellite images. Based on the [Airbus Oil Storage Detection dataset](https://www.kaggle.com/datasets/airbusgeo/airbus-oil-storage-detection-dataset). | [Image](https://chmura.put.poznan.pl/s/T3pwaKlbFDBB2C3) |
| [Aerial Cars Detection](https://chmura.put.poznan.pl/s/vgOeUN4H4tGsrGm) | 640 | 10 | YOLOv7-m model for car detection on aerial images. Based on the [ITCVD dataset](https://arxiv.org/pdf/1801.07339.pdf). | [Image](https://chmura.put.poznan.pl/s/cPzw1mkXlprSUIJ) |
| [UAVVaste Instance Segmentation](https://chmura.put.poznan.pl/s/v99rDlSPbyNpOCH) | 640 | 0.5 | YOLOv8-L Instance Segmentation model for litter detection on high-quality UAV images. Based on the [UAVVaste dataset](https://github.com/PUTvision/UAVVaste). | [Image](https://chmura.put.poznan.pl/s/KFQTlS2qtVnaG0q) |

## Super Resolution Models
| Model | Input size | CM/PX | Scale Factor | Description | Example image |
src/deepness/common/processing_parameters/detection_parameters.py
@@ -21,6 +21,7 @@ class DetectorType(enum.Enum):
YOLO_v5_v7_DEFAULT = 'YOLO_v5_or_v7_default'
YOLO_v6 = 'YOLO_v6'
YOLO_ULTRALYTICS = 'YOLO_Ultralytics'
YOLO_ULTRALYTICS_SEGMENTATION = 'YOLO_Ultralytics_segmentation'

def get_parameters(self):
if self == DetectorType.YOLO_v5_v7_DEFAULT:
@@ -29,7 +30,7 @@ def get_parameters(self):
return DetectorTypeParameters(
ignore_objectness_probability=True,
)
elif self == DetectorType.YOLO_ULTRALYTICS:
elif self == DetectorType.YOLO_ULTRALYTICS or self == DetectorType.YOLO_ULTRALYTICS_SEGMENTATION:
return DetectorTypeParameters(
has_inverted_output_shape=True,
skipped_objectness_probability=True,
67 changes: 49 additions & 18 deletions src/deepness/processing/map_processor/map_processor_detection.py
@@ -1,17 +1,18 @@
""" This file implements map processing for detection model """

from itertools import count
from typing import List

import cv2
import numpy as np
from qgis.core import QgsVectorLayer, QgsProject, QgsGeometry, QgsFeature
from qgis.core import QgsFeature, QgsGeometry, QgsProject, QgsVectorLayer

from deepness.common.processing_parameters.detection_parameters import DetectionParameters, DetectorType
from deepness.processing import processing_utils
from deepness.processing.map_processor.map_processing_result import MapProcessingResultCanceled, \
MapProcessingResultSuccess, MapProcessingResult
from deepness.processing.map_processor.map_processing_result import (MapProcessingResult, MapProcessingResultCanceled,
MapProcessingResultSuccess)
from deepness.processing.map_processor.map_processor_with_model import MapProcessorWithModel
from deepness.processing.models.detector import Detection
from deepness.processing.models.detector import Detector
from deepness.processing.models.detector import Detection, Detector
from deepness.processing.tile_params import TileParams


@@ -119,19 +120,49 @@ def _create_vlayer_for_output_bounding_boxes(self, bounding_boxes: List[Detectio

features = []
for det in filtered_bounding_boxes:
bbox_corners_pixels = det.bbox.get_4_corners()
bbox_corners_crs = processing_utils.transform_points_list_xy_to_target_crs(
points=bbox_corners_pixels,
extent=self.extended_extent,
rlayer_units_per_pixel=self.rlayer_units_per_pixel,
)
feature = QgsFeature()
polygon_xy_vec_vec = [
bbox_corners_crs
]
geometry = QgsGeometry.fromPolygonXY(polygon_xy_vec_vec)
feature.setGeometry(geometry)
features.append(feature)
if det.mask is None:
bbox_corners_pixels = det.bbox.get_4_corners()
bbox_corners_crs = processing_utils.transform_points_list_xy_to_target_crs(
points=bbox_corners_pixels,
extent=self.extended_extent,
rlayer_units_per_pixel=self.rlayer_units_per_pixel,
)
feature = QgsFeature()
polygon_xy_vec_vec = [
bbox_corners_crs
]
geometry = QgsGeometry.fromPolygonXY(polygon_xy_vec_vec)
feature.setGeometry(geometry)
features.append(feature)
else:
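# mask -> polygon: keep only the largest external contour of the instance
# mask and use its convex hull as the polygon outline, so smaller fragments
# and concavities are dropped in favour of a simple polygon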
contours, _ = cv2.findContours(det.mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = sorted(contours, key=cv2.contourArea, reverse=True)

x_offset, y_offset = det.mask_offsets

if len(contours) > 0:
contour = contours[0]

corners = []
for point in contour:
corners.append(int(point[0][0]) + x_offset)
corners.append(int(point[0][1]) + y_offset)

mask_corners_pixels = cv2.convexHull(np.array(corners).reshape((-1, 2))).squeeze()

mask_corners_crs = processing_utils.transform_points_list_xy_to_target_crs(
points=mask_corners_pixels,
extent=self.extended_extent,
rlayer_units_per_pixel=self.rlayer_units_per_pixel,
)

feature = QgsFeature()
polygon_xy_vec_vec = [
mask_corners_crs
]
geometry = QgsGeometry.fromPolygonXY(polygon_xy_vec_vec)
feature.setGeometry(geometry)
features.append(feature)

vlayer = QgsVectorLayer("multipolygon", self.model.get_channel_name(channel_id), "memory")
vlayer.setCrs(self.rlayer.crs())
106 changes: 96 additions & 10 deletions src/deepness/processing/models/detector.py
@@ -1,8 +1,9 @@
""" Module including the class for the object detection task and related functions
"""
from dataclasses import dataclass
from typing import List
from typing import List, Optional, Tuple

import cv2
import numpy as np

from deepness.common.processing_parameters.detection_parameters import DetectorType
@@ -30,6 +31,9 @@ class of the detected object
"""float: confidence of the detection"""
clss: int
"""int: class of the detected object"""
mask: Optional[np.ndarray] = None
"""np.ndarray: mask of the detected object"""
mask_offsets: Optional[Tuple[int, int]] = None
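"""Optional[Tuple[int, int]]: (x, y) offset of the mask, set by convert_to_global"""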

def convert_to_global(self, offset_x: int, offset_y: int):
"""Apply (x,y) offset to bounding box coordinates
@@ -42,6 +46,9 @@ def convert_to_global(self, offset_x: int, offset_y: int):
_description_
"""
self.bbox.apply_offset(offset_x=offset_x, offset_y=offset_y)

if self.mask is not None:
self.mask_offsets = (offset_x, offset_y)

def get_bbox_xyxy(self) -> np.ndarray:
"""Convert stored bounding box into x1y1x2y2 format
@@ -132,9 +139,11 @@ def get_number_of_output_channels(self):
if model_type_params.skipped_objectness_probability:
return self.outputs_layers[0].shape[shape_index] - 4
return self.outputs_layers[0].shape[shape_index] - 4 - 1 # shape - 4 bboxes - 1 conf
elif len(self.outputs_layers) == 2 and self.model_type == DetectorType.YOLO_ULTRALYTICS_SEGMENTATION:
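# output 0 carries 4 bbox coords + class scores + mask coefficients per
# detection; output 1's shape[1] is the number of mask prototypes, which
# equals the coefficient count, so subtracting both leaves the class count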
return self.outputs_layers[0].shape[shape_index] - 4 - self.outputs_layers[1].shape[1]
else:
raise NotImplementedError("Model with multiple output layers is not supported! Use only one output layer.")

def preprocessing(self, image: np.ndarray):
"""Preprocess image before inference
@@ -182,28 +191,33 @@ def postprocessing(self, model_output):
"Model type is not set for model. Use self.set_model_type_param"
)

model_output = model_output[0][0]
masks = None

if self.model_type == DetectorType.YOLO_v5_v7_DEFAULT:
boxes, conf, classes = self._postprocessing_YOLO_v5_v7_DEFAULT(model_output)
boxes, conf, classes = self._postprocessing_YOLO_v5_v7_DEFAULT(model_output[0][0])
elif self.model_type == DetectorType.YOLO_v6:
boxes, conf, classes = self._postprocessing_YOLO_v6(model_output)
boxes, conf, classes = self._postprocessing_YOLO_v6(model_output[0][0])
elif self.model_type == DetectorType.YOLO_ULTRALYTICS:
boxes, conf, classes = self._postprocessing_YOLO_ULTRALYTICS(model_output)
boxes, conf, classes = self._postprocessing_YOLO_ULTRALYTICS(model_output[0][0])
elif self.model_type == DetectorType.YOLO_ULTRALYTICS_SEGMENTATION:
boxes, conf, classes, masks = self._postprocessing_YOLO_ULTRALYTICS_SEGMENTATION(model_output)
else:
raise NotImplementedError(f"Model type not implemented! ('{self.model_type}')")

detections = []

masks = masks if masks is not None else [None] * len(boxes)

for b, c, cl in zip(boxes, conf, classes):
for b, c, cl, m in zip(boxes, conf, classes, masks):
det = Detection(
bbox=BoundingBox(
x_min=b[0],
x_max=b[2],
y_min=b[1],
y_max=b[3]),
conf=c,
clss=cl
clss=cl,
mask=m,
)
detections.append(det)

@@ -286,7 +300,78 @@ def _postprocessing_YOLO_ULTRALYTICS(self, model_output):

return boxes, conf, classes

def _postprocessing_YOLO_ULTRALYTICS_SEGMENTATION(self, model_output):
detections = model_output[0][0]
protos = model_output[1][0]

detections = np.transpose(detections, (1, 0))

number_of_class = self.get_number_of_output_channels()
mask_start_index = 4 + number_of_class

outputs_filtered = np.array(
list(filter(lambda x: np.max(x[4:4+number_of_class]) >= self.confidence, detections))
)

if len(outputs_filtered.shape) < 2:
return [], [], [], []

probabilities = np.max(outputs_filtered[:, 4:4+number_of_class], axis=1)

outputs_x1y1x2y2 = self.xywh2xyxy(outputs_filtered)

pick_indxs = self.non_max_suppression_fast(
outputs_x1y1x2y2,
probs=probabilities,
iou_threshold=self.iou_threshold)

outputs_nms = outputs_x1y1x2y2[pick_indxs]

boxes = np.array(outputs_nms[:, :4], dtype=int)
conf = np.max(outputs_nms[:, 4:4+number_of_class], axis=1)
classes = np.argmax(outputs_nms[:, 4:4+number_of_class], axis=1)
masks_in = np.array(outputs_nms[:, mask_start_index:], dtype=float)

masks = self.process_mask(protos, masks_in, boxes)

return boxes, conf, classes, masks
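# Shape note (for a typical Ultralytics segmentation export with a 640x640
# input and 32 mask prototypes at 160x160): model_output[0] is
# (1, 4 + num_classes + 32, 8400) and model_output[1] is (1, 32, 160, 160);
# after filtering and NMS, masks_in above is (N, 32).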

# based on https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/ops.py#L638C1-L638C67
def process_mask(self, protos, masks_in, bboxes):
c, mh, mw = protos.shape # CHW
ih, iw = self.input_shape[2:]

masks = self.sigmoid(np.matmul(masks_in, protos.astype(float).reshape(c, -1))).reshape(-1, mh, mw)

downsampled_bboxes = bboxes.copy().astype(float)
downsampled_bboxes[:, 0] *= mw / iw
downsampled_bboxes[:, 2] *= mw / iw
downsampled_bboxes[:, 3] *= mh / ih
downsampled_bboxes[:, 1] *= mh / ih

masks = self.crop_mask(masks, downsampled_bboxes)
scaled_masks = np.zeros((len(masks), ih, iw))

for i in range(len(masks)):
scaled_masks[i] = cv2.resize(masks[i], (iw, ih), interpolation=cv2.INTER_LINEAR)

masks = np.uint8(scaled_masks >= 0.5)

return masks

@staticmethod
def sigmoid(x):
return 1 / (1 + np.exp(-x))

@staticmethod
# based on https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/ops.py#L598C1-L614C65
def crop_mask(masks, boxes):
n, h, w = masks.shape
x1, y1, x2, y2 = np.split(boxes[:, :, None], 4, axis=1) # x1 shape(n,1,1)
r = np.arange(w, dtype=x1.dtype)[None, None, :]  # x coordinates, shape (1,1,w)
c = np.arange(h, dtype=x1.dtype)[None, :, None]  # y coordinates, shape (1,h,1)

return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
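# Broadcasting note: x1, y1, x2, y2 are each (n,1,1), so comparing them with
# r (1,1,w) and c (1,h,1) yields an (n,h,w) boolean box mask that zeroes out
# everything outside each detection's bounding box.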

@staticmethod
def xywh2xyxy(x: np.ndarray) -> np.ndarray:
@@ -386,11 +471,12 @@ def non_max_suppression_fast(boxes: np.ndarray, probs: np.ndarray, iou_threshold
def check_loaded_model_outputs(self):
"""Check if model outputs are valid.
A valid model:
- has 1 output layer
- has 1 or 2 output layers
- output layer shape length is 3
- batch size is 1
"""
if len(self.outputs_layers) == 1:

if len(self.outputs_layers) == 1 or len(self.outputs_layers) == 2:
shape = self.outputs_layers[0].shape

if len(shape) != 3:
14 changes: 3 additions & 11 deletions src/deepness/processing/processing_utils.py
@@ -4,22 +4,17 @@

import logging
from dataclasses import dataclass
from typing import Optional, List, Tuple
from typing import List, Optional, Tuple

import numpy as np
from qgis.core import Qgis
from qgis.core import QgsFeature, QgsGeometry, QgsPointXY
from qgis.core import QgsRasterLayer, QgsCoordinateTransform
from qgis.core import QgsRectangle
from qgis.core import QgsUnitTypes
from qgis.core import QgsWkbTypes
from qgis.core import (Qgis, QgsCoordinateTransform, QgsFeature, QgsGeometry, QgsPointXY, QgsRasterLayer, QgsRectangle,
QgsUnitTypes, QgsWkbTypes)

from deepness.common.defines import IS_DEBUG
from deepness.common.lazy_package_loader import LazyPackageLoader
from deepness.common.processing_parameters.map_processing_parameters import MapProcessingParameters
from deepness.common.processing_parameters.segmentation_parameters import SegmentationParameters


cv2 = LazyPackageLoader('cv2')


@@ -391,9 +386,6 @@ def get_4_corners(self) -> List[Tuple]:
(self.x_max, self.y_min),
]

roi_slice = np.s_[self.y_min:self.y_max + 1, self.x_min:self.x_max + 1]
return roi_slice


def transform_polygon_with_rings_epsg_to_extended_xy_pixels(
polygons: List[List[QgsPointXY]],