openvinotoolkit · vinnamkim · Jan 17, 2024 · Jan 11, 2024 · Jan 11, 2024 · Jan 11, 2024
@@ -0,0 +1,4 @@
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+"""Module for OTX instance segmentation models."""
@@ -0,0 +1,18 @@
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+"""ATSS model implementations."""
+
+from typing import Literal
+
+from otx.algo.utils.mmconfig import read_mmconfig
+from otx.core.model.entity.instance_segmentation import MMDetInstanceSegCompatibleModel
+
+
+class MaskRCNN(MMDetInstanceSegCompatibleModel):
+    """MaskRCNN Model."""
+
+    def __init__(self, num_classes: int, variant: Literal["efficientnetb2b", "r50", "swint"]) -> None:
+        model_name = f"maskrcnn_{variant}"
+        config = read_mmconfig(model_name=model_name)
+        super().__init__(num_classes=num_classes, config=config)
@@ -0,0 +1,198 @@
+load_from: https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/models/instance_segmentation/v2/efficientnet_b2b-mask_rcnn-576x576.pth
+data_preprocessor:
+  type: "DetDataPreprocessor"
+  bgr_to_rgb: false
+  mean:
+    - 123.675
+    - 116.28
+    - 103.53
+  pad_mask: true
+  pad_size_divisor: 32
+  std:
+    - 1.0
+    - 1.0
+    - 1.0
+type: MaskRCNN
+backbone:
+  type: efficientnet_b2b
+  out_indices:
+    - 2
+    - 3
+    - 4
+    - 5
+  frozen_stages: -1
+  pretrained: true
+  activation_cfg:
+    type: torch_swish
+  norm_cfg:
+    type: BN
+    requires_grad: true
+neck:
+  type: FPN
+  in_channels:
+    - 24
+    - 48
+    - 120
+    - 352
+  out_channels: 80
+  num_outs: 5
+rpn_head:
+  type: RPNHead
+  in_channels: 80
+  feat_channels: 80
+  anchor_generator:
+    type: AnchorGenerator
+    scales:
+      - 8
+    ratios:
+      - 0.5
+      - 1.0
+      - 2.0
+    strides:
+      - 4
+      - 8
+      - 16
+      - 32
+      - 64
+  bbox_coder:
+    type: DeltaXYWHBBoxCoder
+    target_means:
+      - 0.0
+      - 0.0
+      - 0.0
+      - 0.0
+    target_stds:
+      - 1.0
+      - 1.0
+      - 1.0
+      - 1.0
+  loss_cls:
+    type: CrossEntropyLoss
+    use_sigmoid: true
+    loss_weight: 1.0
+  loss_bbox:
+    type: L1Loss
+    loss_weight: 1.0
+roi_head:
+  type: StandardRoIHead
+  bbox_roi_extractor:
+    type: SingleRoIExtractor
+    roi_layer:
+      type: RoIAlign
+      output_size: 7
+      sampling_ratio: 0
+    out_channels: 80
+    featmap_strides:
+      - 4
+      - 8
+      - 16
+      - 32
+  bbox_head:
+    type: Shared2FCBBoxHead
+    in_channels: 80
+    fc_out_channels: 1024
+    roi_feat_size: 7
+    num_classes: 80
+    bbox_coder:
+      type: DeltaXYWHBBoxCoder
+      target_means:
+        - 0.0
+        - 0.0
+        - 0.0
+        - 0.0
+      target_stds:
+        - 0.1
+        - 0.1
+        - 0.2
+        - 0.2
+    reg_class_agnostic: false
+    loss_cls:
+      type: CrossEntropyLoss
+      use_sigmoid: false
+      loss_weight: 1.0
+    loss_bbox:
+      type: L1Loss
+      loss_weight: 1.0
+  mask_roi_extractor:
+    type: SingleRoIExtractor
+    roi_layer:
+      type: RoIAlign
+      output_size: 14
+      sampling_ratio: 0
+    out_channels: 80
+    featmap_strides:
+      - 4
+      - 8
+      - 16
+      - 32
+  mask_head:
+    type: FCNMaskHead
+    num_convs: 4
+    in_channels: 80
+    conv_out_channels: 80
+    num_classes: 80
+    loss_mask:
+      type: CrossEntropyLoss
+      use_mask: true
+      loss_weight: 1.0
+train_cfg:
+  rpn:
+    assigner:
+      type: MaxIoUAssigner
+      pos_iou_thr: 0.7
+      neg_iou_thr: 0.3
+      min_pos_iou: 0.3
+      match_low_quality: true
+      ignore_iof_thr: -1
+      gpu_assign_thr: 300
+    sampler:
+      type: RandomSampler
+      num: 256
+      pos_fraction: 0.5
+      neg_pos_ub: -1
+      add_gt_as_proposals: false
+    allowed_border: -1
+    pos_weight: -1
+    debug: false
+  rpn_proposal:
+    nms_across_levels: false
+    nms_pre: 2000
+    max_per_img: 1000
+    nms:
+      type: nms
+      iou_threshold: 0.8
+    min_bbox_size: 0
+  rcnn:
+    assigner:
+      type: MaxIoUAssigner
+      pos_iou_thr: 0.5
+      neg_iou_thr: 0.5
+      min_pos_iou: 0.5
+      match_low_quality: true
+      ignore_iof_thr: -1
+      gpu_assign_thr: 300
+    sampler:
+      type: RandomSampler
+      num: 256
+      pos_fraction: 0.25
+      neg_pos_ub: -1
+      add_gt_as_proposals: true
+    mask_size: 28
+    pos_weight: -1
+    debug: false
+test_cfg:
+  rpn:
+    nms_across_levels: false
+    nms_pre: 800
+    max_per_img: 500
+    nms:
+      type: nms
+      iou_threshold: 0.8
+    min_bbox_size: 0
+  rcnn:
+    score_thr: 0.05
+    nms:
+      type: nms
+      iou_threshold: 0.7
+    max_per_img: 500
+    mask_thr_binary: 0.5