openvinotoolkit · sungmanc · Dec 11, 2023 · Dec 7, 2023 · Dec 7, 2023 · Dec 7, 2023
@@ -235,7 +235,7 @@ exclude = [
 
     # it will be cleaned up later
     "src/otx/core/engine/utils/*",
-    "src/otx/algo/classification/model/backbones/*",
+    "src/otx/algo/classification/backbones/*",
     "for_developers/helpers.py",
 ]
 

@@ -1,8 +1,9 @@
 # Copyright (C) 2023 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
-"""Module for OTX classification."""
+"""Module for OTX classification models."""
 
-from . import model
+from . import backbones
+from .otx_dino_v2 import DINOv2, DINOv2RegisterClassifier
 
-__all__ = ["model"]
+__all__ = ["backbones", "DINOv2", "DINOv2RegisterClassifier"]
@@ -1,6 +1,6 @@
 # Copyright (C) 2023 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
-#
+
 """EfficientNetV2 model.
 
 Original papers:

@@ -0,0 +1,108 @@
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""DINO-V2 model for the OTX classification."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+import torch
+from torch import nn
+
+from otx.core.data.entity.base import OTXBatchLossEntity
+from otx.core.data.entity.classification import MulticlassClsBatchDataEntity, MulticlassClsBatchPredEntity
+from otx.core.model.entity.classification import OTXClassificationModel
+
+if TYPE_CHECKING:
+    from omegaconf import DictConfig
+
+class DINOv2(nn.Module):
+    """DINO-v2 backbone followed by a single linear classification head.
+
+    Args:
+        backbone_name: Entry-point name passed to ``torch.hub.load`` for the
+            ``facebookresearch/dinov2`` repository.
+        freeze_backbone: If True, gradients are disabled for every backbone parameter.
+        head_in_channels: Input feature size of the linear head.
+        num_classes: Output size of the linear head.
+    """
+
+    def __init__(
+        self,
+        backbone_name: str,
+        freeze_backbone: bool,
+        head_in_channels: int,
+        num_classes: int,
+    ) -> None:
+        super().__init__()
+        self.backbone = torch.hub.load(
+            repo_or_dir="facebookresearch/dinov2",
+            model=backbone_name,
+        )
+
+        if freeze_backbone:
+            self._freeze_backbone(self.backbone)
+
+        self.head = nn.Linear(
+            head_in_channels,
+            num_classes,
+        )
+
+        self.loss = nn.CrossEntropyLoss()
+        # Name the class axis explicitly: the implicit-dim form of Softmax is deprecated
+        # and emits a warning on every call.
+        self.softmax = nn.Softmax(dim=-1)
+
+    def _freeze_backbone(self, backbone: nn.Module) -> None:
+        """Disable gradient computation for every parameter of ``backbone``."""
+        for param in backbone.parameters():
+            param.requires_grad = False
+
+    def forward(self, imgs: torch.Tensor, labels: torch.Tensor | None = None) -> torch.Tensor:
+        """Run the backbone and the linear head.
+
+        Args:
+            imgs: Image batch passed to the backbone.
+            labels: Targets for the cross-entropy loss; required in training mode.
+
+        Returns:
+            The cross-entropy loss in training mode, otherwise the softmax of the
+            logits taken over their last dimension.
+
+        Raises:
+            ValueError: If the module is in training mode and ``labels`` is None.
+        """
+        if self.training and labels is None:
+            # Fail before the backbone runs instead of deep inside the loss function.
+            msg = "labels must be provided when the model is in training mode."
+            raise ValueError(msg)
+
+        feats = self.backbone(imgs)
+        logits = self.head(feats)
+        if self.training:
+            return self.loss(logits, labels)
+        return self.softmax(logits)
+
+
+class DINOv2RegisterClassifier(OTXClassificationModel):
+    """OTX classification model that wraps a config-built ``DINOv2`` network."""
+
+    def __init__(self, config: DictConfig) -> None:
+        # Keep the config on the instance before the base constructor creates the model.
+        self.config = config
+        super().__init__()
+
+    def _create_model(self) -> nn.Module:
+        """Build the ``DINOv2`` network from the ``backbone`` and ``head`` config sections."""
+        backbone_cfg = self.config.backbone
+        head_cfg = self.config.head
+        return DINOv2(
+            backbone_name=backbone_cfg.name,
+            freeze_backbone=backbone_cfg.frozen,
+            head_in_channels=head_cfg.in_channels,
+            num_classes=head_cfg.num_classes,
+        )
+
+    def _customize_inputs(self, entity: MulticlassClsBatchDataEntity) -> dict[str, Any]:
+        """Turn a batch entity into the keyword arguments of ``DINOv2.forward``."""
+        return {
+            "imgs": torch.stack(entity.images),
+            "labels": torch.cat(entity.labels),
+        }
+
+    def _customize_outputs(
+        self,
+        outputs: Any, # noqa: ANN401
+        inputs: MulticlassClsBatchDataEntity,
+    ) -> MulticlassClsBatchPredEntity | OTXBatchLossEntity:
+        """Wrap raw model outputs in a loss entity (training) or a prediction entity.
+
+        Raises:
+            TypeError: If the module is in training mode and ``outputs`` is not a tensor.
+        """
+        if not self.training:
+            # Highest value along dim 1 and its index give the score and the label.
+            top_scores, top_idxs = torch.max(outputs, dim=1)
+            per_sample_scores = torch.unbind(top_scores, dim=0)
+            per_sample_labels = torch.unbind(top_idxs, dim=0)
+            return MulticlassClsBatchPredEntity(
+                batch_size=top_idxs.shape[0],
+                images=inputs.images,
+                imgs_info=inputs.imgs_info,
+                scores=per_sample_scores,
+                labels=per_sample_labels,
+            )
+
+        if not isinstance(outputs, torch.Tensor):
+            raise TypeError(outputs)
+
+        loss_entity = OTXBatchLossEntity()
+        loss_entity["loss"] = outputs
+        return loss_entity
@@ -0,0 +1,12 @@
+# Optimizer and LR-scheduler settings.
+# NOTE(review): presumably this is the `default` config that the sibling model
+# configs pull in through their `defaults:` list - confirm against the file name.
+optimizer:
+  _target_: torch.optim.Adam
+  # _partial_: Hydra returns a functools.partial instead of calling the target now
+  _partial_: true
+  lr: 1e-3
+  weight_decay: 0.0
+
+scheduler:
+  _target_: torch.optim.lr_scheduler.ReduceLROnPlateau
+  _partial_: true
+  mode: min
+  factor: 0.1
+  patience: 10
@@ -1,17 +1,7 @@
-_target_: otx.core.model.module.detection.OTXDetectionLitModule
-
-optimizer:
-  _target_: torch.optim.Adam
-  _partial_: true
-  lr: 1e-3
-  weight_decay: 0.0
+defaults:
+  - default
 
-scheduler:
-  _target_: torch.optim.lr_scheduler.ReduceLROnPlateau
-  _partial_: true
-  mode: min
-  factor: 0.1
-  patience: 10
+_target_: otx.core.model.module.detection.OTXDetectionLitModule
 
 otx_model:
   _target_: otx.core.model.entity.detection.MMDetCompatibleModel

@@ -1,17 +1,7 @@
-_target_: otx.core.model.module.classification.OTXClassificationLitModule
-
-optimizer:
-  _target_: torch.optim.Adam
-  _partial_: true
-  lr: 1e-3
-  weight_decay: 0.0
+defaults:
+  - default
 
-scheduler:
-  _target_: torch.optim.lr_scheduler.ReduceLROnPlateau
-  _partial_: true
-  mode: min
-  factor: 0.1
-  patience: 10
+_target_: otx.core.model.module.classification.OTXClassificationLitModule
 
 otx_model:
   _target_: otx.core.model.entity.classification.MMPretrainCompatibleModel

@@ -0,0 +1,11 @@
+# Model config for classification with OTXClassificationLitModule.
+# NOTE(review): presumably selected via `override /model: torch_classification`
+# in a recipe - confirm against the file name.
+defaults:
+  - default
+
+_target_: otx.core.model.module.classification.OTXClassificationLitModule
+
+otx_model:
+  _target_: otx.core.model.entity.classification.OTXClassificationModel
+  # `???` marks a mandatory value: it must be filled in before it is accessed
+  config: ???
+
+# compile model for faster training with pytorch 2.0
+torch_compile: false
@@ -56,7 +56,6 @@ model:
           - 123.675
           - 116.28
           - 103.53
-        num_classes: 1000
         std:
           - 58.395
           - 57.12

@@ -0,0 +1,63 @@
+# @package _global_
+# Recipe for DINOv2 (with registers) classification.
+defaults:
+  - override /base: classification
+  - override /callbacks: classification
+  - override /data: mmpretrain
+  - override /model: torch_classification
+data:
+  train_subset:
+    batch_size: 64
+    transforms:
+      - type: LoadImageFromFile
+        to_float32: true
+      - mean:
+          - 123.675
+          - 116.28
+          - 103.53
+        std:
+          - 58.395
+          - 57.12
+          - 57.375
+        to_rgb: true
+        type: Normalize
+      - backend: cv2
+        scale: 224
+        type: RandomResizedCrop
+      - type: PackInputs
+  val_subset:
+    batch_size: 64
+    transforms:
+      - type: LoadImageFromFile
+        to_float32: true
+      # Same normalization as train_subset so evaluation inputs match the
+      # value range and channel order the model is trained on.
+      - mean:
+          - 123.675
+          - 116.28
+          - 103.53
+        std:
+          - 58.395
+          - 57.12
+          - 57.375
+        to_rgb: true
+        type: Normalize
+      - backend: cv2
+        edge: short
+        scale: 256
+        type: ResizeEdge
+      - crop_size: 224
+        type: CenterCrop
+      - type: PackInputs
+  test_subset:
+    batch_size: 64
+    transforms:
+      - type: LoadImageFromFile
+        to_float32: true
+      # Same normalization as train_subset so evaluation inputs match the
+      # value range and channel order the model is trained on.
+      - mean:
+          - 123.675
+          - 116.28
+          - 103.53
+        std:
+          - 58.395
+          - 57.12
+          - 57.375
+        to_rgb: true
+        type: Normalize
+      - backend: cv2
+        edge: short
+        scale: 256
+        type: ResizeEdge
+      - crop_size: 224
+        type: CenterCrop
+      - type: PackInputs
+model:
+  otx_model:
+    _target_: otx.algo.classification.otx_dino_v2.DINOv2RegisterClassifier
+    config:
+      backbone:
+        name: dinov2_vits14_reg
+        frozen: true
+      head:
+        in_channels: 384
+        num_classes: 1000
+  optimizer:
+    _target_: torch.optim.AdamW
+    lr: 1e-5
@@ -70,7 +70,6 @@ model:
           - 123.675
           - 116.28
           - 103.53
-        num_classes: 1000
         std:
           - 58.395
           - 57.12

@@ -72,7 +72,6 @@ model:
           - 123.675
           - 116.28
           - 103.53
-        num_classes: 1000
         std:
           - 58.395
           - 57.12

@@ -71,7 +71,6 @@ model:
           - 123.675
           - 116.28
           - 103.53
-        num_classes: 1000
         std:
           - 58.395
           - 57.12

@@ -21,7 +21,6 @@
         "data_dir": "tests/assets/classification_dataset",
         "overrides": [
             "model.otx_model.config.head.num_classes=2",
-            "model.otx_model.config.data_preprocessor.num_classes=2",
         ],
     },
     "detection": {

@@ -0,0 +1,2 @@
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,51 @@
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+from __future__ import annotations
+
+import pytest
+import torch
+from omegaconf import DictConfig
+from torchvision import tv_tensors
+
+from src.otx.core.data.entity.base import ImageInfo
+from src.otx.core.data.entity.classification import MulticlassClsBatchDataEntity
+
+
+@pytest.fixture()
+def fxt_multiclass_cls_batch_data_entity() -> MulticlassClsBatchDataEntity:
+    """Return a two-sample multiclass batch of random 3x224x224 images labelled 0 and 1."""
+    num_samples = 2
+    images = tv_tensors.Image(data=torch.randn((num_samples, 3, 224, 224)))
+
+    infos = []
+    for sample_idx in range(num_samples):
+        info = ImageInfo(
+            img_idx=sample_idx,
+            img_shape=(224, 224),
+            ori_shape=(224, 224),
+            pad_shape=(0, 0),
+            scale_factor=(1.0, 1.0),
+        )
+        infos.append(info)
+
+    targets = [torch.tensor([0]), torch.tensor([1])]
+    return MulticlassClsBatchDataEntity(
+        batch_size=2,
+        images=images,
+        imgs_info=infos,
+        labels=targets,
+    )
+
+
+@pytest.fixture()
+def fxt_config_mock() -> DictConfig:
+    """Return a minimal model config with backbone, head and data_preprocess sections."""
+    backbone_cfg = {"name": "dinov2_vits14_reg", "frozen": False}
+    head_cfg = {"in_channels": 384, "num_classes": 2}
+    preprocess_cfg = {"mean": [1, 1, 1], "std": [1, 1, 1], "to_rgb": True}
+    return DictConfig(
+        {
+            "backbone": backbone_cfg,
+            "head": head_cfg,
+            "data_preprocess": preprocess_cfg,
+        },
+    )
-Original file line number
+Diff line change
@@ Expand Up / @@ -56,7 +56,6 @@ model: @@
               - 123.675
               - 116.28
               - 103.53
-            num_classes: 1000
             std:
               - 58.395
               - 57.12
@@ Expand Down @@
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# Copyright (C) 2023 Intel Corporation
		# SPDX-License-Identifier: Apache-2.0