From acb01c2ca24c8b424d90cea669e895d5e7fabfda Mon Sep 17 00:00:00 2001 From: Harim Kang Date: Tue, 13 Feb 2024 10:18:34 +0900 Subject: [PATCH 1/5] Add Auto-Configuration for OVModel (#2897) * Add initial commit for auto-config ov model * Add auto-config with OV data pipeline * Add intg test api export & test * Add api example * Add CLI link between checkpoint and model.model_name --- for_developers/add_custom_model.ipynb | 215 ++----- for_developers/cli_guide.md | 18 + for_developers/engine_api_example.ipynb | 541 ++++++++++++++++++ src/otx/cli/cli.py | 3 + src/otx/core/model/entity/classification.py | 24 +- src/otx/core/model/entity/detection.py | 20 + .../model/entity/instance_segmentation.py | 20 + src/otx/core/model/entity/segmentation.py | 20 + src/otx/engine/engine.py | 19 +- src/otx/engine/utils/auto_configurator.py | 60 +- tests/integration/api/test_engine_api.py | 23 +- tests/integration/cli/test_cli.py | 10 +- .../integration/cli/test_export_inference.py | 4 +- 13 files changed, 803 insertions(+), 174 deletions(-) create mode 100644 for_developers/engine_api_example.ipynb diff --git a/for_developers/add_custom_model.ipynb b/for_developers/add_custom_model.ipynb index 5110fbae48e..4a6bb1d79be 100644 --- a/for_developers/add_custom_model.ipynb +++ b/for_developers/add_custom_model.ipynb @@ -12,26 +12,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "from typing import Any\n", - "\n", - "import torch\n", - "from torch import nn\n", - "from torchvision.models.resnet import resnet50, ResNet50_Weights\n", + "from __future__ import annotations\n", "\n", - "from hydra import compose, initialize\n", + "from typing import TYPE_CHECKING, Any\n", "\n", - "from otx.core.data.entity.base import OTXBatchLossEntity\n", + "import torch\n", "from otx.core.data.entity.classification import (\n", " MulticlassClsBatchDataEntity,\n", " MulticlassClsBatchPredEntity,\n", ")\n", - "from 
otx.core.model.entity.classification import OTXClassificationModel\n", - "from otx.core.engine.train import train\n", - "from otx.cli.utils.hydra import configure_hydra_outputs\n" + "from otx.core.model.entity.classification import OTXMulticlassClsModel\n", + "from torch import nn\n", + "from torchvision.models.resnet import ResNet50_Weights, resnet50\n", + "\n", + "if TYPE_CHECKING:\n", + " from otx.core.data.entity.base import OTXBatchLossEntity\n" ] }, { @@ -50,7 +49,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -96,14 +95,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "class OTXResNet50(OTXClassificationModel):\n", + "class OTXResNet50(OTXMulticlassClsModel):\n", " def __init__(self, num_classes: int) -> None:\n", - " self.num_classes = num_classes\n", - " super().__init__()\n", + " super().__init__(num_classes=num_classes)\n", " self.register_buffer(\n", " \"mean\",\n", " torch.FloatTensor([123.675, 116.28, 103.53]).view(-1, 1, 1),\n", @@ -121,7 +119,7 @@ " return ResNet50WithLossComputation(num_classes=self.num_classes)\n", "\n", " def _customize_inputs(self, inputs: MulticlassClsBatchDataEntity) -> dict[str, Any]:\n", - " images = torch.stack(inputs.images, dim=0).to(dtype=torch.float32)\n", + " images = inputs.images.to(dtype=torch.float32)\n", " images = (images - self.mean) / self.std\n", " return {\n", " \"images\": images,\n", @@ -165,184 +163,95 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:The corresponding keys in config are not used.: ['verbose', 'data_root', 'task', 'seed', 'callback_monitor', 'resume', 'disable_infer_num_classes']\n", + "WARNING:root:Set Default Optimizer: {'class_path': 'torch.optim.SGD', 'init_args': {'lr': 0.0049, 'momentum': 0.9, 
'dampening': 0, 'weight_decay': 0.0001, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False}}\n", + "WARNING:root:Set Default Scheduler: {'class_path': 'lightning.pytorch.cli.ReduceLROnPlateau', 'init_args': {'monitor': 'train/loss', 'mode': 'min', 'factor': 0.5, 'patience': 1, 'threshold': 0.0001, 'threshold_mode': 'rel', 'cooldown': 0, 'min_lr': 0, 'eps': 1e-08, 'verbose': False}}\n", + "INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True\n", + "INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores\n", + "INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs\n", + "INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs\n", + "INFO:pytorch_lightning.utilities.rank_zero:You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. 
For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]\n", + "\n", + " | Name | Type | Params\n", + "---------------------------------------------------\n", + "0 | model | OTXResNet50 | 23.5 M\n", + "1 | val_metric | MulticlassAccuracy | 0 \n", + "2 | test_metric | MulticlassAccuracy | 0 \n", + "---------------------------------------------------\n", + "23.5 M Trainable params\n", + "0 Non-trainable params\n", + "23.5 M Total params\n", + "94.049 Total estimated model params size (MB)\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "[2023-12-08 14:44:09,539][root][INFO] - Instantiating datamodule <{'data_format': 'imagenet_with_subset_dirs', 'data_root': '${base.data_dir}', 'train_subset': {'batch_size': 64, 'subset_name': 'train', 'transform_lib_type': , 'transforms': [{'type': 'LoadImageFromFile'}, {'backend': 'cv2', 'scale': 224, 'type': 'RandomResizedCrop'}, {'type': 'PackInputs'}], 'num_workers': 2}, 'val_subset': {'batch_size': 64, 'subset_name': 'val', 'transform_lib_type': , 'transforms': [{'type': 'LoadImageFromFile'}, {'backend': 'cv2', 'edge': 'short', 'scale': 256, 'type': 'ResizeEdge'}, {'crop_size': 224, 'type': 'CenterCrop'}, {'type': 'PackInputs'}], 'num_workers': 2}, 'test_subset': {'batch_size': 64, 'subset_name': 'test', 'transform_lib_type': , 'transforms': [{'type': 'LoadImageFromFile'}, {'backend': 'cv2', 'edge': 'short', 'scale': 256, 'type': 'ResizeEdge'}, {'crop_size': 224, 'type': 'CenterCrop'}, {'type': 'PackInputs'}], 'num_workers': 2}, 'mem_cache_size': '1GB', 'mem_cache_img_max_size': '${as_int_tuple:500,500}'}>\n", - "[2023-12-08 14:44:09,951][root][INFO] - Add name: val, self.subsets: {'val': }\n", - "[2023-12-08 14:44:09,952][root][INFO] - Add name: test, self.subsets: {'val': , 'test': }\n", - "[2023-12-08 14:44:09,954][root][INFO] - Add name: train, self.subsets: {'val': , 
'test': , 'train': }\n", - "[2023-12-08 14:44:09,954][root][INFO] - Try to create a 1000000000 size memory pool.\n", - "[2023-12-08 14:44:10,356][root][INFO] - Instantiating model <{'_target_': 'otx.core.model.module.classification.OTXClassificationLitModule', 'optimizer': {'_target_': 'torch.optim.SGD', '_partial_': True, 'lr': 0.0049, 'weight_decay': 0.0001, 'momentum': 0.9}, 'scheduler': {'_target_': 'torch.optim.lr_scheduler.ReduceLROnPlateau', '_partial_': True, 'mode': 'min', 'factor': 0.5, 'patience': 1}, 'otx_model': {'_target_': 'otx.core.model.entity.classification.MMPretrainCompatibleModel', 'config': {'backbone': {'version': 'b0', 'pretrained': True, 'type': 'OTXEfficientNet'}, 'head': {'act_cfg': {'type': 'HSwish'}, 'dropout_rate': 0.2, 'in_channels': 1280, 'init_cfg': {'bias': 0.0, 'layer': 'Linear', 'mean': 0.0, 'std': 0.01, 'type': 'Normal'}, 'loss': {'loss_weight': 1.0, 'type': 'CrossEntropyLoss'}, 'mid_channels': [1280], 'num_classes': 1000, 'topk': [1, 5], 'type': 'StackedLinearClsHead'}, 'neck': {'type': 'GlobalAveragePooling'}, 'data_preprocessor': {'mean': [123.675, 116.28, 103.53], 'num_classes': 1000, 'std': [58.395, 57.12, 57.375], 'to_rgb': True, 'type': 'ClsDataPreprocessor'}, 'type': 'ImageClassifier'}}, 'torch_compile': False}>\n", - "init weight - https://github.com/osmr/imgclsmob/releases/download/v0.0.364/efficientnet_b0-0752-0e386130.pth.zip\n", - "[2023-12-08 14:44:11,335][root][INFO] - Overriding to this OTX model \n", - "[2023-12-08 14:44:11,335][root][INFO] - Instantiating callbacks...\n", - "[2023-12-08 14:44:11,335][otx.core.engine.utils.instantiators][INFO] - Instantiating callback \n", - "[2023-12-08 14:44:11,337][otx.core.engine.utils.instantiators][INFO] - Instantiating callback \n", - "[2023-12-08 14:44:11,339][otx.core.engine.utils.instantiators][INFO] - Instantiating callback \n", - "[2023-12-08 14:44:11,341][otx.core.engine.utils.instantiators][INFO] - Instantiating callback \n", - "[2023-12-08 
14:44:11,341][otx.core.engine.utils.instantiators][INFO] - Instantiating callback \n", - "[2023-12-08 14:44:11,342][root][INFO] - Instantiating loggers...\n", - "[2023-12-08 14:44:11,342][otx.core.engine.utils.instantiators][INFO] - Instantiating logger \n", - "[2023-12-08 14:44:11,343][root][INFO] - Instantiating trainer <{'default_root_dir': '${base.output_dir}', 'accelerator': 'gpu', 'precision': 32, 'max_epochs': 10, 'min_epochs': 1, 'devices': 1, 'check_val_every_n_epoch': 1, 'deterministic': False, '_target_': 'lightning.pytorch.trainer.Trainer'}>\n" + " \r" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "Trainer already configured with model summary callbacks: []. Skipping setting a default `ModelSummary` callback.\n", - "GPU available: True (cuda), used: True\n", - "TPU available: False, using: 0 TPU cores\n", - "IPU available: False, using: 0 IPUs\n", - "HPU available: False, using: 0 HPUs\n" + "/home/harimkan/workspace/repo/otx-regression/venv/lib/python3.10/site-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[2023-12-08 14:44:11,479][root][INFO] - Logging hyperparameters!\n", - "[2023-12-08 14:44:11,483][root][INFO] - Starting training!\n" + "Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 8.83it/s, v_num=9, train/loss=0.563, val/accuracy=1.000]" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. 
For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision\n", - "/home/vinnamki/miniconda3/envs/otx-v2/lib/python3.11/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:639: Checkpoint directory outputs/checkpoints exists and is not empty.\n", - "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]\n" + "INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=10` reached.\n" ] }, { - "data": { - "text/html": [ - "
┏━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┓\n",
-       "┃    Name         Type                Params ┃\n",
-       "┡━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━┩\n",
-       "│ 0 │ model       │ OTXResNet50        │ 23.5 M │\n",
-       "│ 1 │ val_metric  │ MulticlassAccuracy │      0 │\n",
-       "│ 2 │ test_metric │ MulticlassAccuracy │      0 │\n",
-       "└───┴─────────────┴────────────────────┴────────┘\n",
-       "
\n" - ], - "text/plain": [ - "┏━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┓\n", - "┃\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mName \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mType \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mParams\u001b[0m\u001b[1;35m \u001b[0m┃\n", - "┡━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━┩\n", - "│\u001b[2m \u001b[0m\u001b[2m0\u001b[0m\u001b[2m \u001b[0m│ model │ OTXResNet50 │ 23.5 M │\n", - "│\u001b[2m \u001b[0m\u001b[2m1\u001b[0m\u001b[2m \u001b[0m│ val_metric │ MulticlassAccuracy │ 0 │\n", - "│\u001b[2m \u001b[0m\u001b[2m2\u001b[0m\u001b[2m \u001b[0m│ test_metric │ MulticlassAccuracy │ 0 │\n", - "└───┴─────────────┴────────────────────┴────────┘\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Trainable params: 23.5 M                                                                                           \n",
-       "Non-trainable params: 0                                                                                            \n",
-       "Total params: 23.5 M                                                                                               \n",
-       "Total estimated model params size (MB): 94                                                                         \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1mTrainable params\u001b[0m: 23.5 M \n", - "\u001b[1mNon-trainable params\u001b[0m: 0 \n", - "\u001b[1mTotal params\u001b[0m: 23.5 M \n", - "\u001b[1mTotal estimated model params size (MB)\u001b[0m: 94 \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "5a012e5444174b18a87ab25d93441b9f", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
/home/vinnamki/miniconda3/envs/otx-v2/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:293: The \n",
-       "number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower \n",
-       "value for log_every_n_steps if you want to see logs for the training epoch.\n",
-       "
\n" - ], - "text/plain": [ - "/home/vinnamki/miniconda3/envs/otx-v2/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:293: The \n", - "number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower \n", - "value for log_every_n_steps if you want to see logs for the training epoch.\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "`Trainer.fit` stopped: `max_epochs=10` reached.\n" + "Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 2.92it/s, v_num=9, train/loss=0.563, val/accuracy=1.000]\n" ] }, { "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
\n",
-       "
\n" - ], "text/plain": [ - "\n" + "{'train/loss': tensor(0.5628), 'val/accuracy': tensor(1.)}" ] }, + "execution_count": 4, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" } ], "source": [ - "num_classes = 2\n", + "from otx.engine import Engine\n", + "\n", "data_dir = \"../tests/assets/classification_dataset\"\n", + "otx_model = OTXResNet50(num_classes=2)\n", "\n", - "with initialize(\n", - " config_path=\"../src/otx/config\", version_base=\"1.3\", job_name=\"otx_train\"\n", - "):\n", - " overrides = [\n", - " \"+recipe=classification/otx_efficientnet_b0\",\n", - " \"base.output_dir=outputs\",\n", - " \"trainer.accelerator=gpu\",\n", - " f\"base.data_dir={data_dir}\",\n", - " ]\n", - " cfg = compose(config_name=\"train\", overrides=overrides, return_hydra_config=True)\n", - " configure_hydra_outputs(cfg)\n", + "engine = Engine(\n", + " data_root=data_dir,\n", + " model=otx_model,\n", + " device=\"gpu\",\n", + " work_dir=\"otx-workspace\",\n", + ")\n", "\n", - " otx_model = OTXResNet50(num_classes=num_classes)\n", - " train(cfg, otx_model=otx_model)\n" + "engine.train(max_epochs=10)" ] }, { @@ -369,7 +278,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/for_developers/cli_guide.md b/for_developers/cli_guide.md index 717a1d91afb..63465367d7f 100644 --- a/for_developers/cli_guide.md +++ b/for_developers/cli_guide.md @@ -203,6 +203,24 @@ Override Parameters otx train ... --model.num_classes --max_epochs ``` +Testing with checkpoint + +```console +otx test ... --checkpoint +``` + +Export to OpenVINO IR model or ONNX (Default="OPENVINO") + +```console +otx export ... --checkpoint --export_format +``` + +Testing with Exported model output + +```console +otx test ... 
--checkpoint +``` + ## How to write OTX Configuration (recipe) ### Configuration diff --git a/for_developers/engine_api_example.ipynb b/for_developers/engine_api_example.ipynb new file mode 100644 index 00000000000..e4bb524fe69 --- /dev/null +++ b/for_developers/engine_api_example.ipynb @@ -0,0 +1,541 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to use OTX Engine\n", + "\n", + "## Installation\n", + "\n", + "Please see [setup_guide.md](setup_guide.md).\n", + "\n", + "## Engine Usage" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/harimkan/workspace/repo/otx-regression/venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "from otx.engine import Engine" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Training with dataset path (Auto-Configuration)\n", + "\n", + "- Auto-Configurator detects the task from data_root\n", + "- Auto-Configurator selects the default model, data-transform, optimizer, and scheduler" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:The corresponding keys in config are not used.: ['verbose', 'data_root', 'task', 'seed', 'callback_monitor', 'resume', 'disable_infer_num_classes']\n", + "WARNING:root:Set Default Model: {'class_path': 'otx.algo.classification.efficientnet_b0.EfficientNetB0ForMulticlassCls', 'init_args': {'num_classes': 2, 'light': False}}\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "init weight - 
https://github.com/osmr/imgclsmob/releases/download/v0.0.364/efficientnet_b0-0752-0e386130.pth.zip\n", + "init weight - https://github.com/osmr/imgclsmob/releases/download/v0.0.364/efficientnet_b0-0752-0e386130.pth.zip\n", + "init weight - https://github.com/osmr/imgclsmob/releases/download/v0.0.364/efficientnet_b0-0752-0e386130.pth.zip\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:Set Default Optimizer: {'class_path': 'torch.optim.SGD', 'init_args': {'lr': 0.0049, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0.0001, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False}}\n", + "WARNING:root:Set Default Scheduler: {'class_path': 'lightning.pytorch.cli.ReduceLROnPlateau', 'init_args': {'monitor': 'train/loss', 'mode': 'min', 'factor': 0.5, 'patience': 1, 'threshold': 0.0001, 'threshold_mode': 'rel', 'cooldown': 0, 'min_lr': 0, 'eps': 1e-08, 'verbose': False}}\n", + "INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True\n", + "INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores\n", + "INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs\n", + "INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs\n", + "INFO:pytorch_lightning.utilities.rank_zero:You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. 
For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]\n", + "\n", + " | Name | Type | Params\n", + "---------------------------------------------------------------\n", + "0 | model | EfficientNetB0ForMulticlassCls | 5.6 M \n", + "1 | val_metric | MulticlassAccuracy | 0 \n", + "2 | test_metric | MulticlassAccuracy | 0 \n", + "---------------------------------------------------------------\n", + "5.6 M Trainable params\n", + "0 Non-trainable params\n", + "5.6 M Total params\n", + "22.599 Total estimated model params size (MB)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/harimkan/workspace/repo/otx-regression/venv/lib/python3.10/site-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). 
Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 9.97it/s, v_num=10, train/loss=0.692, val/accuracy=0.680]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=2` reached.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 5.66it/s, v_num=10, train/loss=0.692, val/accuracy=0.680]\n" + ] + }, + { + "data": { + "text/plain": [ + "{'train/loss': tensor(0.6917), 'val/accuracy': tensor(0.6800)}" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_root = \"../tests/assets/classification_dataset\"\n", + "\n", + "engine = Engine(data_root=data_root)\n", + "\n", + "engine.train(max_epochs=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Training with Custom OTXModel\n", + "\n", + "Please see [add_custom_model.ipynb](add_custom_model.ipynb).\n", + "\n", + "```python\n", + "# Inherited Class from otx.core.model.entity.base.OTXModel\n", + "custom_model = CustomOTXModel(...)\n", + "\n", + "engine = Engine(data_root=data_root, model=custom_model)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Training with OTX Model (model_name: str)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['otx_efficientnet_v2',\n", + " 'openvino_model',\n", + " 'otx_dino_v2_linear_probe',\n", + " 'mobilenet_v3_large_light',\n", + " 'efficientnet_b0_light',\n", + " 'otx_mobilenet_v3_large',\n", + " 'efficientnet_v2_light',\n", + " 'otx_dino_v2',\n", + " 'otx_efficientnet_b0',\n", + " 'otx_deit_tiny']" + ] + }, + "execution_count": 3, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "from otx.engine.utils.api import list_models\n", + "\n", + "list_models(task=\"MULTI_CLASS_CLS\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:The corresponding keys in config are not used.: ['verbose', 'data_root', 'task', 'seed', 'callback_monitor', 'resume', 'disable_infer_num_classes']\n", + "WARNING:root:Set Default Model: {'class_path': 'otx.algo.classification.mobilenet_v3_large.MobileNetV3ForMulticlassCls', 'init_args': {'num_classes': 2, 'light': True}}\n", + "WARNING:root:Set Default Optimizer: {'class_path': 'torch.optim.SGD', 'init_args': {'lr': 0.0058, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0.0001, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False}}\n", + "WARNING:root:Set Default Scheduler: {'class_path': 'otx.algo.schedulers.WarmupReduceLROnPlateau', 'init_args': {'warmup_steps': 10, 'monitor': 'val/accuracy', 'mode': 'max', 'factor': 0.5, 'patience': 1, 'threshold': 0.0001, 'threshold_mode': 'rel', 'cooldown': 0, 'min_lr': 0, 'eps': 1e-08, 'verbose': False}}\n", + "INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True\n", + "INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores\n", + "INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs\n", + "INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]\n", + "\n", + " | Name | Type | Params\n", + "------------------------------------------------------------\n", + "0 | model | MobileNetV3ForMulticlassCls | 3.0 M \n", + "1 | val_metric | MulticlassAccuracy | 0 \n", + "2 | test_metric | MulticlassAccuracy | 0 \n", + "------------------------------------------------------------\n", + "3.0 M Trainable params\n", + "0 Non-trainable 
params\n", + "3.0 M Total params\n", + "11.895 Total estimated model params size (MB)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loads checkpoint by http backend from path: https://github.com/d-li14/mobilenetv3.pytorch/blob/master/pretrained/mobilenetv3-large-1cd25616.pth?raw=true\n", + "The model and loaded state dict do not match exactly\n", + "\n", + "unexpected key in source state_dict: classifier.0.weight, classifier.0.bias, classifier.3.weight, classifier.3.bias\n", + "\n", + "init weight - https://github.com/d-li14/mobilenetv3.pytorch/blob/master/pretrained/mobilenetv3-large-1cd25616.pth?raw=true\n", + "Loads checkpoint by http backend from path: https://github.com/d-li14/mobilenetv3.pytorch/blob/master/pretrained/mobilenetv3-large-1cd25616.pth?raw=true\n", + "The model and loaded state dict do not match exactly\n", + "\n", + "unexpected key in source state_dict: classifier.0.weight, classifier.0.bias, classifier.3.weight, classifier.3.bias\n", + "\n", + "init weight - https://github.com/d-li14/mobilenetv3.pytorch/blob/master/pretrained/mobilenetv3-large-1cd25616.pth?raw=true\n", + "Loads checkpoint by http backend from path: https://github.com/d-li14/mobilenetv3.pytorch/blob/master/pretrained/mobilenetv3-large-1cd25616.pth?raw=true\n", + "The model and loaded state dict do not match exactly\n", + "\n", + "unexpected key in source state_dict: classifier.0.weight, classifier.0.bias, classifier.3.weight, classifier.3.bias\n", + "\n", + "init weight - https://github.com/d-li14/mobilenetv3.pytorch/blob/master/pretrained/mobilenetv3-large-1cd25616.pth?raw=true\n", + "Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 12.68it/s, v_num=11, train/loss=0.809, val/accuracy=0.520]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=2` reached.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1: 
100%|██████████| 1/1 [00:00<00:00, 8.37it/s, v_num=11, train/loss=0.809, val/accuracy=0.520]\n" + ] + }, + { + "data": { + "text/plain": [ + "{'train/loss': tensor(0.8091), 'val/accuracy': tensor(0.5200)}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "engine = Engine(data_root=data_root, model=\"mobilenet_v3_large_light\")\n", + "\n", + "engine.train(max_epochs=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Training with OTX configuration file\n", + "- Users can override configuration values when creating an Engine with Engine.from_config.\n", + "- Alternatively, users can modify the configuration file directly." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:The corresponding keys in config are not used.: ['verbose', 'data_root', 'task', 'seed', 'callback_monitor', 'resume', 'disable_infer_num_classes']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "init weight - https://github.com/osmr/imgclsmob/releases/download/v0.0.364/efficientnet_b0-0752-0e386130.pth.zip\n", + "init weight - https://github.com/osmr/imgclsmob/releases/download/v0.0.364/efficientnet_b0-0752-0e386130.pth.zip\n", + "init weight - https://github.com/osmr/imgclsmob/releases/download/v0.0.364/efficientnet_b0-0752-0e386130.pth.zip\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True\n", + "INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores\n", + "INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs\n", + "INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]\n", + "\n", + " | Name | Type | Params\n", + 
"---------------------------------------------------------------\n", + "0 | model | EfficientNetB0ForMulticlassCls | 5.6 M \n", + "1 | val_metric | MulticlassAccuracy | 0 \n", + "2 | test_metric | MulticlassAccuracy | 0 \n", + "---------------------------------------------------------------\n", + "5.6 M Trainable params\n", + "0 Non-trainable params\n", + "5.6 M Total params\n", + "22.599 Total estimated model params size (MB)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 9.87it/s, v_num=12, train/loss=0.697, val/accuracy=0.440]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=2` reached.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 5.55it/s, v_num=12, train/loss=0.697, val/accuracy=0.440]\n" + ] + }, + { + "data": { + "text/plain": [ + "{'train/loss': tensor(0.6972), 'val/accuracy': tensor(0.4400)}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from otx.engine import Engine\n", + "\n", + "config = \"../src/otx/recipe/classification/multi_class_cls/otx_efficientnet_b0.yaml\"\n", + "\n", + "engine = Engine.from_config(\n", + " config_path=config,\n", + " data_root=data_root,\n", + ")\n", + "\n", + "engine.train(max_epochs=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Testing DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 61.30it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+       "┃        Test metric               DataLoader 0        ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+       "│       test/accuracy           0.4000000059604645     │\n",
+       "└───────────────────────────┴───────────────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/accuracy \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.4000000059604645 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "{'test/accuracy': tensor(0.4000)}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "engine.test()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "PosixPath('otx-workspace/exported_model.xml')" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "exported_model_path = engine.export() # export_format=\"OPENVINO\" is default\n", + "exported_model_path" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:The corresponding keys in config are not used.: ['verbose', 'data_root', 'task', 'seed', 'callback_monitor', 'resume', 'disable_infer_num_classes']\n", + "/home/harimkan/workspace/repo/otx-regression/src/otx/core/utils/build.py:52: UserWarning: Set the default number of OpenVINO inference requests to 8.\n", + " You can specify the value in config.\n", + " warnings.warn(msg, stacklevel=1)\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Testing DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.92it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
+       "┃        Test metric               DataLoader 0        ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
+       "│       test/accuracy           0.4000000059604645     │\n",
+       "└───────────────────────────┴───────────────────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n", + "│\u001b[36m \u001b[0m\u001b[36m test/accuracy \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.4000000059604645 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "{'test/accuracy': tensor(0.4000)}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Using Exsting Engine for OV Testing\n", + "engine.test(checkpoint=exported_model_path)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/otx/cli/cli.py b/src/otx/cli/cli.py index 7b3f8579035..2c7159f990a 100644 --- a/src/otx/cli/cli.py +++ b/src/otx/cli/cli.py @@ -231,6 +231,9 @@ def add_subcommands(self) -> None: if "callbacks" in added_arguments: sub_parser.link_arguments("callback_monitor", "callbacks.init_args.monitor") sub_parser.link_arguments("engine.work_dir", "callbacks.init_args.dirpath") + if "checkpoint" in added_arguments and "--checkpoint" in sys.argv: + # This is code for an OVModel that uses checkpoint in model.model_name. 
+ sub_parser.link_arguments("checkpoint", "model.init_args.model_name") # Load default subcommand config file default_config_file = get_otx_root_path() / "recipe" / "_base_" / f"{subcommand}.yaml" diff --git a/src/otx/core/model/entity/classification.py b/src/otx/core/model/entity/classification.py index 9361f35926f..d07fb96ed34 100644 --- a/src/otx/core/model/entity/classification.py +++ b/src/otx/core/model/entity/classification.py @@ -498,6 +498,26 @@ class OVMulticlassClassificationModel( and create the OTX classification model compatible for OTX testing pipeline. """ + def __init__( + self, + num_classes: int, + model_name: str, + model_type: str = "Classification", + async_inference: bool = True, + max_num_requests: int | None = None, + use_throughput_mode: bool = False, + model_api_configuration: dict[str, Any] | None = None, + ) -> None: + super().__init__( + num_classes, + model_name, + model_type, + async_inference, + max_num_requests, + use_throughput_mode, + model_api_configuration, + ) + def _customize_outputs( self, outputs: list[ClassificationResult], @@ -528,7 +548,7 @@ def __init__( self, num_classes: int, model_name: str, - model_type: str, + model_type: str = "Classification", async_inference: bool = True, max_num_requests: int | None = None, use_throughput_mode: bool = True, @@ -590,7 +610,7 @@ def __init__( self, num_classes: int, model_name: str, - model_type: str, + model_type: str = "Classification", async_inference: bool = True, max_num_requests: int | None = None, use_throughput_mode: bool = True, diff --git a/src/otx/core/model/entity/detection.py b/src/otx/core/model/entity/detection.py index 2fb8a7fcb00..1dbfd24e2c9 100644 --- a/src/otx/core/model/entity/detection.py +++ b/src/otx/core/model/entity/detection.py @@ -283,6 +283,26 @@ class OVDetectionModel(OVModel[DetBatchDataEntity, DetBatchPredEntity]): and create the OTX detection model compatible for OTX testing pipeline. 
""" + def __init__( + self, + num_classes: int, + model_name: str, + model_type: str = "SSD", + async_inference: bool = True, + max_num_requests: int | None = None, + use_throughput_mode: bool = True, + model_api_configuration: dict[str, Any] | None = None, + ) -> None: + super().__init__( + num_classes, + model_name, + model_type, + async_inference, + max_num_requests, + use_throughput_mode, + model_api_configuration, + ) + def _customize_outputs( self, outputs: list[DetectionResult], diff --git a/src/otx/core/model/entity/instance_segmentation.py b/src/otx/core/model/entity/instance_segmentation.py index a75668d4d30..124046ed82b 100644 --- a/src/otx/core/model/entity/instance_segmentation.py +++ b/src/otx/core/model/entity/instance_segmentation.py @@ -280,6 +280,26 @@ class OVInstanceSegmentationModel( and create the OTX detection model compatible for OTX testing pipeline. """ + def __init__( + self, + num_classes: int, + model_name: str, + model_type: str = "MaskRCNN", + async_inference: bool = True, + max_num_requests: int | None = None, + use_throughput_mode: bool = True, + model_api_configuration: dict[str, Any] | None = None, + ) -> None: + super().__init__( + num_classes, + model_name, + model_type, + async_inference, + max_num_requests, + use_throughput_mode, + model_api_configuration, + ) + def _customize_outputs( self, outputs: list[InstanceSegmentationResult], diff --git a/src/otx/core/model/entity/segmentation.py b/src/otx/core/model/entity/segmentation.py index f49c7648018..79c2d5d63e1 100644 --- a/src/otx/core/model/entity/segmentation.py +++ b/src/otx/core/model/entity/segmentation.py @@ -159,6 +159,26 @@ class OVSegmentationModel(OVModel[SegBatchDataEntity, SegBatchPredEntity]): and create the OTX segmentation model compatible for OTX testing pipeline. 
""" + def __init__( + self, + num_classes: int, + model_name: str, + model_type: str = "Segmentation", + async_inference: bool = True, + max_num_requests: int | None = None, + use_throughput_mode: bool = True, + model_api_configuration: dict[str, Any] | None = None, + ) -> None: + super().__init__( + num_classes, + model_name, + model_type, + async_inference, + max_num_requests, + use_throughput_mode, + model_api_configuration, + ) + def _customize_outputs( self, outputs: list[ImageResultWithSoftPrediction], diff --git a/src/otx/engine/engine.py b/src/otx/engine/engine.py index 8e246db18b8..fffe06a1f23 100644 --- a/src/otx/engine/engine.py +++ b/src/otx/engine/engine.py @@ -15,7 +15,7 @@ from otx.core.config.device import DeviceConfig from otx.core.config.explain import ExplainConfig from otx.core.data.module import OTXDataModule -from otx.core.model.entity.base import OTXModel +from otx.core.model.entity.base import OTXModel, OVModel from otx.core.model.module.base import OTXLitModule from otx.core.types.device import DeviceType from otx.core.types.export import OTXExportFormatType @@ -276,19 +276,25 @@ def test( otx test --config --checkpoint ``` """ + model = self.model + checkpoint = checkpoint if checkpoint is not None else self.checkpoint + datamodule = datamodule if datamodule is not None else self.datamodule + + is_ir_ckpt = Path(str(checkpoint)).suffix in [".xml", ".onnx"] + if is_ir_ckpt and not isinstance(model, OVModel): + datamodule = self._auto_configurator.get_ov_datamodule() + model = self._auto_configurator.get_ov_model(model_name=str(checkpoint), meta_info=datamodule.meta_info) + lit_module = self._build_lightning_module( - model=self.model, + model=model, optimizer=self.optimizer, scheduler=self.scheduler, ) - if datamodule is None: - datamodule = self.datamodule lit_module.meta_info = datamodule.meta_info # NOTE, trainer.test takes only lightning based checkpoint. # So, it can't take the OTX1.x checkpoint. 
- checkpoint = checkpoint if checkpoint is not None else self.checkpoint - if checkpoint is not None: + if checkpoint is not None and not is_ir_ckpt: loaded_checkpoint = torch.load(checkpoint) lit_module.load_state_dict(loaded_checkpoint) @@ -580,6 +586,7 @@ def from_config(cls, config_path: PathLike, data_root: PathLike | None = None, * engine_config = {**config.pop("engine"), **config} engine_config.update(kwargs) + engine_config["data_root"] = data_root return cls( datamodule=datamodule, model=model, diff --git a/src/otx/engine/utils/auto_configurator.py b/src/otx/engine/utils/auto_configurator.py index 02304493544..98f7f3506e1 100644 --- a/src/otx/engine/utils/auto_configurator.py +++ b/src/otx/engine/utils/auto_configurator.py @@ -16,6 +16,7 @@ from otx.core.config.data import DataModuleConfig, SubsetConfig, TilerConfig from otx.core.data.dataset.base import LabelInfo from otx.core.data.module import OTXDataModule +from otx.core.model.entity.base import OVModel from otx.core.types.task import OTXTaskType from otx.core.utils.imports import get_otx_root_path from otx.core.utils.instantiators import partial_instantiate_class @@ -64,6 +65,16 @@ "ava": [OTXTaskType.ACTION_DETECTION], } +OVMODEL_PER_TASK = { + OTXTaskType.MULTI_CLASS_CLS: "otx.core.model.entity.classification.OVMulticlassClassificationModel", + OTXTaskType.MULTI_LABEL_CLS: "otx.core.model.entity.classification.OVMultilabelClassificationModel", + OTXTaskType.H_LABEL_CLS: "otx.core.model.entity.classification.OVHlabelClassificationModel", + OTXTaskType.DETECTION: "otx.core.model.entity.detection.OVDetectionModel", + OTXTaskType.ROTATED_DETECTION: "otx.core.model.entity.rotated_detection.OVRotatedDetectionModel", + OTXTaskType.INSTANCE_SEGMENTATION: "otx.core.model.entity.instance_segmentation.OVInstanceSegmentationModel", + OTXTaskType.SEMANTIC_SEGMENTATION: "otx.core.model.entity.segmentation.OVSegmentationModel", +} + def configure_task(data_root: PathLike) -> OTXTaskType: """Configures the task 
based on the given data root. @@ -170,11 +181,11 @@ def _load_default_config(self, model_name: str | None = None) -> dict: dict: The loaded configuration. Raises: - ValueError: If the task is not supported for auto-configuration. + ValueError: If the task doesn't supported for auto-configuration. """ config_file = DEFAULT_CONFIG_PER_TASK.get(self.task, None) if config_file is None: - msg = f"{self.task} is not support Auto-Configuration." + msg = f"{self.task} doesn't support Auto-Configuration." raise ValueError(msg) if model_name is not None: model_path = str(config_file).split("/") @@ -257,3 +268,48 @@ def get_scheduler(self) -> LRSchedulerCallable | None: scheduler_config = self.config.get("scheduler", None) logger.warning(f"Set Default Scheduler: {scheduler_config}") return partial_instantiate_class(init=scheduler_config) + + def get_ov_model(self, model_name: str, meta_info: LabelInfo) -> OVModel: + """Retrieves the OVModel instance based on the given model name and label information. + + Args: + model_name (str): The name of the model. + meta_info (LabelInfo): The label information. + + Returns: + OVModel: The OVModel instance. + + Raises: + NotImplementedError: If the OVModel for the given task is not supported. + """ + class_path = OVMODEL_PER_TASK.get(self.task, None) + if class_path is None: + msg = f"{self.task} is not support OVModel." + raise NotImplementedError(msg) + class_module, class_name = class_path.rsplit(".", 1) + module = __import__(class_module, fromlist=[class_name]) + ov_model = getattr(module, class_name) + return ov_model( + model_name=model_name, + num_classes=meta_info.num_classes, + ) + + def get_ov_datamodule(self) -> OTXDataModule: + """Returns an instance of OTXDataModule configured with the specified data root and data module configuration. + + Returns: + OTXDataModule: An instance of OTXDataModule. 
+ """ + config = self._load_default_config(model_name="openvino_model") + config["data"]["config"]["data_root"] = self.data_root + data_config = config["data"]["config"].copy() + return OTXDataModule( + task=config["data"]["task"], + config=DataModuleConfig( + train_subset=SubsetConfig(**data_config.pop("train_subset")), + val_subset=SubsetConfig(**data_config.pop("val_subset")), + test_subset=SubsetConfig(**data_config.pop("test_subset")), + tile_config=TilerConfig(**data_config.pop("tile_config", {})), + **data_config, + ), + ) diff --git a/tests/integration/api/test_engine_api.py b/tests/integration/api/test_engine_api.py index 39042b1665d..51570d69c29 100644 --- a/tests/integration/api/test_engine_api.py +++ b/tests/integration/api/test_engine_api.py @@ -8,7 +8,7 @@ from otx.core.model.entity.base import OTXModel from otx.core.types.task import OTXTaskType from otx.engine import Engine -from otx.engine.utils.auto_configurator import DEFAULT_CONFIG_PER_TASK +from otx.engine.utils.auto_configurator import DEFAULT_CONFIG_PER_TASK, OVMODEL_PER_TASK @pytest.mark.parametrize("task", list(DEFAULT_CONFIG_PER_TASK)) @@ -47,3 +47,24 @@ def test_engine_from_config( test_metric = engine.test() assert len(test_metric) > 0 + + # A Task that doesn't have Export implemented yet. + # [TODO]: Enable should progress for all Tasks. 
+ if task in [ + OTXTaskType.ACTION_CLASSIFICATION, + OTXTaskType.ACTION_DETECTION, + OTXTaskType.H_LABEL_CLS, + OTXTaskType.ROTATED_DETECTION, + OTXTaskType.VISUAL_PROMPTING, + OTXTaskType.ZERO_SHOT_VISUAL_PROMPTING, + ]: + return + + # Export IR Model + exported_model_path = engine.export() + assert exported_model_path.exists() + + # Test with IR Model + if task in OVMODEL_PER_TASK: + test_metric_from_ov_model = engine.test(checkpoint=exported_model_path, accelerator="cpu") + assert len(test_metric_from_ov_model) > 0 diff --git a/tests/integration/cli/test_cli.py b/tests/integration/cli/test_cli.py index e2ea19952f9..4b0d53957bc 100644 --- a/tests/integration/cli/test_cli.py +++ b/tests/integration/cli/test_cli.py @@ -154,19 +154,13 @@ def test_otx_e2e( assert (tmp_path_test / "outputs" / f"{format_to_file[fmt]}").exists() # 4) infer of the exported models - task = recipe.split("/")[-2] - tmp_path_test = tmp_path / f"otx_test_{model_name}" - if "_cls" in recipe: - export_test_recipe = f"src/otx/recipe/classification/{task}/openvino_model.yaml" - else: - export_test_recipe = f"src/otx/recipe/{task}/openvino_model.yaml" exported_model_path = str(tmp_path_test / "outputs" / "exported_model.xml") command_cfg = [ "otx", "test", "--config", - export_test_recipe, + recipe, "--data_root", fxt_target_dataset_per_task[task], "--engine.work_dir", @@ -174,7 +168,7 @@ def test_otx_e2e( "--engine.device", "cpu", *fxt_cli_override_command_per_task[task], - "--model.model_name", + "--checkpoint", exported_model_path, ] diff --git a/tests/integration/cli/test_export_inference.py b/tests/integration/cli/test_export_inference.py index 2c1b571cd83..e4ad87fe97d 100644 --- a/tests/integration/cli/test_export_inference.py +++ b/tests/integration/cli/test_export_inference.py @@ -189,7 +189,7 @@ def test_otx_export_infer( "--engine.device", "cpu", *fxt_cli_override_command_per_task[task], - "--model.model_name", + "--checkpoint", exported_model_path, ] @@ -232,7 +232,7 @@ def 
test_otx_export_infer( "--engine.device", "cpu", *fxt_cli_override_command_per_task[task], - "--model.model_name", + "--checkpoint", exported_model_path, ] From 48e667674e90914cb6d25aacaf24b95f86d8f1fe Mon Sep 17 00:00:00 2001 From: Prokofiev Kirill Date: Tue, 13 Feb 2024 09:29:31 +0100 Subject: [PATCH 2/5] Add Dice metric for semantic segmentation (#2877) * add raising an error when metric is None * added Dice. Need to debug * updated other models * align metric * change mIoU to Dice in tests * fix monitor * minor fix * fix monitor callback --- src/otx/core/data/dataset/segmentation.py | 10 +++++++--- src/otx/core/model/module/segmentation.py | 20 +++++++++---------- src/otx/engine/engine.py | 1 - .../recipe/semantic_segmentation/dino_v2.yaml | 2 +- .../semantic_segmentation/litehrnet_18.yaml | 4 ++-- .../semantic_segmentation/litehrnet_s.yaml | 4 ++-- .../semantic_segmentation/litehrnet_x.yaml | 4 ++-- .../semantic_segmentation/openvino_model.yaml | 2 +- .../semantic_segmentation/segnext_b.yaml | 2 +- .../semantic_segmentation/segnext_s.yaml | 2 +- .../semantic_segmentation/segnext_t.yaml | 2 +- .../integration/cli/test_export_inference.py | 2 +- 12 files changed, 29 insertions(+), 26 deletions(-) diff --git a/src/otx/core/data/dataset/segmentation.py b/src/otx/core/data/dataset/segmentation.py index a9b47957c33..f95e7f6ffdc 100644 --- a/src/otx/core/data/dataset/segmentation.py +++ b/src/otx/core/data/dataset/segmentation.py @@ -72,7 +72,8 @@ def __init__( def _get_item_impl(self, index: int) -> SegDataEntity | None: item = self.dm_subset.get(id=self.ids[index], subset=self.dm_subset.name) img = item.media_as(Image) - ignored_labels: list[int] = [] # This should be assigned form item + num_classes = self.meta_info.num_classes + ignored_labels: list[int] = [] img_data, img_shape = self._get_img_data_and_shape(img) # create 2D class mask. 
We use np.sum() since Datumaro returns 3D masks (one for each class) @@ -81,7 +82,10 @@ def _get_item_impl(self, index: int) -> SegDataEntity | None: axis=0, dtype=np.uint8, ) - + mask = torch.as_tensor(mask_anns, dtype=torch.long) + # assign possible ignored labels from dataset to max label class + 1. + # it is needed to compute mDice metric. + mask[mask == 255] = num_classes entity = SegDataEntity( image=img_data, img_info=ImageInfo( @@ -92,7 +96,7 @@ def _get_item_impl(self, index: int) -> SegDataEntity | None: ignored_labels=ignored_labels, ), gt_seg_map=tv_tensors.Mask( - torch.as_tensor(mask_anns, dtype=torch.long), + mask, ), ) return self._apply_transforms(entity) diff --git a/src/otx/core/model/module/segmentation.py b/src/otx/core/model/module/segmentation.py index e294a09126f..de028c5c40a 100644 --- a/src/otx/core/model/module/segmentation.py +++ b/src/otx/core/model/module/segmentation.py @@ -9,7 +9,7 @@ import torch from torch import Tensor -from torchmetrics import JaccardIndex +from torchmetrics import Dice from otx.core.data.entity.segmentation import ( SegBatchDataEntity, @@ -40,18 +40,18 @@ def __init__( ) num_classes = otx_model.num_classes if num_classes is None: - msg = """JaccardIndex metric cannot be used with num_classes = None. + msg = """Dice metric cannot be used with num_classes = None. 
Please, specify number of classes in config.""" raise RuntimeError(msg) metric_params = { - "task": "multiclass", - "num_classes": num_classes, - "ignore_index": 255, + # a hack to use ignore_index in Dice metric + "num_classes": num_classes + 1, + "ignore_index": num_classes, } - self.val_metric = JaccardIndex(**metric_params) - self.test_metric = JaccardIndex(**metric_params) + self.val_metric = Dice(**metric_params) + self.test_metric = Dice(**metric_params) def on_validation_epoch_start(self) -> None: """Callback triggered when the validation epoch starts.""" @@ -69,7 +69,7 @@ def on_test_epoch_end(self) -> None: """Callback triggered when the test epoch ends.""" self._log_metrics(self.test_metric, "test") - def _log_metrics(self, meter: JaccardIndex, key: str) -> None: + def _log_metrics(self, meter: Dice, key: str) -> None: results = meter.compute() if results is None: msg = f"{meter} has no data to compute metric or there is an error computing metric" @@ -80,7 +80,7 @@ def _log_metrics(self, meter: JaccardIndex, key: str) -> None: log.debug("Cannot log Tensor which is not scalar") return self.log( - f"{key}/mIoU", + f"{key}/{type(meter).__name__}", results, sync_dist=True, prog_bar=True, @@ -134,4 +134,4 @@ def test_step(self, inputs: SegBatchDataEntity, batch_idx: int) -> None: @property def lr_scheduler_monitor_key(self) -> str: """Metric name that the learning rate scheduler monitor.""" - return "val/mIoU" + return "val/Dice" diff --git a/src/otx/engine/engine.py b/src/otx/engine/engine.py index fffe06a1f23..64e63fe2137 100644 --- a/src/otx/engine/engine.py +++ b/src/otx/engine/engine.py @@ -238,7 +238,6 @@ def train( **fit_kwargs, ) self.checkpoint = self.trainer.checkpoint_callback.best_model_path - return self.trainer.callback_metrics def test( diff --git a/src/otx/recipe/semantic_segmentation/dino_v2.yaml b/src/otx/recipe/semantic_segmentation/dino_v2.yaml index 0941d47edc6..d5435a5d34a 100644 --- a/src/otx/recipe/semantic_segmentation/dino_v2.yaml 
+++ b/src/otx/recipe/semantic_segmentation/dino_v2.yaml @@ -23,7 +23,7 @@ engine: task: SEMANTIC_SEGMENTATION device: auto -callback_monitor: val/mIoU +callback_monitor: val/Dice data: ../_base_/data/mmseg_base.yaml overrides: diff --git a/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml b/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml index b4215984175..8956be79ecd 100644 --- a/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml +++ b/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml @@ -20,12 +20,12 @@ scheduler: mode: max factor: 0.5 patience: 5 - monitor: val/mIoU + monitor: val/Dice engine: task: SEMANTIC_SEGMENTATION device: auto -callback_monitor: val/mIoU +callback_monitor: val/Dice data: ../_base_/data/mmseg_base.yaml diff --git a/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml b/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml index f0b575b4a0d..aae8197a084 100644 --- a/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml +++ b/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml @@ -20,12 +20,12 @@ scheduler: mode: max factor: 0.5 patience: 5 - monitor: val/mIoU + monitor: val/Dice engine: task: SEMANTIC_SEGMENTATION device: auto -callback_monitor: val/mIoU +callback_monitor: val/Dice data: ../_base_/data/mmseg_base.yaml diff --git a/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml b/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml index 115c655df0e..ec6fa516248 100644 --- a/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml +++ b/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml @@ -20,12 +20,12 @@ scheduler: mode: max factor: 0.5 patience: 5 - monitor: val/mIoU + monitor: val/Dice engine: task: SEMANTIC_SEGMENTATION device: auto -callback_monitor: val/mIoU +callback_monitor: val/Dice data: ../_base_/data/mmseg_base.yaml diff --git a/src/otx/recipe/semantic_segmentation/openvino_model.yaml b/src/otx/recipe/semantic_segmentation/openvino_model.yaml index 98021fefee7..90004e3c9b9 100644 --- 
a/src/otx/recipe/semantic_segmentation/openvino_model.yaml +++ b/src/otx/recipe/semantic_segmentation/openvino_model.yaml @@ -20,7 +20,7 @@ engine: task: SEMANTIC_SEGMENTATION device: cpu -callback_monitor: val/mIoU +callback_monitor: val/Dice data: ../_base_/data/torchvision_base.yaml overrides: diff --git a/src/otx/recipe/semantic_segmentation/segnext_b.yaml b/src/otx/recipe/semantic_segmentation/segnext_b.yaml index 8655921665f..2501234f6b3 100644 --- a/src/otx/recipe/semantic_segmentation/segnext_b.yaml +++ b/src/otx/recipe/semantic_segmentation/segnext_b.yaml @@ -25,7 +25,7 @@ engine: task: SEMANTIC_SEGMENTATION device: auto -callback_monitor: val/mIoU +callback_monitor: val/Dice data: ../_base_/data/mmseg_base.yaml overrides: diff --git a/src/otx/recipe/semantic_segmentation/segnext_s.yaml b/src/otx/recipe/semantic_segmentation/segnext_s.yaml index 7c1faa86567..e319bc5ada7 100644 --- a/src/otx/recipe/semantic_segmentation/segnext_s.yaml +++ b/src/otx/recipe/semantic_segmentation/segnext_s.yaml @@ -25,7 +25,7 @@ engine: task: SEMANTIC_SEGMENTATION device: auto -callback_monitor: val/mIoU +callback_monitor: val/Dice data: ../_base_/data/mmseg_base.yaml overrides: diff --git a/src/otx/recipe/semantic_segmentation/segnext_t.yaml b/src/otx/recipe/semantic_segmentation/segnext_t.yaml index 11c78833275..e8f657de00d 100644 --- a/src/otx/recipe/semantic_segmentation/segnext_t.yaml +++ b/src/otx/recipe/semantic_segmentation/segnext_t.yaml @@ -25,7 +25,7 @@ engine: task: SEMANTIC_SEGMENTATION device: auto -callback_monitor: val/mIoU +callback_monitor: val/Dice data: ../_base_/data/mmseg_base.yaml overrides: diff --git a/tests/integration/cli/test_export_inference.py b/tests/integration/cli/test_export_inference.py index e4ad87fe97d..b31073bd1a5 100644 --- a/tests/integration/cli/test_export_inference.py +++ b/tests/integration/cli/test_export_inference.py @@ -46,7 +46,7 @@ def fxt_local_seed() -> int: TASK_NAME_TO_MAIN_METRIC_NAME = { - "semantic_segmentation": 
"test/mIoU", + "semantic_segmentation": "test/Dice", "multi_label_cls": "test/accuracy", "multi_class_cls": "test/accuracy", "detection": "test/map_50", From 8096c32514be4e1503c0f2f92f19bfde75a486d2 Mon Sep 17 00:00:00 2001 From: Vladislav Sovrasov Date: Tue, 13 Feb 2024 10:01:34 +0100 Subject: [PATCH 3/5] Add workaround for h-cls inference (#2904) * Fix h-cls inference * Add h_cls to inference testing --- src/otx/core/model/entity/classification.py | 38 ++++++++++++++++--- .../integration/cli/test_export_inference.py | 4 ++ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/src/otx/core/model/entity/classification.py b/src/otx/core/model/entity/classification.py index d07fb96ed34..f3d5f80c98b 100644 --- a/src/otx/core/model/entity/classification.py +++ b/src/otx/core/model/entity/classification.py @@ -8,6 +8,7 @@ import json from typing import TYPE_CHECKING, Any +import numpy as np import torch from otx.core.data.dataset.classification import HLabelMetaInfo @@ -559,7 +560,7 @@ def __init__( self.num_multiclass_heads = num_multiclass_heads self.num_multilabel_classes = num_multilabel_classes model_api_configuration = model_api_configuration if model_api_configuration else {} - model_api_configuration.update({"hierarchical": True, "confidence_threshold": 0.0}) + model_api_configuration.update({"hierarchical": True, "output_raw_scores": True}) super().__init__( num_classes, model_name, @@ -574,7 +575,7 @@ def set_hlabel_info(self, hierarchical_info: HLabelInfo) -> None: """Set hierarchical information in model head. 
Since OV IR model consist of all required hierarchy information, - this method serves as placehloder + this method serves as placeholder """ if not hasattr(self.model, "hierarchical_info") or not self.model.hierarchical_info: msg = "OpenVINO IR model should have hierarchical config embedded in rt_info of the model" @@ -585,15 +586,40 @@ def _customize_outputs( outputs: list[ClassificationResult], inputs: HlabelClsBatchDataEntity, ) -> HlabelClsBatchPredEntity: - pred_labels = [torch.tensor([label[0] for label in out.top_labels], dtype=torch.long) for out in outputs] - pred_scores = [torch.tensor([label[2] for label in out.top_labels]) for out in outputs] + all_pred_labels = [] + all_pred_scores = [] + for output in outputs: + logits = output.raw_scores + predicted_labels = [] + predicted_scores = [] + cls_heads_info = self.model.hierarchical_info["cls_heads_info"] + for i in range(cls_heads_info["num_multiclass_heads"]): + logits_begin, logits_end = cls_heads_info["head_idx_to_logits_range"][str(i)] + head_logits = logits[logits_begin:logits_end] + j = np.argmax(head_logits) + predicted_labels.append(j) + predicted_scores.append(head_logits[j]) + + if cls_heads_info["num_multilabel_classes"]: + logits_begin = cls_heads_info["num_single_label_classes"] + head_logits = logits[logits_begin:] + + for i in range(head_logits.shape[0]): + predicted_scores.append(head_logits[i]) + if head_logits[i] > self.model.confidence_threshold: + predicted_labels.append(1) + else: + predicted_labels.append(0) + + all_pred_labels.append(torch.tensor(predicted_labels, dtype=torch.long)) + all_pred_scores.append(torch.tensor(predicted_scores)) return HlabelClsBatchPredEntity( batch_size=len(outputs), images=inputs.images, imgs_info=inputs.imgs_info, - scores=pred_scores, - labels=pred_labels, + scores=all_pred_scores, + labels=all_pred_labels, ) diff --git a/tests/integration/cli/test_export_inference.py b/tests/integration/cli/test_export_inference.py index b31073bd1a5..a7c53fe0e0d 
100644 --- a/tests/integration/cli/test_export_inference.py +++ b/tests/integration/cli/test_export_inference.py @@ -49,6 +49,7 @@ def fxt_local_seed() -> int: "semantic_segmentation": "test/Dice", "multi_label_cls": "test/accuracy", "multi_class_cls": "test/accuracy", + "h_label_cls": "test/accuracy", "detection": "test/map_50", "instance_segmentation": "test/map_50", } @@ -264,5 +265,8 @@ def test_otx_export_infer( if "multi_label_cls/mobilenet_v3_large_light" in request.node.name: msg = "multi_label_cls/mobilenet_v3_large_light exceeds the following threshold = 0.1" pytest.xfail(msg) + if "h_label_cls/efficientnet_v2_light" in request.node.name: + msg = "h_label_cls/efficientnet_v2_light exceeds the following threshold = 0.1" + pytest.xfail(msg) _check_relative_metric_diff(torch_acc, ov_acc, 0.1) From 4cd268b0d757047cdb7965c84d40df6a49e6e49a Mon Sep 17 00:00:00 2001 From: Vladislav Sovrasov Date: Tue, 13 Feb 2024 10:03:54 +0100 Subject: [PATCH 4/5] Update data pipeline for DinoV2 cls (#2898) * Update data pipeline for DinoV2 cls * Fix typos in native converter * Fix typos in mmdeploy converter --- src/otx/core/exporter/base.py | 2 +- src/otx/core/exporter/mmdeploy.py | 6 +++--- src/otx/core/exporter/native.py | 4 ++-- .../multi_class_cls/otx_dino_v2_linear_probe.yaml | 7 ++----- tests/integration/cli/test_export_inference.py | 4 +--- 5 files changed, 9 insertions(+), 14 deletions(-) diff --git a/src/otx/core/exporter/base.py b/src/otx/core/exporter/base.py index 845a06a9d59..e9838d5d16e 100644 --- a/src/otx/core/exporter/base.py +++ b/src/otx/core/exporter/base.py @@ -31,7 +31,7 @@ class OTXModelExporter: mean (tuple[float, float, float], optional): Mean values of 3 channels. Defaults to (0.0, 0.0, 0.0). std (tuple[float, float, float], optional): Std values of 3 channels. Defaults to (1.0, 1.0, 1.0). resize_mode (Literal["crop", "standard", "fit_to_window", "fit_to_window_letterbox"], optional): - A resize type for model preprocess. 
"standard" resizes iamges without keeping ratio. + A resize type for model preprocess. "standard" resizes images without keeping ratio. "fit_to_window" resizes images while keeping ratio. "fit_to_window_letterbox" resizes images and pads images to fit the size. Defaults to "standard". pad_value (int, optional): Padding value. Defaults to 0. diff --git a/src/otx/core/exporter/mmdeploy.py b/src/otx/core/exporter/mmdeploy.py index 158cf924596..6e378199d1b 100644 --- a/src/otx/core/exporter/mmdeploy.py +++ b/src/otx/core/exporter/mmdeploy.py @@ -41,7 +41,7 @@ class MMdeployExporter(OTXModelExporter): mean (tuple[float, float, float], optional): Mean values of 3 channels. Defaults to (0.0, 0.0, 0.0). std (tuple[float, float, float], optional): Std values of 3 channels. Defaults to (1.0, 1.0, 1.0). resize_mode (Literal["crop", "standard", "fit_to_window", "fit_to_window_letterbox"], optional): - A resize type for model preprocess. "standard" resizes iamges without keeping ratio. + A resize type for model preprocess. "standard" resizes images without keeping ratio. "fit_to_window" resizes images while keeping ratio. "fit_to_window_letterbox" resizes images and pads images to fit the size. Defaults to "standard". pad_value (int, optional): Padding value. Defaults to 0. 
@@ -110,7 +110,7 @@ def to_openvino( save_path = output_dir / (base_model_name + ".xml") openvino.save_model(exported_model, save_path, compress_to_fp16=(precision == OTXPrecisionType.FP16)) onnx_path.unlink() - log.info("Coverting to OpenVINO is done.") + log.info("Converting to OpenVINO is done.") return Path(save_path) @@ -141,7 +141,7 @@ def to_onnx( onnx_model = self._postprocess_onnx_model(onnx_model, embed_metadata, precision) onnx.save(onnx_model, str(save_path)) - log.info("Coverting to ONNX is done.") + log.info("Converting to ONNX is done.") return save_path diff --git a/src/otx/core/exporter/native.py b/src/otx/core/exporter/native.py index ee0a8748a03..0a4257a6b31 100644 --- a/src/otx/core/exporter/native.py +++ b/src/otx/core/exporter/native.py @@ -75,7 +75,7 @@ def to_openvino( save_path = output_dir / (base_model_name + ".xml") openvino.save_model(exported_model, save_path, compress_to_fp16=(precision == OTXPrecisionType.FP16)) - log.info("Coverting to OpenVINO is done.") + log.info("Converting to OpenVINO is done.") return Path(save_path) @@ -109,6 +109,6 @@ def to_onnx( onnx_model = self._postprocess_onnx_model(onnx_model, embed_metadata, precision) onnx.save(onnx_model, save_path) - log.info("Coverting to ONNX is done.") + log.info("Converting to ONNX is done.") return Path(save_path) diff --git a/src/otx/recipe/classification/multi_class_cls/otx_dino_v2_linear_probe.yaml b/src/otx/recipe/classification/multi_class_cls/otx_dino_v2_linear_probe.yaml index 4ea1f2d67bf..dc5e4ccde51 100644 --- a/src/otx/recipe/classification/multi_class_cls/otx_dino_v2_linear_probe.yaml +++ b/src/otx/recipe/classification/multi_class_cls/otx_dino_v2_linear_probe.yaml @@ -92,9 +92,6 @@ overrides: to_rgb: true type: Normalize - backend: cv2 - edge: short - scale: 256 - type: ResizeEdge - - crop_size: 224 - type: CenterCrop + scale: 224 + type: Resize - type: PackInputs diff --git a/tests/integration/cli/test_export_inference.py 
b/tests/integration/cli/test_export_inference.py index a7c53fe0e0d..552e96aad7c 100644 --- a/tests/integration/cli/test_export_inference.py +++ b/tests/integration/cli/test_export_inference.py @@ -91,9 +91,7 @@ def test_otx_export_infer( ): pytest.skip("To prevent memory bug from aborting integration test, test single model per task.") elif "tile" in recipe: - pytest.skip("Exporting tiling model isn't suppored yet.") - elif "otx_dino_v2_linear_probe" in recipe: - pytest.skip("Test pipeline is different between torch and ov model.") # NOTE Enable test after making them same + pytest.skip("Exporting models with tiling isn't supported yet.") model_name = recipe.split("/")[-1].split(".")[0] # 1) otx train From c5ad4258a29376b3f9952c9b5441b675dd6f5c8c Mon Sep 17 00:00:00 2001 From: Harim Kang Date: Wed, 14 Feb 2024 09:52:36 +0900 Subject: [PATCH 5/5] Fix to allow Scheduler to be configured as a list and fix a bug in the AdaptiveTrainingCallback (#2910) * Address Adaptive Setting issue * Fix to receive optimizers and schedulers as a list * Remove warmup 0 iter settings * Fix partial_instantiate_function * Fix minor issue * Fix for other python version * Fix minor empty issue * Fix docstring in Engine * Fix CLI type * Fix configs.callbacks is None --- .../callbacks/adaptive_train_scheduling.py | 2 +- src/otx/cli/cli.py | 23 +++++--- src/otx/cli/utils/jsonargparse.py | 2 +- .../model/module/action_classification.py | 4 +- src/otx/core/model/module/action_detection.py | 4 +- src/otx/core/model/module/base.py | 57 ++++++++----------- src/otx/core/model/module/classification.py | 12 ++-- src/otx/core/model/module/detection.py | 4 +- .../model/module/instance_segmentation.py | 4 +- .../core/model/module/rotated_detection.py | 4 +- src/otx/core/model/module/segmentation.py | 4 +- src/otx/core/model/module/visual_prompting.py | 4 +- src/otx/core/utils/instantiators.py | 23 +++++--- src/otx/engine/engine.py | 21 +++---- src/otx/engine/utils/auto_configurator.py | 8 +-- 
.../action/action_classification/x3d.yaml | 16 +++--- .../action/action_detection/x3d_fastrcnn.yaml | 16 +++--- .../h_label_cls/efficientnet_b0_light.yaml | 8 ++- .../h_label_cls/efficientnet_v2_light.yaml | 8 ++- .../h_label_cls/mobilenet_v3_large_light.yaml | 21 ++++--- .../h_label_cls/otx_deit_tiny.yaml | 21 ++++--- .../efficientnet_b0_light.yaml | 10 +++- .../efficientnet_v2_light.yaml | 8 ++- .../mobilenet_v3_large_light.yaml | 21 ++++--- .../multi_class_cls/otx_deit_tiny.yaml | 23 +++++--- .../multi_class_cls/otx_dino_v2.yaml | 5 ++ .../otx_dino_v2_linear_probe.yaml | 5 ++ .../multi_class_cls/otx_efficientnet_b0.yaml | 11 +++- .../multi_class_cls/otx_efficientnet_v2.yaml | 9 ++- .../otx_mobilenet_v3_large.yaml | 20 +++++-- .../efficientnet_b0_light.yaml | 8 ++- .../efficientnet_v2_light.yaml | 8 ++- .../mobilenet_v3_large_light.yaml | 21 ++++--- .../multi_label_cls/otx_deit_tiny.yaml | 21 ++++--- .../recipe/detection/atss_mobilenetv2.yaml | 16 +++--- src/otx/recipe/detection/atss_r50_fpn.yaml | 16 +++--- src/otx/recipe/detection/atss_resnext101.yaml | 16 +++--- src/otx/recipe/detection/ssd_mobilenetv2.yaml | 16 +++--- src/otx/recipe/detection/yolox_l.yaml | 16 +++--- src/otx/recipe/detection/yolox_l_tile.yaml | 16 +++--- src/otx/recipe/detection/yolox_s.yaml | 16 +++--- src/otx/recipe/detection/yolox_s_tile.yaml | 16 +++--- src/otx/recipe/detection/yolox_tiny.yaml | 16 +++--- src/otx/recipe/detection/yolox_tiny_tile.yaml | 16 +++--- src/otx/recipe/detection/yolox_x.yaml | 16 +++--- src/otx/recipe/detection/yolox_x_tile.yaml | 16 +++--- .../maskrcnn_efficientnetb2b.yaml | 17 +++--- .../maskrcnn_efficientnetb2b_tile.yaml | 17 +++--- .../instance_segmentation/maskrcnn_r50.yaml | 17 +++--- .../maskrcnn_r50_tile.yaml | 17 +++--- .../instance_segmentation/maskrcnn_swint.yaml | 17 +++--- .../maskrcnn_efficientnetb2b.yaml | 16 ++++-- .../rotated_detection/maskrcnn_r50.yaml | 16 ++++-- .../semantic_segmentation/litehrnet_18.yaml | 19 ++++--- 
.../semantic_segmentation/litehrnet_s.yaml | 19 ++++--- .../semantic_segmentation/litehrnet_x.yaml | 19 ++++--- .../semantic_segmentation/segnext_b.yaml | 15 +++-- .../semantic_segmentation/segnext_s.yaml | 15 +++-- .../semantic_segmentation/segnext_t.yaml | 15 +++-- .../test_adaptive_train_scheduling.py | 1 + tests/unit/core/model/module/test_base.py | 23 ++++---- .../engine/utils/test_auto_configurator.py | 14 ++++- 62 files changed, 540 insertions(+), 345 deletions(-) diff --git a/src/otx/algo/callbacks/adaptive_train_scheduling.py b/src/otx/algo/callbacks/adaptive_train_scheduling.py index ae63040f056..afeeaa0bb35 100644 --- a/src/otx/algo/callbacks/adaptive_train_scheduling.py +++ b/src/otx/algo/callbacks/adaptive_train_scheduling.py @@ -104,7 +104,7 @@ def _revert_func(config: LRSchedulerConfig, saved_frequency: int) -> None: config.frequency = saved_frequency for config in lr_configs: - if hasattr(config, "frequency"): + if hasattr(config, "frequency") and hasattr(config, "interval") and config.interval == "epoch": msg = ( "The frequency of LRscheduler will be changed due to the effect of adaptive interval: " f"{config.frequency} --> {adaptive_interval}." 
diff --git a/src/otx/cli/cli.py b/src/otx/cli/cli.py index 2c7159f990a..764e84794a8 100644 --- a/src/otx/cli/cli.py +++ b/src/otx/cli/cli.py @@ -156,18 +156,19 @@ def engine_subcommand_parser(**kwargs) -> ArgumentParser: sub_configs=True, ) # Optimizer & Scheduler Settings - from lightning.pytorch.cli import LRSchedulerTypeTuple + from lightning.pytorch.cli import ReduceLROnPlateau from torch.optim import Optimizer + from torch.optim.lr_scheduler import LRScheduler optim_kwargs = {"instantiate": False, "fail_untyped": False, "skip": {"params"}} scheduler_kwargs = {"instantiate": False, "fail_untyped": False, "skip": {"optimizer"}} parser.add_subclass_arguments( - baseclass=(Optimizer,), + baseclass=(Optimizer, list), nested_key="optimizer", **optim_kwargs, ) parser.add_subclass_arguments( - baseclass=LRSchedulerTypeTuple, + baseclass=(LRScheduler, ReduceLROnPlateau, list), nested_key="scheduler", **scheduler_kwargs, ) @@ -341,11 +342,17 @@ def instantiate_model(self, model_config: Namespace) -> tuple: # Update self.config with model self.config[self.subcommand].update(Namespace(model=model_config)) - optimizer_kwargs = namespace_to_dict(self.get_config_value(self.config_init, "optimizer", Namespace())) - scheduler_kwargs = namespace_to_dict(self.get_config_value(self.config_init, "scheduler", Namespace())) from otx.core.utils.instantiators import partial_instantiate_class - return model, partial_instantiate_class(optimizer_kwargs), partial_instantiate_class(scheduler_kwargs) + optimizer_kwargs = self.get_config_value(self.config_init, "optimizer", {}) + optimizer_kwargs = optimizer_kwargs if isinstance(optimizer_kwargs, list) else [optimizer_kwargs] + optimizers = partial_instantiate_class([_opt for _opt in optimizer_kwargs if _opt]) + + scheduler_kwargs = self.get_config_value(self.config_init, "scheduler", {}) + scheduler_kwargs = scheduler_kwargs if isinstance(scheduler_kwargs, list) else [scheduler_kwargs] + schedulers = partial_instantiate_class([_sch for _sch 
in scheduler_kwargs if _sch]) + + return model, optimizers, schedulers def get_config_value(self, config: Namespace, key: str, default: Any = None) -> Any: # noqa: ANN401 """Retrieves the value of a configuration key from the given config object. @@ -357,8 +364,10 @@ def get_config_value(self, config: Namespace, key: str, default: Any = None) -> Returns: Any: The value of the configuration key, or the default value if the key is not found. + if the value is a Namespace, it is converted to a dictionary. """ - return config.get(str(self.subcommand), config).get(key, default) + result = config.get(str(self.subcommand), config).get(key, default) + return namespace_to_dict(result) if isinstance(result, Namespace) else result def get_subcommand_parser(self, subcommand: str | None) -> ArgumentParser: """Returns the argument parser for the specified subcommand. diff --git a/src/otx/cli/utils/jsonargparse.py b/src/otx/cli/utils/jsonargparse.py index d16a1a238c6..8ea735e9d07 100644 --- a/src/otx/cli/utils/jsonargparse.py +++ b/src/otx/cli/utils/jsonargparse.py @@ -178,7 +178,7 @@ def list_override(configs: Namespace, key: str, overrides: list) -> None: ... ... ... 
] """ - if key not in configs: + if key not in configs or configs[key] is None: return for target in overrides: class_path = target.get("class_path", None) diff --git a/src/otx/core/model/module/action_classification.py b/src/otx/core/model/module/action_classification.py index 867f7378283..cdc4d065982 100644 --- a/src/otx/core/model/module/action_classification.py +++ b/src/otx/core/model/module/action_classification.py @@ -28,8 +28,8 @@ def __init__( self, otx_model: OTXActionClsModel, torch_compile: bool, - optimizer: OptimizerCallable = lambda p: torch.optim.SGD(p, lr=0.01), - scheduler: LRSchedulerCallable = torch.optim.lr_scheduler.ConstantLR, + optimizer: list[OptimizerCallable] | OptimizerCallable = lambda p: torch.optim.SGD(p, lr=0.01), + scheduler: list[LRSchedulerCallable] | LRSchedulerCallable = torch.optim.lr_scheduler.ConstantLR, ): super().__init__( otx_model=otx_model, diff --git a/src/otx/core/model/module/action_detection.py b/src/otx/core/model/module/action_detection.py index cf9ff35baaf..3e5f0ba7d46 100644 --- a/src/otx/core/model/module/action_detection.py +++ b/src/otx/core/model/module/action_detection.py @@ -29,8 +29,8 @@ def __init__( self, otx_model: OTXActionDetModel, torch_compile: bool, - optimizer: OptimizerCallable = lambda p: torch.optim.SGD(p, lr=0.01), - scheduler: LRSchedulerCallable = torch.optim.lr_scheduler.ConstantLR, + optimizer: list[OptimizerCallable] | OptimizerCallable = lambda p: torch.optim.SGD(p, lr=0.01), + scheduler: list[LRSchedulerCallable] | LRSchedulerCallable = torch.optim.lr_scheduler.ConstantLR, ): super().__init__( otx_model=otx_model, diff --git a/src/otx/core/model/module/base.py b/src/otx/core/model/module/base.py index b0e90ef16c9..8dd4f0e5618 100644 --- a/src/otx/core/model/module/base.py +++ b/src/otx/core/model/module/base.py @@ -12,7 +12,6 @@ from lightning import LightningModule from torch import Tensor -from otx.algo.schedulers.warmup_schedulers import BaseWarmupScheduler from 
otx.core.data.entity.base import ( OTXBatchDataEntity, OTXBatchLossEntity, @@ -34,11 +33,13 @@ def __init__( self, optimizer: torch.optim.Optimizer, num_warmup_steps: int = 1000, + interval: str = "step", ): - if num_warmup_steps > 0: + if not num_warmup_steps > 0: msg = f"num_warmup_steps should be > 0, got {num_warmup_steps}" - ValueError(msg) + raise ValueError(msg) self.num_warmup_steps = num_warmup_steps + self.interval = interval super().__init__(optimizer, lambda step: min(step / num_warmup_steps, 1.0)) @@ -50,8 +51,8 @@ def __init__( *, otx_model: OTXModel, torch_compile: bool, - optimizer: OptimizerCallable = lambda p: torch.optim.SGD(p, lr=0.01), - scheduler: LRSchedulerCallable = torch.optim.lr_scheduler.ConstantLR, + optimizer: list[OptimizerCallable] | OptimizerCallable = lambda p: torch.optim.SGD(p, lr=0.01), + scheduler: list[LRSchedulerCallable] | LRSchedulerCallable = torch.optim.lr_scheduler.ConstantLR, ): super().__init__() @@ -110,7 +111,7 @@ def setup(self, stage: str) -> None: if self.torch_compile and stage == "fit": self.model = torch.compile(self.model) - def configure_optimizers(self) -> tuple[list[torch.optim.Optimizer], list[torch.optim.Optimizer]]: + def configure_optimizers(self) -> tuple[list[torch.optim.Optimizer], list[dict]]: """Choose what optimizers and learning-rate schedulers to use in your optimization. Normally you'd need one. But in the case of GANs or similar you might have multiple. @@ -120,34 +121,26 @@ def configure_optimizers(self) -> tuple[list[torch.optim.Optimizer], list[torch. :return: A dict containing the configured optimizers and learning-rate schedulers to be used for training. 
""" - optimizer = ( - self.hparams.optimizer(params=self.parameters()) - if callable(self.hparams.optimizer) - else self.hparams.optimizer - ) - - scheduler = ( - self.hparams.scheduler(optimizer=optimizer) if callable(self.hparams.scheduler) else self.hparams.scheduler - ) - - lr_scheduler_configs = [] - if isinstance(scheduler, BaseWarmupScheduler) and scheduler.warmup_steps > 0: - lr_scheduler_configs += [ - { - "scheduler": LinearWarmupScheduler(optimizer, num_warmup_steps=scheduler.warmup_steps), - "interval": "step", - }, - ] - lr_scheduler_configs += [ - { - "scheduler": scheduler, - "monitor": self.lr_scheduler_monitor_key, - "interval": "epoch", - "frequency": self.trainer.check_val_every_n_epoch, - }, + + def ensure_list(item: Any) -> list: # noqa: ANN401 + return item if isinstance(item, list) else [item] + + optimizers = [ + optimizer(params=self.parameters()) if callable(optimizer) else optimizer + for optimizer in ensure_list(self.hparams.optimizer) ] - return [optimizer], lr_scheduler_configs + lr_schedulers = [] + for scheduler_config in ensure_list(self.hparams.scheduler): + scheduler = scheduler_config(optimizers[0]) if callable(scheduler_config) else scheduler_config + lr_scheduler_config = {"scheduler": scheduler} + if hasattr(scheduler, "interval"): + lr_scheduler_config["interval"] = scheduler.interval + if hasattr(scheduler, "monitor"): + lr_scheduler_config["monitor"] = scheduler.monitor + lr_schedulers.append(lr_scheduler_config) + + return optimizers, lr_schedulers def register_load_state_dict_pre_hook(self, model_classes: list[str], ckpt_classes: list[str]) -> None: """Register self.model's load_state_dict_pre_hook. 
diff --git a/src/otx/core/model/module/classification.py b/src/otx/core/model/module/classification.py index 7505824b48c..fd1f9cf5431 100644 --- a/src/otx/core/model/module/classification.py +++ b/src/otx/core/model/module/classification.py @@ -37,8 +37,8 @@ def __init__( self, otx_model: OTXMulticlassClsModel, torch_compile: bool, - optimizer: OptimizerCallable = lambda p: torch.optim.SGD(p, lr=0.01), - scheduler: LRSchedulerCallable = torch.optim.lr_scheduler.ConstantLR, + optimizer: list[OptimizerCallable] | OptimizerCallable = lambda p: torch.optim.SGD(p, lr=0.01), + scheduler: list[LRSchedulerCallable] | LRSchedulerCallable = torch.optim.lr_scheduler.ConstantLR, ): super().__init__( otx_model=otx_model, @@ -130,8 +130,8 @@ def __init__( self, otx_model: OTXMultilabelClsModel, torch_compile: bool, - optimizer: OptimizerCallable = lambda p: torch.optim.SGD(p, lr=0.01), - scheduler: LRSchedulerCallable = torch.optim.lr_scheduler.ConstantLR, + optimizer: list[OptimizerCallable] | OptimizerCallable = lambda p: torch.optim.SGD(p, lr=0.01), + scheduler: list[LRSchedulerCallable] | LRSchedulerCallable = torch.optim.lr_scheduler.ConstantLR, ): super().__init__( otx_model=otx_model, @@ -218,8 +218,8 @@ def __init__( self, otx_model: OTXHlabelClsModel, torch_compile: bool, - optimizer: OptimizerCallable = lambda p: torch.optim.SGD(p, lr=0.01), - scheduler: LRSchedulerCallable = torch.optim.lr_scheduler.ConstantLR, + optimizer: list[OptimizerCallable] | OptimizerCallable = lambda p: torch.optim.SGD(p, lr=0.01), + scheduler: list[LRSchedulerCallable] | LRSchedulerCallable = torch.optim.lr_scheduler.ConstantLR, ): super().__init__( otx_model=otx_model, diff --git a/src/otx/core/model/module/detection.py b/src/otx/core/model/module/detection.py index 3869a5a798f..f2d9938874a 100644 --- a/src/otx/core/model/module/detection.py +++ b/src/otx/core/model/module/detection.py @@ -29,8 +29,8 @@ def __init__( self, otx_model: ExplainableOTXDetModel, torch_compile: bool, - optimizer: 
OptimizerCallable = lambda p: torch.optim.SGD(p, lr=0.01), - scheduler: LRSchedulerCallable = torch.optim.lr_scheduler.ConstantLR, + optimizer: list[OptimizerCallable] | OptimizerCallable = lambda p: torch.optim.SGD(p, lr=0.01), + scheduler: list[LRSchedulerCallable] | LRSchedulerCallable = torch.optim.lr_scheduler.ConstantLR, ): super().__init__( otx_model=otx_model, diff --git a/src/otx/core/model/module/instance_segmentation.py b/src/otx/core/model/module/instance_segmentation.py index 4d27ece3a4c..40bf4fb4fb3 100644 --- a/src/otx/core/model/module/instance_segmentation.py +++ b/src/otx/core/model/module/instance_segmentation.py @@ -32,8 +32,8 @@ def __init__( self, otx_model: ExplainableOTXInstanceSegModel, torch_compile: bool, - optimizer: OptimizerCallable = lambda p: torch.optim.SGD(p, lr=0.01), - scheduler: LRSchedulerCallable = torch.optim.lr_scheduler.ConstantLR, + optimizer: list[OptimizerCallable] | OptimizerCallable = lambda p: torch.optim.SGD(p, lr=0.01), + scheduler: list[LRSchedulerCallable] | LRSchedulerCallable = torch.optim.lr_scheduler.ConstantLR, ): super().__init__( otx_model=otx_model, diff --git a/src/otx/core/model/module/rotated_detection.py b/src/otx/core/model/module/rotated_detection.py index 84275563b2e..12bfd84d5a7 100644 --- a/src/otx/core/model/module/rotated_detection.py +++ b/src/otx/core/model/module/rotated_detection.py @@ -25,8 +25,8 @@ def __init__( self, otx_model: OTXRotatedDetModel, torch_compile: bool, - optimizer: OptimizerCallable = lambda p: torch.optim.SGD(p, lr=0.01), - scheduler: LRSchedulerCallable = torch.optim.lr_scheduler.ConstantLR, + optimizer: list[OptimizerCallable] | OptimizerCallable = lambda p: torch.optim.SGD(p, lr=0.01), + scheduler: list[LRSchedulerCallable] | LRSchedulerCallable = torch.optim.lr_scheduler.ConstantLR, ): super().__init__( otx_model=otx_model, diff --git a/src/otx/core/model/module/segmentation.py b/src/otx/core/model/module/segmentation.py index de028c5c40a..000b2cdea3d 100644 --- 
a/src/otx/core/model/module/segmentation.py +++ b/src/otx/core/model/module/segmentation.py @@ -29,8 +29,8 @@ def __init__( self, otx_model: OTXSegmentationModel, torch_compile: bool, - optimizer: OptimizerCallable = lambda p: torch.optim.SGD(p, lr=0.01), - scheduler: LRSchedulerCallable = torch.optim.lr_scheduler.ConstantLR, + optimizer: list[OptimizerCallable] | OptimizerCallable = lambda p: torch.optim.SGD(p, lr=0.01), + scheduler: list[LRSchedulerCallable] | LRSchedulerCallable = torch.optim.lr_scheduler.ConstantLR, ): super().__init__( otx_model=otx_model, diff --git a/src/otx/core/model/module/visual_prompting.py b/src/otx/core/model/module/visual_prompting.py index 599f2a06c79..68a62ef8398 100644 --- a/src/otx/core/model/module/visual_prompting.py +++ b/src/otx/core/model/module/visual_prompting.py @@ -36,8 +36,8 @@ def __init__( self, otx_model: OTXVisualPromptingModel, torch_compile: bool, - optimizer: OptimizerCallable = lambda p: torch.optim.SGD(p, lr=0.01), - scheduler: LRSchedulerCallable = torch.optim.lr_scheduler.ConstantLR, + optimizer: list[OptimizerCallable] | OptimizerCallable = lambda p: torch.optim.SGD(p, lr=0.01), + scheduler: list[LRSchedulerCallable] | LRSchedulerCallable = torch.optim.lr_scheduler.ConstantLR, ): super().__init__( otx_model=otx_model, diff --git a/src/otx/core/utils/instantiators.py b/src/otx/core/utils/instantiators.py index b19f0105cea..5ca301b1163 100644 --- a/src/otx/core/utils/instantiators.py +++ b/src/otx/core/utils/instantiators.py @@ -66,24 +66,29 @@ def instantiate_loggers(logger_cfg: list | None) -> list[Logger]: return logger -def partial_instantiate_class(init: dict | None) -> partial | None: +def partial_instantiate_class(init: list | dict | None) -> list[partial] | None: """Partially instantiates a class with the given initialization arguments. Copy from lightning.pytorch.cli.instantiate_class and modify it to use partial. Args: - init (dict): A dictionary containing the initialization arguments. 
- It should have the following keys: + init (list | dict | None): A dictionary, or a list of dictionaries, containing the initialization arguments. + Each dictionary should have the following keys: - "init_args" (dict): A dictionary of keyword arguments to be passed to the class constructor. - "class_path" (str): The fully qualified path of the class to be instantiated. Returns: - partial: A partial object representing the partially instantiated class. + list[partial] | None: A list of partial objects, one per partially instantiated class, or None if init is empty. """ if not init: return None - kwargs = init.get("init_args", {}) - class_module, class_name = init["class_path"].rsplit(".", 1) - module = __import__(class_module, fromlist=[class_name]) - args_class = getattr(module, class_name) - return partial(args_class, **kwargs) + if not isinstance(init, list): + init = [init] + items: list[partial] = [] + for item in init: + kwargs = item.get("init_args", {}) + class_module, class_name = item["class_path"].rsplit(".", 1) + module = __import__(class_module, fromlist=[class_name]) + args_class = getattr(module, class_name) + items.append(partial(args_class, **kwargs)) + return items diff --git a/src/otx/engine/engine.py b/src/otx/engine/engine.py index 64e63fe2137..694a29827ac 100644 --- a/src/otx/engine/engine.py +++ b/src/otx/engine/engine.py @@ -83,8 +83,8 @@ def __init__( work_dir: PathLike = "./otx-workspace", datamodule: OTXDataModule | None = None, model: OTXModel | str | None = None, - optimizer: OptimizerCallable | None = None, - scheduler: LRSchedulerCallable | None = None, + optimizer: list[OptimizerCallable] | OptimizerCallable | None = None, + scheduler: list[LRSchedulerCallable] | LRSchedulerCallable | None = None, checkpoint: PathLike | None = None, device: DeviceType = DeviceType.auto, **kwargs, @@ -97,9 +97,10 @@ def __init__( work_dir (PathLike, optional): Working directory for the engine. Defaults to "./otx-workspace". datamodule (OTXDataModule | None, optional): The data module for the engine. 
Defaults to None. model (OTXModel | str | None, optional): The model for the engine. Defaults to None. - optimizer (OptimizerCallable | None, optional): The optimizer for the engine. Defaults to None. - scheduler (LRSchedulerCallable | None, optional): The learning rate scheduler for the engine. + optimizer (list[OptimizerCallable] | OptimizerCallable | None, optional): The optimizer for the engine. Defaults to None. + scheduler (list[LRSchedulerCallable] | LRSchedulerCallable | None, optional): + The learning rate scheduler for the engine. Defaults to None. checkpoint (PathLike | None, optional): Path to the checkpoint file. Defaults to None. device (DeviceType, optional): The device type to use. Defaults to DeviceType.auto. **kwargs: Additional keyword arguments for pl.Trainer. @@ -132,10 +133,10 @@ def __init__( meta_info=self._datamodule.meta_info if self._datamodule is not None else None, ) ) - self.optimizer: OptimizerCallable | None = ( + self.optimizer: list[OptimizerCallable] | OptimizerCallable | None = ( optimizer if optimizer is not None else self._auto_configurator.get_optimizer() ) - self.scheduler: LRSchedulerCallable | None = ( + self.scheduler: list[LRSchedulerCallable] | LRSchedulerCallable | None = ( scheduler if scheduler is not None else self._auto_configurator.get_scheduler() ) @@ -667,15 +668,15 @@ def datamodule(self) -> OTXDataModule: def _build_lightning_module( self, model: OTXModel, - optimizer: OptimizerCallable, - scheduler: LRSchedulerCallable, + optimizer: list[OptimizerCallable] | OptimizerCallable | None, + scheduler: list[LRSchedulerCallable] | LRSchedulerCallable | None, ) -> OTXLitModule: """Builds a LightningModule for engine workflow. Args: model (OTXModel): The OTXModel instance. - optimizer (OptimizerCallable): The optimizer callable. - scheduler (LRSchedulerCallable): The learning rate scheduler callable. + optimizer (list[OptimizerCallable] | OptimizerCallable | None): The optimizer callable. 
+ scheduler (list[LRSchedulerCallable] | LRSchedulerCallable | None): The learning rate scheduler callable. Returns: OTXLitModule: The built LightningModule instance. diff --git a/src/otx/engine/utils/auto_configurator.py b/src/otx/engine/utils/auto_configurator.py index 98f7f3506e1..edeceb9ce50 100644 --- a/src/otx/engine/utils/auto_configurator.py +++ b/src/otx/engine/utils/auto_configurator.py @@ -249,21 +249,21 @@ def get_model(self, model_name: str | None = None, meta_info: LabelInfo | None = logger.warning(f"Set Default Model: {self.config['model']}") return instantiate_class(args=(), init=self.config["model"]) - def get_optimizer(self) -> OptimizerCallable | None: + def get_optimizer(self) -> list[OptimizerCallable] | None: """Returns the optimizer callable based on the configuration. Returns: - OptimizerCallable | None: The optimizer callable. + list[OptimizerCallable] | None: A list of optimizer callables, or None if no optimizer is configured. """ optimizer_config = self.config.get("optimizer", None) logger.warning(f"Set Default Optimizer: {optimizer_config}") return partial_instantiate_class(init=optimizer_config) - def get_scheduler(self) -> LRSchedulerCallable | None: + def get_scheduler(self) -> list[LRSchedulerCallable] | None: """Returns the instantiated scheduler based on the configuration. Returns: - LRSchedulerCallable | None: The instantiated scheduler. + list[LRSchedulerCallable] | None: A list of scheduler callables, or None if no scheduler is configured. 
""" scheduler_config = self.config.get("scheduler", None) logger.warning(f"Set Default Scheduler: {scheduler_config}") diff --git a/src/otx/recipe/action/action_classification/x3d.yaml b/src/otx/recipe/action/action_classification/x3d.yaml index 98fa0340eae..e43d0b2c1e9 100644 --- a/src/otx/recipe/action/action_classification/x3d.yaml +++ b/src/otx/recipe/action/action_classification/x3d.yaml @@ -10,13 +10,15 @@ optimizer: weight_decay: 0.0001 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 100 - mode: max - factor: 0.5 - patience: 2 - monitor: val/accuracy + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 100 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 2 + monitor: val/accuracy engine: task: ACTION_CLASSIFICATION diff --git a/src/otx/recipe/action/action_detection/x3d_fastrcnn.yaml b/src/otx/recipe/action/action_detection/x3d_fastrcnn.yaml index f297429cce6..36c51f4247e 100644 --- a/src/otx/recipe/action/action_detection/x3d_fastrcnn.yaml +++ b/src/otx/recipe/action/action_detection/x3d_fastrcnn.yaml @@ -12,13 +12,15 @@ optimizer: weight_decay: 0.00001 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 100 - mode: max - factor: 0.5 - patience: 2 - monitor: val/map_50 + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 100 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 2 + monitor: val/map_50 engine: task: ACTION_DETECTION diff --git a/src/otx/recipe/classification/h_label_cls/efficientnet_b0_light.yaml b/src/otx/recipe/classification/h_label_cls/efficientnet_b0_light.yaml index 6717202c843..a6b696b1497 100644 --- a/src/otx/recipe/classification/h_label_cls/efficientnet_b0_light.yaml +++ b/src/otx/recipe/classification/h_label_cls/efficientnet_b0_light.yaml 
@@ -11,9 +11,8 @@ optimizer: lr: 0.0049 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau + class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: - warmup_steps: 0 mode: max factor: 0.5 patience: 1 @@ -27,6 +26,11 @@ callback_monitor: val/accuracy data: ../../_base_/data/mmpretrain_base.yaml overrides: + max_epochs: 90 + callbacks: + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + patience: 3 data: task: H_LABEL_CLS config: diff --git a/src/otx/recipe/classification/h_label_cls/efficientnet_v2_light.yaml b/src/otx/recipe/classification/h_label_cls/efficientnet_v2_light.yaml index 6f63839aee3..9a12f9005e9 100644 --- a/src/otx/recipe/classification/h_label_cls/efficientnet_v2_light.yaml +++ b/src/otx/recipe/classification/h_label_cls/efficientnet_v2_light.yaml @@ -13,9 +13,8 @@ optimizer: weight_decay: 0.0001 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau + class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: - warmup_steps: 0 mode: max factor: 0.5 patience: 1 @@ -29,6 +28,11 @@ callback_monitor: val/accuracy data: ../../_base_/data/mmpretrain_base.yaml overrides: + max_epochs: 90 + callbacks: + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + patience: 3 data: task: H_LABEL_CLS config: diff --git a/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large_light.yaml b/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large_light.yaml index fb4e34bc725..12f731da739 100644 --- a/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large_light.yaml +++ b/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large_light.yaml @@ -13,13 +13,15 @@ optimizer: weight_decay: 0.0001 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 10 - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 10 + - class_path: 
lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 1 + monitor: val/accuracy engine: task: H_LABEL_CLS @@ -29,6 +31,11 @@ callback_monitor: val/accuracy data: ../../_base_/data/mmpretrain_base.yaml overrides: + max_epochs: 90 + callbacks: + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + patience: 3 data: task: H_LABEL_CLS config: diff --git a/src/otx/recipe/classification/h_label_cls/otx_deit_tiny.yaml b/src/otx/recipe/classification/h_label_cls/otx_deit_tiny.yaml index d837293b0a3..a6d2e62b6a3 100644 --- a/src/otx/recipe/classification/h_label_cls/otx_deit_tiny.yaml +++ b/src/otx/recipe/classification/h_label_cls/otx_deit_tiny.yaml @@ -12,13 +12,15 @@ optimizer: weight_decay: 0.05 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 10 - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 10 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 1 + monitor: val/accuracy engine: task: H_LABEL_CLS @@ -28,6 +30,11 @@ callback_monitor: val/accuracy data: ../../_base_/data/mmpretrain_base.yaml overrides: + max_epochs: 90 + callbacks: + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + patience: 3 data: task: H_LABEL_CLS config: diff --git a/src/otx/recipe/classification/multi_class_cls/efficientnet_b0_light.yaml b/src/otx/recipe/classification/multi_class_cls/efficientnet_b0_light.yaml index ec9309a707c..7c7511b134b 100644 --- a/src/otx/recipe/classification/multi_class_cls/efficientnet_b0_light.yaml +++ b/src/otx/recipe/classification/multi_class_cls/efficientnet_b0_light.yaml @@ -12,9 +12,8 @@ optimizer: weight_decay: 0.0001 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau + class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: - warmup_steps: 0 mode: 
max factor: 0.5 patience: 1 @@ -27,3 +26,10 @@ engine: callback_monitor: val/accuracy data: ../../_base_/data/mmpretrain_base.yaml + +overrides: + max_epochs: 90 + callbacks: + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + patience: 3 diff --git a/src/otx/recipe/classification/multi_class_cls/efficientnet_v2_light.yaml b/src/otx/recipe/classification/multi_class_cls/efficientnet_v2_light.yaml index eb50774b193..c4402ac4810 100644 --- a/src/otx/recipe/classification/multi_class_cls/efficientnet_v2_light.yaml +++ b/src/otx/recipe/classification/multi_class_cls/efficientnet_v2_light.yaml @@ -12,9 +12,8 @@ optimizer: weight_decay: 0.0001 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau + class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: - warmup_steps: 0 mode: max factor: 0.5 patience: 1 @@ -28,6 +27,11 @@ callback_monitor: val/accuracy data: ../../_base_/data/mmpretrain_base.yaml overrides: + max_epochs: 90 + callbacks: + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + patience: 3 data: config: train_subset: diff --git a/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large_light.yaml b/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large_light.yaml index 200fe8b08cc..080cc830be7 100644 --- a/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large_light.yaml +++ b/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large_light.yaml @@ -12,13 +12,15 @@ optimizer: weight_decay: 0.0001 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 10 - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 10 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 1 + monitor: val/accuracy engine: task: MULTI_CLASS_CLS @@ -28,6 +30,11 @@ callback_monitor: 
val/accuracy data: ../../_base_/data/mmpretrain_base.yaml overrides: + max_epochs: 90 + callbacks: + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + patience: 3 data: config: train_subset: diff --git a/src/otx/recipe/classification/multi_class_cls/otx_deit_tiny.yaml b/src/otx/recipe/classification/multi_class_cls/otx_deit_tiny.yaml index 5c3373ce98f..da0c5522854 100644 --- a/src/otx/recipe/classification/multi_class_cls/otx_deit_tiny.yaml +++ b/src/otx/recipe/classification/multi_class_cls/otx_deit_tiny.yaml @@ -10,13 +10,15 @@ optimizer: weight_decay: 0.05 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 10 - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 10 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 1 + monitor: val/accuracy engine: task: MULTI_CLASS_CLS @@ -25,3 +27,10 @@ engine: callback_monitor: val/accuracy data: ../../_base_/data/mmpretrain_base.yaml + +overrides: + max_epochs: 90 + callbacks: + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + patience: 3 diff --git a/src/otx/recipe/classification/multi_class_cls/otx_dino_v2.yaml b/src/otx/recipe/classification/multi_class_cls/otx_dino_v2.yaml index e911b5e06a2..e69f1e1aa8e 100644 --- a/src/otx/recipe/classification/multi_class_cls/otx_dino_v2.yaml +++ b/src/otx/recipe/classification/multi_class_cls/otx_dino_v2.yaml @@ -34,6 +34,11 @@ callback_monitor: val/accuracy data: ../../_base_/data/mmpretrain_base.yaml overrides: + max_epochs: 90 + callbacks: + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + patience: 3 data: config: train_subset: diff --git a/src/otx/recipe/classification/multi_class_cls/otx_dino_v2_linear_probe.yaml b/src/otx/recipe/classification/multi_class_cls/otx_dino_v2_linear_probe.yaml index 
dc5e4ccde51..3ef45101e92 100644 --- a/src/otx/recipe/classification/multi_class_cls/otx_dino_v2_linear_probe.yaml +++ b/src/otx/recipe/classification/multi_class_cls/otx_dino_v2_linear_probe.yaml @@ -36,6 +36,11 @@ callback_monitor: val/accuracy data: ../../_base_/data/mmpretrain_base.yaml overrides: + max_epochs: 90 + callbacks: + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + patience: 3 data: config: train_subset: diff --git a/src/otx/recipe/classification/multi_class_cls/otx_efficientnet_b0.yaml b/src/otx/recipe/classification/multi_class_cls/otx_efficientnet_b0.yaml index 8526dc42b0d..678cf451556 100644 --- a/src/otx/recipe/classification/multi_class_cls/otx_efficientnet_b0.yaml +++ b/src/otx/recipe/classification/multi_class_cls/otx_efficientnet_b0.yaml @@ -14,10 +14,10 @@ optimizer: scheduler: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: - mode: min + mode: max factor: 0.5 patience: 1 - monitor: train/loss + monitor: val/accuracy engine: task: MULTI_CLASS_CLS @@ -26,3 +26,10 @@ engine: callback_monitor: val/accuracy data: ../../_base_/data/mmpretrain_base.yaml + +overrides: + max_epochs: 90 + callbacks: + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + patience: 3 diff --git a/src/otx/recipe/classification/multi_class_cls/otx_efficientnet_v2.yaml b/src/otx/recipe/classification/multi_class_cls/otx_efficientnet_v2.yaml index 1dee8cd1331..e0a2db7f69b 100644 --- a/src/otx/recipe/classification/multi_class_cls/otx_efficientnet_v2.yaml +++ b/src/otx/recipe/classification/multi_class_cls/otx_efficientnet_v2.yaml @@ -14,10 +14,10 @@ optimizer: scheduler: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: - mode: min + mode: max factor: 0.5 patience: 1 - monitor: train/loss + monitor: val/accuracy engine: task: MULTI_CLASS_CLS @@ -27,6 +27,11 @@ callback_monitor: val/accuracy data: ../../_base_/data/mmpretrain_base.yaml overrides: + max_epochs: 90 + callbacks: + - class_path: 
lightning.pytorch.callbacks.EarlyStopping + init_args: + patience: 3 data: config: train_subset: diff --git a/src/otx/recipe/classification/multi_class_cls/otx_mobilenet_v3_large.yaml b/src/otx/recipe/classification/multi_class_cls/otx_mobilenet_v3_large.yaml index 5c280d6b397..7058f87da0e 100644 --- a/src/otx/recipe/classification/multi_class_cls/otx_mobilenet_v3_large.yaml +++ b/src/otx/recipe/classification/multi_class_cls/otx_mobilenet_v3_large.yaml @@ -12,12 +12,15 @@ optimizer: weight_decay: 0.0001 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: min - factor: 0.5 - patience: 1 - monitor: train/loss + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 10 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 1 + monitor: val/accuracy engine: task: MULTI_CLASS_CLS @@ -27,6 +30,11 @@ callback_monitor: val/accuracy data: ../../_base_/data/mmpretrain_base.yaml overrides: + max_epochs: 90 + callbacks: + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + patience: 3 data: config: train_subset: diff --git a/src/otx/recipe/classification/multi_label_cls/efficientnet_b0_light.yaml b/src/otx/recipe/classification/multi_label_cls/efficientnet_b0_light.yaml index 7b055bb74d7..e904e4bfe12 100644 --- a/src/otx/recipe/classification/multi_label_cls/efficientnet_b0_light.yaml +++ b/src/otx/recipe/classification/multi_label_cls/efficientnet_b0_light.yaml @@ -9,9 +9,8 @@ optimizer: lr: 0.0049 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau + class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: - warmup_steps: 0 mode: max factor: 0.5 patience: 1 @@ -25,6 +24,11 @@ callback_monitor: val/accuracy data: ../../_base_/data/mmpretrain_base.yaml overrides: + max_epochs: 90 + callbacks: + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + patience: 3 data: task: MULTI_LABEL_CLS 
config: diff --git a/src/otx/recipe/classification/multi_label_cls/efficientnet_v2_light.yaml b/src/otx/recipe/classification/multi_label_cls/efficientnet_v2_light.yaml index 7517e625995..ab0c328d6b2 100644 --- a/src/otx/recipe/classification/multi_label_cls/efficientnet_v2_light.yaml +++ b/src/otx/recipe/classification/multi_label_cls/efficientnet_v2_light.yaml @@ -11,9 +11,8 @@ optimizer: weight_decay: 0.0001 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau + class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: - warmup_steps: 0 mode: max factor: 0.5 patience: 1 @@ -27,6 +26,11 @@ callback_monitor: val/accuracy data: ../../_base_/data/mmpretrain_base.yaml overrides: + max_epochs: 90 + callbacks: + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + patience: 3 data: task: MULTI_LABEL_CLS config: diff --git a/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large_light.yaml b/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large_light.yaml index 78f34991e0d..5f9f82ae0f8 100644 --- a/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large_light.yaml +++ b/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large_light.yaml @@ -11,13 +11,15 @@ optimizer: weight_decay: 0.0001 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 10 - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 10 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 1 + monitor: val/accuracy engine: task: MULTI_LABEL_CLS @@ -27,6 +29,11 @@ callback_monitor: val/accuracy data: ../../_base_/data/mmpretrain_base.yaml overrides: + max_epochs: 90 + callbacks: + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + patience: 3 data: task: MULTI_LABEL_CLS config: diff --git 
a/src/otx/recipe/classification/multi_label_cls/otx_deit_tiny.yaml b/src/otx/recipe/classification/multi_label_cls/otx_deit_tiny.yaml index 2c1b64e8452..9e66ece0fbf 100644 --- a/src/otx/recipe/classification/multi_label_cls/otx_deit_tiny.yaml +++ b/src/otx/recipe/classification/multi_label_cls/otx_deit_tiny.yaml @@ -10,13 +10,15 @@ optimizer: weight_decay: 0.05 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 10 - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 10 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 1 + monitor: val/accuracy engine: task: MULTI_LABEL_CLS @@ -26,6 +28,11 @@ callback_monitor: val/accuracy data: ../../_base_/data/mmpretrain_base.yaml overrides: + max_epochs: 90 + callbacks: + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + patience: 3 data: task: MULTI_LABEL_CLS config: diff --git a/src/otx/recipe/detection/atss_mobilenetv2.yaml b/src/otx/recipe/detection/atss_mobilenetv2.yaml index c35d5129bda..69d1cd52c7d 100644 --- a/src/otx/recipe/detection/atss_mobilenetv2.yaml +++ b/src/otx/recipe/detection/atss_mobilenetv2.yaml @@ -12,13 +12,15 @@ optimizer: weight_decay: 0.0001 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 3 - mode: max - factor: 0.5 - patience: 5 - monitor: val/map_50 + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 3 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 5 + monitor: val/map_50 engine: task: DETECTION diff --git a/src/otx/recipe/detection/atss_r50_fpn.yaml b/src/otx/recipe/detection/atss_r50_fpn.yaml index 0f005a3b18d..43a20e292f4 100644 --- a/src/otx/recipe/detection/atss_r50_fpn.yaml +++ 
b/src/otx/recipe/detection/atss_r50_fpn.yaml @@ -10,13 +10,15 @@ optimizer: weight_decay: 0.0 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 3 - mode: max - factor: 0.5 - patience: 5 - monitor: val/map_50 + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 3 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 5 + monitor: val/map_50 engine: task: DETECTION diff --git a/src/otx/recipe/detection/atss_resnext101.yaml b/src/otx/recipe/detection/atss_resnext101.yaml index f46baa1c21f..bb0a7b939f9 100644 --- a/src/otx/recipe/detection/atss_resnext101.yaml +++ b/src/otx/recipe/detection/atss_resnext101.yaml @@ -12,13 +12,15 @@ optimizer: weight_decay: 0.0001 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 3 - mode: max - factor: 0.5 - patience: 5 - monitor: val/map_50 + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 3 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 5 + monitor: val/map_50 engine: task: DETECTION diff --git a/src/otx/recipe/detection/ssd_mobilenetv2.yaml b/src/otx/recipe/detection/ssd_mobilenetv2.yaml index 84350f1e232..09b10bc4eea 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2.yaml @@ -12,13 +12,15 @@ optimizer: weight_decay: 0.0001 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 3 - mode: max - factor: 0.5 - patience: 5 - monitor: val/map_50 + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 3 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 5 + monitor: val/map_50 engine: task: DETECTION diff --git a/src/otx/recipe/detection/yolox_l.yaml 
b/src/otx/recipe/detection/yolox_l.yaml index 01c83b61e1d..690f7bfd4f4 100644 --- a/src/otx/recipe/detection/yolox_l.yaml +++ b/src/otx/recipe/detection/yolox_l.yaml @@ -12,13 +12,15 @@ optimizer: weight_decay: 0.0001 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 3 - mode: max - factor: 0.5 - patience: 5 - monitor: val/map_50 + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 3 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 5 + monitor: val/map_50 engine: task: DETECTION diff --git a/src/otx/recipe/detection/yolox_l_tile.yaml b/src/otx/recipe/detection/yolox_l_tile.yaml index e49efa21de6..af45f977f3b 100644 --- a/src/otx/recipe/detection/yolox_l_tile.yaml +++ b/src/otx/recipe/detection/yolox_l_tile.yaml @@ -12,13 +12,15 @@ optimizer: weight_decay: 0.0001 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 3 - mode: max - factor: 0.5 - patience: 5 - monitor: val/map_50 + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 3 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 5 + monitor: val/map_50 engine: task: DETECTION diff --git a/src/otx/recipe/detection/yolox_s.yaml b/src/otx/recipe/detection/yolox_s.yaml index c2b5878ad1d..0bf3a268446 100644 --- a/src/otx/recipe/detection/yolox_s.yaml +++ b/src/otx/recipe/detection/yolox_s.yaml @@ -12,13 +12,15 @@ optimizer: weight_decay: 0.0001 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 3 - mode: max - factor: 0.5 - patience: 5 - monitor: val/map_50 + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 3 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 5 + monitor: 
val/map_50 engine: task: DETECTION diff --git a/src/otx/recipe/detection/yolox_s_tile.yaml b/src/otx/recipe/detection/yolox_s_tile.yaml index 21da346c6b9..5f44cbe8964 100644 --- a/src/otx/recipe/detection/yolox_s_tile.yaml +++ b/src/otx/recipe/detection/yolox_s_tile.yaml @@ -12,13 +12,15 @@ optimizer: weight_decay: 0.0001 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 3 - mode: max - factor: 0.5 - patience: 5 - monitor: val/map_50 + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 3 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 5 + monitor: val/map_50 engine: task: DETECTION diff --git a/src/otx/recipe/detection/yolox_tiny.yaml b/src/otx/recipe/detection/yolox_tiny.yaml index fafc40e6bfc..9997a0022e8 100644 --- a/src/otx/recipe/detection/yolox_tiny.yaml +++ b/src/otx/recipe/detection/yolox_tiny.yaml @@ -11,13 +11,15 @@ optimizer: weight_decay: 0.0001 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 3 - mode: max - factor: 0.5 - patience: 5 - monitor: val/map_50 + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 3 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 5 + monitor: val/map_50 engine: task: DETECTION diff --git a/src/otx/recipe/detection/yolox_tiny_tile.yaml b/src/otx/recipe/detection/yolox_tiny_tile.yaml index f3633769e81..54d37914817 100644 --- a/src/otx/recipe/detection/yolox_tiny_tile.yaml +++ b/src/otx/recipe/detection/yolox_tiny_tile.yaml @@ -12,13 +12,15 @@ optimizer: weight_decay: 0.0001 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 3 - mode: max - factor: 0.5 - patience: 5 - monitor: val/map_50 + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 
3 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 5 + monitor: val/map_50 engine: task: DETECTION diff --git a/src/otx/recipe/detection/yolox_x.yaml b/src/otx/recipe/detection/yolox_x.yaml index cb3496f9e2a..68162a0164d 100644 --- a/src/otx/recipe/detection/yolox_x.yaml +++ b/src/otx/recipe/detection/yolox_x.yaml @@ -12,13 +12,15 @@ optimizer: weight_decay: 0.0001 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 3 - mode: max - factor: 0.5 - patience: 5 - monitor: val/map_50 + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 3 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 5 + monitor: val/map_50 engine: task: DETECTION diff --git a/src/otx/recipe/detection/yolox_x_tile.yaml b/src/otx/recipe/detection/yolox_x_tile.yaml index d7d23780abf..f5c74413cf8 100644 --- a/src/otx/recipe/detection/yolox_x_tile.yaml +++ b/src/otx/recipe/detection/yolox_x_tile.yaml @@ -12,13 +12,15 @@ optimizer: weight_decay: 0.0001 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 3 - mode: max - factor: 0.5 - patience: 5 - monitor: val/map_50 + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 3 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 5 + monitor: val/map_50 engine: task: DETECTION diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml index 4a207a8f4b6..bdba41b089f 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml @@ -12,13 +12,15 @@ optimizer: weight_decay: 0.001 scheduler: - class_path: 
otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 100 - mode: max - factor: 0.5 - patience: 5 - monitor: val/map_50 + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 100 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 5 + monitor: val/map_50 engine: task: INSTANCE_SEGMENTATION @@ -28,6 +30,7 @@ callback_monitor: val/map_50 data: ../_base_/data/mmdet_base.yaml overrides: + max_epochs: 100 data: task: INSTANCE_SEGMENTATION config: diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml index aab00bcca18..c795668081c 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml @@ -12,13 +12,15 @@ optimizer: weight_decay: 0.001 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 100 - mode: max - factor: 0.5 - patience: 5 - monitor: val/map_50 + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 100 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 5 + monitor: val/map_50 engine: task: INSTANCE_SEGMENTATION @@ -28,6 +30,7 @@ callback_monitor: val/map_50 data: ../_base_/data/mmdet_base.yaml overrides: + max_epochs: 100 gradient_clip_val: 35.0 data: task: INSTANCE_SEGMENTATION diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml index 56cdb8cd1d2..2ea4a57884f 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml @@ -12,13 +12,15 @@ optimizer: weight_decay: 0.001 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - 
warmup_steps: 100 - mode: max - factor: 0.5 - patience: 5 - monitor: val/map_50 + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 100 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 5 + monitor: val/map_50 engine: task: INSTANCE_SEGMENTATION @@ -28,6 +30,7 @@ callback_monitor: val/map_50 data: ../_base_/data/mmdet_base.yaml overrides: + max_epochs: 100 gradient_clip_val: 35.0 data: task: INSTANCE_SEGMENTATION diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml index 9375bfff69e..398943e1744 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml @@ -12,13 +12,15 @@ optimizer: weight_decay: 0.001 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 100 - mode: max - factor: 0.5 - patience: 5 - monitor: val/map_50 + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 100 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 5 + monitor: val/map_50 engine: task: INSTANCE_SEGMENTATION @@ -28,6 +30,7 @@ callback_monitor: val/map_50 data: ../_base_/data/mmdet_base.yaml overrides: + max_epochs: 100 gradient_clip_val: 35.0 data: task: INSTANCE_SEGMENTATION diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml index 5a8a7ddf3e8..8806f46b905 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml @@ -10,13 +10,15 @@ optimizer: weight_decay: 0.05 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 100 - mode: max - factor: 0.5 - patience: 5 - monitor: val/map_50 + - 
class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 100 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 5 + monitor: val/map_50 engine: task: INSTANCE_SEGMENTATION @@ -26,6 +28,7 @@ callback_monitor: val/map_50 data: ../_base_/data/mmdet_base.yaml overrides: + max_epochs: 100 data: task: INSTANCE_SEGMENTATION config: diff --git a/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml b/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml index 64e89e0850a..b5f5a9a8cfe 100644 --- a/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml +++ b/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml @@ -12,12 +12,15 @@ optimizer: weight_decay: 0.001 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: min - factor: 0.1 - patience: 10 - monitor: train/loss + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 100 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 10 + monitor: val/map_50 engine: task: ROTATED_DETECTION @@ -27,6 +30,7 @@ callback_monitor: val/map_50 data: ../_base_/data/mmdet_base.yaml overrides: + max_epochs: 100 data: task: ROTATED_DETECTION config: diff --git a/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml b/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml index 20bdc6c4ec2..415e11b66fa 100644 --- a/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml +++ b/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml @@ -12,12 +12,15 @@ optimizer: weight_decay: 0.001 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: min - factor: 0.1 - patience: 10 - monitor: train/loss + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 100 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 
0.1 + patience: 10 + monitor: val/map_50 engine: task: ROTATED_DETECTION @@ -27,6 +30,7 @@ callback_monitor: val/map_50 data: ../_base_/data/mmdet_base.yaml overrides: + max_epochs: 100 data: task: ROTATED_DETECTION config: diff --git a/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml b/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml index 8956be79ecd..99995b831da 100644 --- a/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml +++ b/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml @@ -14,13 +14,15 @@ optimizer: weight_decay: 0.0 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 100 - mode: max - factor: 0.5 - patience: 5 - monitor: val/Dice + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 100 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 5 + monitor: val/Dice engine: task: SEMANTIC_SEGMENTATION @@ -29,3 +31,6 @@ engine: callback_monitor: val/Dice data: ../_base_/data/mmseg_base.yaml + +overrides: + max_epochs: 300 diff --git a/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml b/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml index aae8197a084..fdf93f5734c 100644 --- a/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml +++ b/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml @@ -14,13 +14,15 @@ optimizer: weight_decay: 0.0 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 100 - mode: max - factor: 0.5 - patience: 5 - monitor: val/Dice + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 100 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 5 + monitor: val/Dice engine: task: SEMANTIC_SEGMENTATION @@ -29,3 +31,6 @@ engine: callback_monitor: val/Dice data: ../_base_/data/mmseg_base.yaml + +overrides: + max_epochs: 300 diff 
--git a/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml b/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml index ec6fa516248..3df08bb4eb3 100644 --- a/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml +++ b/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml @@ -14,13 +14,15 @@ optimizer: weight_decay: 0.0 scheduler: - class_path: otx.algo.schedulers.WarmupReduceLROnPlateau - init_args: - warmup_steps: 100 - mode: max - factor: 0.5 - patience: 5 - monitor: val/Dice + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 100 + - class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 5 + monitor: val/Dice engine: task: SEMANTIC_SEGMENTATION @@ -29,3 +31,6 @@ engine: callback_monitor: val/Dice data: ../_base_/data/mmseg_base.yaml + +overrides: + max_epochs: 300 diff --git a/src/otx/recipe/semantic_segmentation/segnext_b.yaml b/src/otx/recipe/semantic_segmentation/segnext_b.yaml index 2501234f6b3..f2330302006 100644 --- a/src/otx/recipe/semantic_segmentation/segnext_b.yaml +++ b/src/otx/recipe/semantic_segmentation/segnext_b.yaml @@ -14,12 +14,14 @@ optimizer: weight_decay: 0.01 scheduler: - class_path: otx.algo.schedulers.warmup_schedulers.WarmupPolynomialLR - init_args: - warmup_steps: 20 - total_iters: 100 - power: 0.9 - last_epoch: -1 + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 20 + - class_path: torch.optim.lr_scheduler.PolynomialLR + init_args: + total_iters: 100 + power: 0.9 + last_epoch: -1 engine: task: SEMANTIC_SEGMENTATION @@ -29,6 +31,7 @@ callback_monitor: val/Dice data: ../_base_/data/mmseg_base.yaml overrides: + max_epochs: 170 data: config: train_subset: diff --git a/src/otx/recipe/semantic_segmentation/segnext_s.yaml b/src/otx/recipe/semantic_segmentation/segnext_s.yaml index e319bc5ada7..7f814f34119 100644 --- a/src/otx/recipe/semantic_segmentation/segnext_s.yaml +++ 
b/src/otx/recipe/semantic_segmentation/segnext_s.yaml @@ -14,12 +14,14 @@ optimizer: weight_decay: 0.01 scheduler: - class_path: otx.algo.schedulers.warmup_schedulers.WarmupPolynomialLR - init_args: - warmup_steps: 20 - total_iters: 100 - power: 0.9 - last_epoch: -1 + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 20 + - class_path: torch.optim.lr_scheduler.PolynomialLR + init_args: + total_iters: 100 + power: 0.9 + last_epoch: -1 engine: task: SEMANTIC_SEGMENTATION @@ -29,6 +31,7 @@ callback_monitor: val/Dice data: ../_base_/data/mmseg_base.yaml overrides: + max_epochs: 170 data: config: train_subset: diff --git a/src/otx/recipe/semantic_segmentation/segnext_t.yaml b/src/otx/recipe/semantic_segmentation/segnext_t.yaml index e8f657de00d..3de98141813 100644 --- a/src/otx/recipe/semantic_segmentation/segnext_t.yaml +++ b/src/otx/recipe/semantic_segmentation/segnext_t.yaml @@ -14,12 +14,14 @@ optimizer: weight_decay: 0.01 scheduler: - class_path: otx.algo.schedulers.warmup_schedulers.WarmupPolynomialLR - init_args: - warmup_steps: 20 - total_iters: 100 - power: 0.9 - last_epoch: -1 + - class_path: otx.core.model.module.base.LinearWarmupScheduler + init_args: + num_warmup_steps: 20 + - class_path: torch.optim.lr_scheduler.PolynomialLR + init_args: + total_iters: 100 + power: 0.9 + last_epoch: -1 engine: task: SEMANTIC_SEGMENTATION @@ -29,6 +31,7 @@ callback_monitor: val/Dice data: ../_base_/data/mmseg_base.yaml overrides: + max_epochs: 170 data: config: train_subset: diff --git a/tests/unit/algo/callbacks/test_adaptive_train_scheduling.py b/tests/unit/algo/callbacks/test_adaptive_train_scheduling.py index 0229ea0c2fa..9d019be0f0a 100644 --- a/tests/unit/algo/callbacks/test_adaptive_train_scheduling.py +++ b/tests/unit/algo/callbacks/test_adaptive_train_scheduling.py @@ -32,6 +32,7 @@ def test_callback(self, caplog) -> None: mock_lr_scheduler_config = MagicMock(spec=LRSchedulerConfig) mock_lr_scheduler_config.frequency = 
1 + mock_lr_scheduler_config.interval = "epoch" mock_trainer.lr_scheduler_configs = [mock_lr_scheduler_config] with caplog.at_level(log.WARNING): diff --git a/tests/unit/core/model/module/test_base.py b/tests/unit/core/model/module/test_base.py index 7436a53a721..bf9961da165 100644 --- a/tests/unit/core/model/module/test_base.py +++ b/tests/unit/core/model/module/test_base.py @@ -8,8 +8,8 @@ from unittest.mock import MagicMock, create_autospec import pytest +from lightning.pytorch.cli import ReduceLROnPlateau from lightning.pytorch.trainer import Trainer -from otx.algo.schedulers.warmup_schedulers import WarmupReduceLROnPlateau from otx.core.model.entity.base import OTXModel from otx.core.model.module.base import LinearWarmupScheduler, OTXLitModule from torch.optim import Optimizer @@ -33,14 +33,19 @@ def optimizer_factory(*args, **kargs) -> Optimizer: # noqa: ARG001 return optimizer_factory @pytest.fixture() - def mock_scheduler(self) -> WarmupReduceLROnPlateau: - scheduler = MagicMock(spec=WarmupReduceLROnPlateau) - scheduler.warmup_steps = 10 + def mock_scheduler(self) -> list[LinearWarmupScheduler | ReduceLROnPlateau]: + scheduler_object_1 = MagicMock() + warmup_scheduler = MagicMock(spec=LinearWarmupScheduler) + warmup_scheduler.num_warmup_steps = 10 + warmup_scheduler.interval = "step" + scheduler_object_1.return_value = warmup_scheduler - def scheduler_factory(*args, **kargs) -> WarmupReduceLROnPlateau: # noqa: ARG001 - return scheduler + scheduler_object_2 = MagicMock() + lr_scheduler = MagicMock(spec=ReduceLROnPlateau) + lr_scheduler.monitor = "val/loss" + scheduler_object_2.return_value = lr_scheduler - return scheduler_factory + return [scheduler_object_1, scheduler_object_2] def test_configure_optimizers(self, mock_otx_model, mock_optimizer, mock_scheduler) -> None: module = OTXLitModule( @@ -61,7 +66,3 @@ def test_configure_optimizers(self, mock_otx_model, mock_optimizer, mock_schedul assert "scheduler" in lr_schedulers[1] assert "monitor" in 
lr_schedulers[1] - assert "interval" in lr_schedulers[1] - assert "frequency" in lr_schedulers[1] - - assert lr_schedulers[1]["frequency"] == 2 diff --git a/tests/unit/engine/utils/test_auto_configurator.py b/tests/unit/engine/utils/test_auto_configurator.py index aa65edf0a80..0abe3a77207 100644 --- a/tests/unit/engine/utils/test_auto_configurator.py +++ b/tests/unit/engine/utils/test_auto_configurator.py @@ -124,9 +124,19 @@ def test_get_model(self) -> None: def test_get_optimizer(self) -> None: task = OTXTaskType.SEMANTIC_SEGMENTATION auto_configurator = AutoConfigurator(task=task) - assert callable(auto_configurator.get_optimizer()) + optimizer = auto_configurator.get_optimizer() + if isinstance(optimizer, list): + for opt in optimizer: + assert callable(opt) + else: + assert callable(optimizer) def test_get_scheduler(self) -> None: task = OTXTaskType.INSTANCE_SEGMENTATION auto_configurator = AutoConfigurator(task=task) - assert callable(auto_configurator.get_scheduler()) + scheduler = auto_configurator.get_scheduler() + if isinstance(scheduler, list): + for sch in scheduler: + assert callable(sch) + else: + assert callable(scheduler)