openvinotoolkit · goodsong81 · Nov 20, 2023 · Nov 17, 2023 · Nov 17, 2023 · Nov 17, 2023
@@ -9,6 +9,7 @@ All notable changes to this project will be documented in this file.
 - Update ModelAPI configuration(<https://github.com/openvinotoolkit/training_extensions/pull/2564>)
 - Add Anomaly modelAPI changes (<https://github.com/openvinotoolkit/training_extensions/pull/2563>)
 - Update Image numpy access (<https://github.com/openvinotoolkit/training_extensions/pull/2586>)
+- Make max_num_detections configurable (<https://github.com/openvinotoolkit/training_extensions/pull/2647>)
 
 ### Bug fixes
 

@@ -1,18 +1,7 @@
 """Base Configuration of OTX Common Algorithms."""
 
-# Copyright (C) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions
-# and limitations under the License.
+# Copyright (C) 2022-2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
 
 from sys import maxsize
 
@@ -227,6 +216,16 @@ class BasePostprocessing(ParameterGroup):
             affects_outcome_of=ModelLifecycle.INFERENCE,
         )
 
+        max_num_detections = configurable_integer(
+            header="Maximum number of detections per image",
+            description="Extra detection outputs will be discarded in non-maximum suppression process. "
+            "Defaults to 0, which means per-model default value.",
+            default_value=0,
+            min_value=0,
+            max_value=10000,
+            affects_outcome_of=ModelLifecycle.INFERENCE,
+        )
+
         use_ellipse_shapes = configurable_boolean(
             default_value=False,
             header="Use ellipse shapes",

@@ -64,13 +64,14 @@
         ir_options=None,
         data_classes=None,
         model_classes=None,
+        max_num_detections=0,
     ):
         """Create MMCV-consumable config from given inputs."""
         logger.info(f"configure!: training={training}")
 
         self.configure_base(cfg, data_cfg, data_classes, model_classes)
         self.configure_device(cfg, training)
-        self.configure_model(cfg, ir_options)
+        self.configure_model(cfg, ir_options, max_num_detections)
         self.configure_ckpt(cfg, model_ckpt)
         self.configure_data(cfg, training, data_cfg)
         self.configure_regularization(cfg, training)
@@ -113,7 +114,7 @@
             new_classes = np.setdiff1d(data_classes, model_classes).tolist()
             train_data_cfg["new_classes"] = new_classes
 
-    def configure_model(self, cfg, ir_options):  # noqa: C901
+    def configure_model(self, cfg, ir_options, max_num_detections=0):  # noqa: C901
         """Patch config's model.
 
         Change model type to super type
@@ -149,6 +150,13 @@
                 {"model_path": ir_model_path, "weight_path": ir_weight_path, "init_weight": ir_weight_init},
             )
 
+        # Test config
+        if max_num_detections > 0:
+            logger.info(f"Model max_num_detections: {max_num_detections}")
+            test_cfg = cfg.model.test_cfg
+            test_cfg.max_per_img = max_num_detections
+            test_cfg.nms_pre = max_num_detections * 10
+
     def configure_data(self, cfg, training, data_cfg):  # noqa: C901
         """Patch cfg.data.
 

@@ -1,18 +1,7 @@
 """Task of OTX Detection using mmdetection training backend."""
 
 # Copyright (C) 2023 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions
-# and limitations under the License.
+# SPDX-License-Identifier: Apache-2.0
 
 import glob
 import io
@@ -206,6 +195,7 @@
             ir_options,
             data_classes,
             model_classes,
+            self.max_num_detections,
         )
         if should_cluster_anchors(self._recipe_cfg):
             if train_dataset is not None:
@@ -513,6 +503,12 @@
         assert len(self._precision) == 1
         export_options["precision"] = str(self._precision[0])
         export_options["type"] = str(export_format)
+        if self.max_num_detections > 0:
+            logger.info(f"Export max_num_detections: {self.max_num_detections}")
+            post_proc_cfg = export_options["deploy_cfg"]["codebase_config"]["post_processing"]
+            post_proc_cfg["max_output_boxes_per_class"] = self.max_num_detections
+            post_proc_cfg["keep_top_k"] = self.max_num_detections
+            post_proc_cfg["pre_top_k"] = self.max_num_detections * 10
 
         export_options["deploy_cfg"]["dump_features"] = dump_features
         if dump_features:

@@ -1,18 +1,7 @@
 """Openvino Task of Detection."""
 
-# Copyright (C) 2021 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions
-# and limitations under the License.
+# Copyright (C) 2021-2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
 
 import copy
 import io

@@ -1,18 +1,7 @@
 """Configuration file of OTX Detection."""
 
-# Copyright (C) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions
-# and limitations under the License.
+# Copyright (C) 2022-2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
 
 from attr import attrs
 

@@ -258,6 +258,25 @@ postprocessing:
     value: 0.01
     visible_in_ui: true
     warning: null
+  max_num_detections:
+    affects_outcome_of: INFERENCE
+    default_value: 0
+    description:
+      Extra detection outputs will be discarded in non-maximum suppression process.
+      Defaults to 0, which means per-model default values.
+    editable: true
+    header: Maximum number of detections per image
+    max_value: 10000
+    min_value: 0
+    type: INTEGER
+    ui_rules:
+      action: DISABLE_EDITING
+      operator: AND
+      rules: []
+      type: UI_RULES
+    value: 0
+    visible_in_ui: true
+    warning: null
   use_ellipse_shapes:
     affects_outcome_of: INFERENCE
     default_value: false

@@ -258,6 +258,25 @@ postprocessing:
     value: 0.01
     visible_in_ui: true
     warning: null
+  max_num_detections:
+    affects_outcome_of: INFERENCE
+    default_value: 0
+    description:
+      Extra detection outputs will be discarded in non-maximum suppression process.
+      Defaults to 0, which means per-model default values.
+    editable: true
+    header: Maximum number of detections per image
+    max_value: 10000
+    min_value: 0
+    type: INTEGER
+    ui_rules:
+      action: DISABLE_EDITING
+      operator: AND
+      rules: []
+      type: UI_RULES
+    value: 0
+    visible_in_ui: true
+    warning: null
   use_ellipse_shapes:
     affects_outcome_of: INFERENCE
     default_value: false

@@ -277,6 +277,25 @@ postprocessing:
     warning: null
   type: PARAMETER_GROUP
   visible_in_ui: true
+  max_num_detections:
+    affects_outcome_of: INFERENCE
+    default_value: 0
+    description:
+      Extra detection outputs will be discarded in non-maximum suppression process.
+      Defaults to 0, which means per-model default values.
+    editable: true
+    header: Maximum number of detections per image
+    max_value: 10000
+    min_value: 0
+    type: INTEGER
+    ui_rules:
+      action: DISABLE_EDITING
+      operator: AND
+      rules: []
+      type: UI_RULES
+    value: 0
+    visible_in_ui: true
+    warning: null
 algo_backend:
   description: parameters for algo backend
   header: Algo backend parameters

@@ -1,18 +1,7 @@
 """Task of OTX Detection."""
 
 # Copyright (C) 2023 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions
-# and limitations under the License.
+# SPDX-License-Identifier: Apache-2.0
 
 import io
 import os
@@ -83,11 +72,13 @@ def __init__(self, task_environment: TaskEnvironment, output_path: Optional[str]
         )
         self._anchors: Dict[str, int] = {}
 
+        self.confidence_threshold = 0.0
+        self.max_num_detections = 0
         if hasattr(self._hyperparams, "postprocessing"):
             if hasattr(self._hyperparams.postprocessing, "confidence_threshold"):
                 self.confidence_threshold = self._hyperparams.postprocessing.confidence_threshold
-        else:
-            self.confidence_threshold = 0.0
+            if hasattr(self._hyperparams.postprocessing, "max_num_detections"):
+                self.max_num_detections = self._hyperparams.postprocessing.max_num_detections
 
         if task_environment.model is not None:
             self._load_model()
@@ -112,6 +103,11 @@ def _load_postprocessing(self, model_data):
             hparams.use_ellipse_shapes = loaded_postprocessing["use_ellipse_shapes"]["value"]
         else:
             hparams.use_ellipse_shapes = False
+        if "max_num_detections" in loaded_postprocessing:
+            trained_max_num_detections = loaded_postprocessing["max_num_detections"]["value"]
+            # Prefer a value explicitly set by the user (>0) over the value loaded from the trained model
+            if self.max_num_detections == 0:
+                self.max_num_detections = trained_max_num_detections
 
     def _load_tiling_parameters(self, model_data):
         """Load tiling parameters from PyTorch model.

@@ -43,10 +43,12 @@ def test_configure(self, mocker):
 
         model_cfg = copy.deepcopy(self.model_cfg)
         data_cfg = copy.deepcopy(self.data_cfg)
-        returned_value = self.configurer.configure(model_cfg, self.det_dataset, "", data_cfg, True)
+        returned_value = self.configurer.configure(
+            model_cfg, self.det_dataset, "", data_cfg, True, max_num_detections=100
+        )
         mock_cfg_base.assert_called_once_with(model_cfg, data_cfg, None, None)
         mock_cfg_device.assert_called_once_with(model_cfg, True)
-        mock_cfg_model.assert_called_once_with(model_cfg, None)
+        mock_cfg_model.assert_called_once_with(model_cfg, None, 100)
         mock_cfg_ckpt.assert_called_once_with(model_cfg, "")
         mock_cfg_regularization.assert_called_once_with(model_cfg, True)
         mock_cfg_task.assert_called_once_with(model_cfg, self.det_dataset, True)