Add perf benchmark test cases for action and visual prompting v1 (#3292)

* Run command w/ subprocess.run() for better stability
* Collect raw data to get seed info
* Fix model-category default to all
* Add action perf test cases
* Add visual prompting perf test cases
* Fix pre-commit
openvinotoolkit · Apr 11, 2024 · 0996359 · 0996359
1 parent a999372
commit 0996359
Show file tree

Hide file tree

Showing 5 changed files with 400 additions and 7 deletions.
diff --git a/.github/workflows/perf_benchmark.yaml b/.github/workflows/perf_benchmark.yaml
@@ -9,7 +9,7 @@ on:
         options:
           - default # speed, balance, accuracy models only
           - all # default + other models
-        default: default
+        default: all
       data-group:
         type: choice
         description: Data group to run benchmark
@@ -98,6 +98,8 @@ jobs:
       fail-fast: false
       matrix:
         include:
+          - task-short: "act"
+            task: "action"
           - task-short: "ano"
             task: "anomaly"
           - task-short: "cls"

diff --git a/tests/perf/benchmark.py b/tests/perf/benchmark.py
@@ -121,7 +121,7 @@ def load_result(result_path: str) -> pd.DataFrame | None:
         """
         # Search csv files
         if os.path.isdir(result_path):
-            csv_file_paths = glob.glob(f"{result_path}/**/exp_summary.csv", recursive=True)
+            csv_file_paths = glob.glob(f"{result_path}/**/all_exp_result.csv", recursive=True)
         else:
             csv_file_paths = [result_path]
         results = []
@@ -142,7 +142,9 @@ def load_result(result_path: str) -> pd.DataFrame | None:
 
         # Merge experiments
         data = pd.concat(results, ignore_index=True)
-        data["train_e2e_time"] = pd.to_timedelta(data["train_e2e_time"]).dt.total_seconds()  # H:M:S str -> seconds
+        if "train_e2e_time" in data:
+            data["train_e2e_time"] = pd.to_timedelta(data["train_e2e_time"]).dt.total_seconds()  # H:M:S str -> seconds
+        data = data.rename(columns={"repeat": "seed"})
         return data.set_index(["task", "model", "data_group", "data"])
 
     @staticmethod
@@ -231,6 +233,8 @@ def _set_num_epoch(model_id: str, train_params: dict, num_epoch: int):
             return  # No configurable parameter for num_epoch
         elif "stfpm" in model_id:
             train_params["learning_parameters.max_epochs"] = num_epoch
+        elif "SAM" in model_id:
+            train_params["learning_parameters.trainer.max_epochs"] = num_epoch
         else:
             train_params["learning_parameters.num_iters"] = num_epoch
 

diff --git a/tests/perf/test_action.py b/tests/perf/test_action.py
@@ -0,0 +1,203 @@
+"""OTX Action performance tests."""
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+
+import pytest
+
+from otx.cli.registry import Registry
+from typing import Callable
+from .benchmark import Benchmark
+
+
+class TestPerfActionClassification:
+    """Benchmark action classification.
+
+    Every ACTION_CLASSIFICATION model template returned by the registry is
+    combined with every entry of ``BENCHMARK_CONFIGS`` through pytest
+    parametrization in ``test_perf``.
+    """
+
+    # Plain string literal: the path has no placeholders, so no f-string is needed.
+    MODEL_TEMPLATES = Registry("src/otx/algorithms").filter(task_type="ACTION_CLASSIFICATION").templates
+    # Template IDs, used as the pytest ids of the model parametrization.
+    MODEL_IDS = [template.model_template_id for template in MODEL_TEMPLATES]
+
+    # Keyed by data group name ("small" / "medium" / "large"). Each entry lists
+    # the task tag, the dataset path(s), the repeat count and the epoch count.
+    BENCHMARK_CONFIGS = {
+        "small": {
+            "tags": {
+                "task": "action_classification",
+            },
+            "datasets": [
+                "action/action_classification/ucf_cvat_5percent",
+            ],
+            "num_repeat": 5,
+            "num_epoch": 10,
+        },
+        "medium": {
+            "tags": {
+                "task": "action_classification",
+            },
+            "datasets": [
+                "action/action_classification/ucf_cvat_30percent",
+            ],
+            "num_repeat": 5,
+            "num_epoch": 10,
+        },
+        "large": {
+            "tags": {
+                "task": "action_classification",
+            },
+            "datasets": [
+                "action/action_classification/ucf_cvat",
+            ],
+            "num_repeat": 5,
+            "num_epoch": 3,
+        },
+    }
+
+    @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
+    @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
+    def test_perf(self, fxt_model_id: str, fxt_benchmark: Benchmark):
+        """Benchmark performance metrics.
+
+        Runs the benchmark for one model and checks the result against the
+        criteria listed below.
+
+        Args:
+            fxt_model_id: Model to benchmark, supplied through the indirectly
+                parametrized ``fxt_model_id`` fixture.
+            fxt_benchmark: Benchmark supplied through the indirectly
+                parametrized ``fxt_benchmark`` fixture.
+        """
+        result = fxt_benchmark.run(model_id=fxt_model_id)
+        # NOTE(review): the meaning of "op" and "margin" is defined by
+        # Benchmark.check, which is not visible here -- presumably ">" marks
+        # higher-is-better metrics and "<" lower-is-better ones; confirm.
+        fxt_benchmark.check(
+            result,
+            criteria=[
+                {
+                    "name": "Accuracy(train)",
+                    "op": ">",
+                    "margin": 0.1,
+                },
+                {
+                    "name": "Accuracy(export)",
+                    "op": ">",
+                    "margin": 0.1,
+                },
+                {
+                    "name": "Accuracy(optimize)",
+                    "op": ">",
+                    "margin": 0.1,
+                },
+                {
+                    "name": "epoch",
+                    "op": "<",
+                    "margin": 0.1,
+                },
+                {
+                    "name": "train_e2e_time",
+                    "op": "<",
+                    "margin": 0.1,
+                },
+                {
+                    "name": "avg_data_time",
+                    "op": "<",
+                    "margin": 0.1,
+                },
+                {
+                    "name": "avg_iter_time",
+                    "op": "<",
+                    "margin": 0.1,
+                },
+                {
+                    "name": "avg_time_per_image(export)",
+                    "op": "<",
+                    "margin": 0.1,
+                },
+                {
+                    "name": "avg_time_per_image(optimize)",
+                    "op": "<",
+                    "margin": 0.1,
+                },
+            ],
+        )
+
+
+class TestPerfActionDetection:
+    """Benchmark action detection.
+
+    Every ACTION_DETECTION model template returned by the registry is
+    combined with every entry of ``BENCHMARK_CONFIGS`` through pytest
+    parametrization in ``test_perf``.
+    """
+
+    # Plain string literal: the path has no placeholders, so no f-string is needed.
+    MODEL_TEMPLATES = Registry("src/otx/algorithms").filter(task_type="ACTION_DETECTION").templates
+    # Template IDs, used as the pytest ids of the model parametrization.
+    MODEL_IDS = [template.model_template_id for template in MODEL_TEMPLATES]
+
+    # Keyed by data group name ("small" / "medium" / "large"). Each entry lists
+    # the task tag, the dataset path(s), the repeat count and the epoch count.
+    BENCHMARK_CONFIGS = {
+        "small": {
+            "tags": {
+                "task": "action_detection",
+            },
+            "datasets": [
+                "action/action_detection/UCF101_cvat_5percent",
+            ],
+            "num_repeat": 5,
+            "num_epoch": 3,
+        },
+        "medium": {
+            "tags": {
+                "task": "action_detection",
+            },
+            "datasets": [
+                "action/action_detection/UCF101_cvat_30percent",
+            ],
+            "num_repeat": 5,
+            "num_epoch": 3,
+        },
+        "large": {
+            "tags": {
+                "task": "action_detection",
+            },
+            "datasets": [
+                "action/action_detection/UCF101_cvat",
+            ],
+            "num_repeat": 5,
+            "num_epoch": 1,
+        },
+    }
+
+    @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
+    @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
+    def test_perf(self, fxt_model_id: str, fxt_benchmark: Benchmark):
+        """Benchmark performance metrics.
+
+        Runs the benchmark for one model and checks the result against the
+        criteria listed below.
+
+        Args:
+            fxt_model_id: Model to benchmark, supplied through the indirectly
+                parametrized ``fxt_model_id`` fixture.
+            fxt_benchmark: Benchmark supplied through the indirectly
+                parametrized ``fxt_benchmark`` fixture.
+        """
+        result = fxt_benchmark.run(model_id=fxt_model_id)
+        # NOTE(review): the meaning of "op" and "margin" is defined by
+        # Benchmark.check, which is not visible here -- presumably ">" marks
+        # higher-is-better metrics and "<" lower-is-better ones; confirm.
+        fxt_benchmark.check(
+            result,
+            criteria=[
+                {
+                    "name": "f-measure(train)",
+                    "op": ">",
+                    "margin": 0.1,
+                },
+                {
+                    "name": "epoch",
+                    "op": "<",
+                    "margin": 0.1,
+                },
+                {
+                    "name": "f-measure(export)",
+                    "op": ">",
+                    "margin": 0.1,
+                },
+                {
+                    "name": "f-measure(optimize)",
+                    "op": ">",
+                    "margin": 0.1,
+                },
+                {
+                    "name": "train_e2e_time",
+                    "op": "<",
+                    "margin": 0.1,
+                },
+                {
+                    "name": "avg_data_time",
+                    "op": "<",
+                    "margin": 0.1,
+                },
+                {
+                    "name": "avg_iter_time",
+                    "op": "<",
+                    "margin": 0.1,
+                },
+                {
+                    "name": "avg_time_per_image(export)",
+                    "op": "<",
+                    "margin": 0.1,
+                },
+                {
+                    "name": "avg_time_per_image(optimize)",
+                    "op": "<",
+                    "margin": 0.1,
+                },
+            ],
+        )