From 4f36c14d8f7b42f70b0c2e320069c44773314ced Mon Sep 17 00:00:00 2001
From: Yunchu Lee
Date: Mon, 19 Feb 2024 16:12:50 +0900
Subject: [PATCH] Update weekly workflow to run perf tests (#2920)

* update weekly workflow to run perf tests

* Fix missing fixture in perf test

* update input to perf tests for weekly

---------

Co-authored-by: Songki Choi
---
 .github/workflows/perf-accuracy.yml | 30 +++++++++++++++-
 .github/workflows/perf-speed.yml    | 30 +++++++++++++++-
 .github/workflows/weekly.yml        | 56 ++++++++++-------------------
 tests/perf/test_classification.py   |  4 +--
 4 files changed, 79 insertions(+), 41 deletions(-)

diff --git a/.github/workflows/perf-accuracy.yml b/.github/workflows/perf-accuracy.yml
index 1318403c3be..ef367a6f9d1 100644
--- a/.github/workflows/perf-accuracy.yml
+++ b/.github/workflows/perf-accuracy.yml
@@ -33,6 +33,34 @@ on:
           - export
           - optimize
         default: optimize
+      artifact-prefix:
+        type: string
+        default: perf-accuracy-benchmark
+  workflow_call:
+    inputs:
+      model-type:
+        type: string
+        description: Model type to run benchmark [default, all]
+        default: default
+      data-size:
+        type: string
+        description: Dataset size to run benchmark [small, medium, large, all]
+        default: all
+      num-repeat:
+        type: number
+        description: Overrides default per-data-size number of repeat setting
+        default: 0
+      num-epoch:
+        type: number
+        description: Overrides default per-model number of epoch setting
+        default: 0
+      eval-upto:
+        type: string
+        description: The last operation to evaluate. 'optimize' means all. [train, export, optimize]
+        default: optimize
+      artifact-prefix:
+        type: string
+        default: perf-accuracy-benchmark
 
 # Declare default permissions as read only.
 permissions: read-all
@@ -73,4 +101,4 @@ jobs:
       task: ${{ matrix.task }}
       timeout-minutes: 8640
       upload-artifact: true
-      artifact-prefix: perf-accuracy-benchmark
+      artifact-prefix: ${{ inputs.artifact-prefix }}
diff --git a/.github/workflows/perf-speed.yml b/.github/workflows/perf-speed.yml
index 3e33a782c2b..26995b0077c 100644
--- a/.github/workflows/perf-speed.yml
+++ b/.github/workflows/perf-speed.yml
@@ -33,6 +33,34 @@ on:
           - export
           - optimize
         default: optimize
+      artifact-prefix:
+        type: string
+        default: perf-speed-benchmark
+  workflow_call:
+    inputs:
+      model-type:
+        type: string
+        description: Model type to run benchmark [default, all]
+        default: default
+      data-size:
+        type: string
+        description: Dataset size to run benchmark [small, medium, large, all]
+        default: medium
+      num-repeat:
+        type: number
+        description: Overrides default per-data-size number of repeat setting
+        default: 1
+      num-epoch:
+        type: number
+        description: Overrides default per-model number of epoch setting
+        default: 3
+      eval-upto:
+        type: string
+        description: The last operation to evaluate. 'optimize' means all [train, export, optimize]
+        default: optimize
+      artifact-prefix:
+        type: string
+        default: perf-speed-benchmark
 
 # Declare default permissions as read only.
 permissions: read-all
@@ -59,4 +87,4 @@ jobs:
       task: all
       timeout-minutes: 8640
       upload-artifact: true
-      artifact-prefix: perf-speed-benchmark
+      artifact-prefix: ${{ inputs.artifact-prefix }}
diff --git a/.github/workflows/weekly.yml b/.github/workflows/weekly.yml
index 3badd5ab79a..ceb401b21f6 100644
--- a/.github/workflows/weekly.yml
+++ b/.github/workflows/weekly.yml
@@ -10,41 +10,23 @@ on:
 
 permissions: read-all
 
 jobs:
-  Regression-Tests:
-    strategy:
-      fail-fast: false
-      matrix:
-        include:
-          - toxenv_task: "iseg"
-            test_dir: "tests/regression/instance_segmentation/test_instance_segmentation.py"
-            task: "instance_segmentation"
-          - toxenv_task: "iseg_t"
-            test_dir: "tests/regression/instance_segmentation/test_tiling_instance_segmentation.py"
-            task: "instance_segmentation"
-          - toxenv_task: "seg"
-            test_dir: "tests/regression/semantic_segmentation"
-            task: "segmentation"
-          - toxenv_task: "det"
-            test_dir: "tests/regression/detection"
-            task: "detection"
-          - toxenv_task: "ano"
-            test_dir: "tests/regression/anomaly"
-            task: "anomaly"
-          - toxenv_task: "act"
-            test_dir: "tests/regression/action"
-            task: "action"
-          - toxenv_task: "cls"
-            test_dir: "tests/regression/classification"
-            task: "classification"
-    name: Regression-Test-py310-${{ matrix.toxenv_task }}
-    uses: ./.github/workflows/run_tests_in_tox.yml
-    with:
-      python-version: "3.10"
-      toxenv-pyver: "py310"
-      toxenv-task: ${{ matrix.toxenv_task }}
-      tests-dir: ${{ matrix.test_dir }}
-      runs-on: "['self-hosted', 'Linux', 'X64', 'dmount']"
-      task: ${{ matrix.task }}
-      timeout-minutes: 8640
-      upload-artifact: true
-      artifact-prefix: "weekly-test-results"
+  Performance-Speed-Tests:
+    name: Performance-Speed-py310
+    uses: ./.github/workflows/perf-speed.yml
+    with:
+      model-type: default
+      data-size: medium
+      num-repeat: 1
+      num-epoch: 3
+      eval-upto: optimize
+      artifact-prefix: weekly-perf-speed-benchmark
+  Performance-Accuracy-Tests:
+    name: Performance-Accuracy-py310
+    uses: ./.github/workflows/perf-accuracy.yml
+    with:
+      model-type: default
+      data-size: all
+      num-repeat: 0
+      num-epoch: 0
+      eval-upto: optimize
+      artifact-prefix: weekly-perf-accuracy-benchmark
diff --git a/tests/perf/test_classification.py b/tests/perf/test_classification.py
index 820d644ae40..9397dc5413e 100644
--- a/tests/perf/test_classification.py
+++ b/tests/perf/test_classification.py
@@ -52,7 +52,7 @@ class TestPerfSingleLabelClassification:
 
     @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
     @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
-    def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark):
+    def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark, fxt_check_benchmark_result: Callable):
         """Benchmark accruacy metrics."""
         result = fxt_benchmark.run(
             model_id=fxt_model_id,
@@ -301,7 +301,7 @@ def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark, fxt_chec
 
     @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
     @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
-    def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark, fxt_check_benchmark_results: Callable):
+    def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark, fxt_check_benchmark_result: Callable):
         """Benchmark train time per iter / infer time per image."""
         fxt_benchmark.track_resources = True
         result = fxt_benchmark.run(
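
Note: with the workflow_call triggers added above, perf-speed.yml and
perf-accuracy.yml become reusable workflows, so any workflow in this
repository can invoke them the way weekly.yml now does. A minimal sketch of
another hypothetical caller (the nightly-perf.yml name, cron schedule, and
input values below are illustrative only, not part of this patch):

# nightly-perf.yml -- hypothetical caller sketch, not part of this patch.
# Reuses perf-speed.yml via its new workflow_call trigger and overrides
# the artifact-prefix input so nightly artifacts are named distinctly.
name: Nightly Performance Check
on:
  schedule:
    - cron: "0 0 * * *" # assumed cadence; adjust as needed
permissions: read-all
jobs:
  Perf-Speed:
    uses: ./.github/workflows/perf-speed.yml
    with:
      model-type: default
      data-size: small
      num-repeat: 1
      num-epoch: 1
      eval-upto: train
      artifact-prefix: nightly-perf-speed-benchmark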