From 4f36c14d8f7b42f70b0c2e320069c44773314ced Mon Sep 17 00:00:00 2001
From: Yunchu Lee
Date: Mon, 19 Feb 2024 16:12:50 +0900
Subject: [PATCH] Update weekly workflow to run perf tests (#2920)

* update weekly workflow to run perf tests

* Fix missing fixture in perf test

* update input to perf tests for weekly

---------

Co-authored-by: Songki Choi
---
 .github/workflows/perf-accuracy.yml | 30 +++++++++++++++-
 .github/workflows/perf-speed.yml    | 30 +++++++++++++++-
 .github/workflows/weekly.yml        | 56 ++++++++++-------------------
 tests/perf/test_classification.py   |  4 +--
 4 files changed, 79 insertions(+), 41 deletions(-)

diff --git a/.github/workflows/perf-accuracy.yml b/.github/workflows/perf-accuracy.yml
index 1318403c3be..ef367a6f9d1 100644
--- a/.github/workflows/perf-accuracy.yml
+++ b/.github/workflows/perf-accuracy.yml
@@ -33,6 +33,34 @@ on:
           - export
           - optimize
         default: optimize
+      artifact-prefix:
+        type: string
+        default: perf-accuracy-benchmark
+  workflow_call:
+    inputs:
+      model-type:
+        type: string
+        description: Model type to run benchmark [default, all]
+        default: default
+      data-size:
+        type: string
+        description: Dataset size to run benchmark [small, medium, large, all]
+        default: all
+      num-repeat:
+        type: number
+        description: Overrides default per-data-size number of repeat setting
+        default: 0
+      num-epoch:
+        type: number
+        description: Overrides default per-model number of epoch setting
+        default: 0
+      eval-upto:
+        type: string
+        description: The last operation to evaluate. 'optimize' means all. [train, export, optimize]
+        default: optimize
+      artifact-prefix:
+        type: string
+        default: perf-accuracy-benchmark
 
 # Declare default permissions as read only.
 permissions: read-all
@@ -73,4 +101,4 @@ jobs:
       task: ${{ matrix.task }}
       timeout-minutes: 8640
       upload-artifact: true
-      artifact-prefix: perf-accuracy-benchmark
+      artifact-prefix: ${{ inputs.artifact-prefix }}
diff --git a/.github/workflows/perf-speed.yml b/.github/workflows/perf-speed.yml
index 3e33a782c2b..26995b0077c 100644
--- a/.github/workflows/perf-speed.yml
+++ b/.github/workflows/perf-speed.yml
@@ -33,6 +33,34 @@ on:
           - export
           - optimize
         default: optimize
+      artifact-prefix:
+        type: string
+        default: perf-speed-benchmark
+  workflow_call:
+    inputs:
+      model-type:
+        type: string
+        description: Model type to run benchmark [default, all]
+        default: default
+      data-size:
+        type: string
+        description: Dataset size to run benchmark [small, medium, large, all]
+        default: medium
+      num-repeat:
+        type: number
+        description: Overrides default per-data-size number of repeat setting
+        default: 1
+      num-epoch:
+        type: number
+        description: Overrides default per-model number of epoch setting
+        default: 3
+      eval-upto:
+        type: string
+        description: The last operation to evaluate. 'optimize' means all [train, export, optimize]
+        default: optimize
+      artifact-prefix:
+        type: string
+        default: perf-speed-benchmark
 
 # Declare default permissions as read only.
 permissions: read-all
@@ -59,4 +87,4 @@ jobs:
       task: all
       timeout-minutes: 8640
       upload-artifact: true
-      artifact-prefix: perf-speed-benchmark
+      artifact-prefix: ${{ inputs.artifact-prefix }}
diff --git a/.github/workflows/weekly.yml b/.github/workflows/weekly.yml
index 3badd5ab79a..ceb401b21f6 100644
--- a/.github/workflows/weekly.yml
+++ b/.github/workflows/weekly.yml
@@ -10,41 +10,23 @@ on:
 
 permissions: read-all
 
 jobs:
-  Regression-Tests:
-    strategy:
-      fail-fast: false
-      matrix:
-        include:
-          - toxenv_task: "iseg"
-            test_dir: "tests/regression/instance_segmentation/test_instance_segmentation.py"
-            task: "instance_segmentation"
-          - toxenv_task: "iseg_t"
-            test_dir: "tests/regression/instance_segmentation/test_tiling_instance_segmentation.py"
-            task: "instance_segmentation"
-          - toxenv_task: "seg"
-            test_dir: "tests/regression/semantic_segmentation"
-            task: "segmentation"
-          - toxenv_task: "det"
-            test_dir: "tests/regression/detection"
-            task: "detection"
-          - toxenv_task: "ano"
-            test_dir: "tests/regression/anomaly"
-            task: "anomaly"
-          - toxenv_task: "act"
-            test_dir: "tests/regression/action"
-            task: "action"
-          - toxenv_task: "cls"
-            test_dir: "tests/regression/classification"
-            task: "classification"
-    name: Regression-Test-py310-${{ matrix.toxenv_task }}
-    uses: ./.github/workflows/run_tests_in_tox.yml
-    with:
-      python-version: "3.10"
-      toxenv-pyver: "py310"
-      toxenv-task: ${{ matrix.toxenv_task }}
-      tests-dir: ${{ matrix.test_dir }}
-      runs-on: "['self-hosted', 'Linux', 'X64', 'dmount']"
-      task: ${{ matrix.task }}
-      timeout-minutes: 8640
-      upload-artifact: true
-      artifact-prefix: "weekly-test-results"
+  Performance-Speed-Tests:
+    name: Performance-Speed-py310
+    uses: ./.github/workflows/perf-speed.yml
+    with:
+      model-type: default
+      data-size: medium
+      num-repeat: 1
+      num-epoch: 3
+      eval-upto: optimize
+      artifact-prefix: weekly-perf-speed-benchmark
+  Performance-Accuracy-Tests:
+    name: Performance-Accuracy-py310
+    uses: ./.github/workflows/perf-accuracy.yml
+    with:
+      model-type: default
+      data-size: all
+      num-repeat: 0
+      num-epoch: 0
+      eval-upto: optimize
+      artifact-prefix: weekly-perf-accuracy-benchmark
diff --git a/tests/perf/test_classification.py b/tests/perf/test_classification.py
index 820d644ae40..9397dc5413e 100644
--- a/tests/perf/test_classification.py
+++ b/tests/perf/test_classification.py
@@ -52,7 +52,7 @@ class TestPerfSingleLabelClassification:
 
     @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
     @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
-    def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark):
+    def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark, fxt_check_benchmark_result: Callable):
         """Benchmark accruacy metrics."""
         result = fxt_benchmark.run(
             model_id=fxt_model_id,
@@ -301,7 +301,7 @@ def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark, fxt_chec
 
     @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
     @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
-    def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark, fxt_check_benchmark_results: Callable):
+    def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark, fxt_check_benchmark_result: Callable):
         """Benchmark train time per iter / infer time per image."""
         fxt_benchmark.track_resources = True
         result = fxt_benchmark.run(
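
Note: with the workflow_call triggers added above, perf-speed.yml and
perf-accuracy.yml become reusable workflows, so any workflow in this
repository can invoke them the way weekly.yml now does. A minimal sketch of
another hypothetical caller (the nightly-perf.yml name, cron schedule, and
input values below are illustrative only, not part of this patch):

# nightly-perf.yml -- hypothetical caller sketch, not part of this patch.
# Reuses perf-speed.yml via its new workflow_call trigger and overrides
# the artifact-prefix input so nightly artifacts are named distinctly.
name: Nightly Performance Check
on:
  schedule:
    - cron: "0 0 * * *" # assumed cadence; adjust as needed
permissions: read-all
jobs:
  Perf-Speed:
    uses: ./.github/workflows/perf-speed.yml
    with:
      model-type: default
      data-size: small
      num-repeat: 1
      num-epoch: 1
      eval-upto: train
      artifact-prefix: nightly-perf-speed-benchmark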