Skip to content

Commit

Permalink
Add perf benchmark tests for v2 (#3004)
Browse files Browse the repository at this point in the history
* Add perf tests for all tasks

* Update perf GH workflow

* Parameterize accuracy | efficiency tests

* Log raw data csv

* Subprocess run & csv summary

* Align with v1
  • Loading branch information
Songki Choi authored Mar 5, 2024
1 parent 2ad3653 commit ace7f64
Show file tree
Hide file tree
Showing 13 changed files with 1,717 additions and 33 deletions.
96 changes: 96 additions & 0 deletions .github/workflows/perf_accuracy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
---
# Manually-triggered accuracy benchmark: runs the perf test suite once per task
# (matrix below) and uploads each task's CSV summary as an artifact.
name: Perf-Accuracy Benchmark

on:
  workflow_dispatch: # run on request (no need for PR)
    inputs:
      model-category:
        type: choice
        description: Model category to run benchmark
        options:
          - default # speed, balance, accuracy models only
          - all # default + other models
        default: default
      data-size:
        type: choice
        description: Dataset size to run benchmark
        options:
          - small
          - medium
          - large
          - all
        default: all
      num-repeat:
        # workflow_dispatch inputs are strings; quote the default so a YAML
        # parser does not type it as an integer.
        type: string
        description: Overrides default per-data-size number of repeat setting
        default: "0"
      num-epoch:
        type: string
        description: Overrides default per-model number of epoch setting
        default: "0"
      eval-upto:
        type: choice
        description: The last operation to evaluate. 'optimize' means all.
        options:
          - train
          - export
          - optimize
        default: optimize
      pytest-args:
        type: string
        description: |
          Additional perf-benchmark pytest arguments.
          "-k detection" -> detection task only
          "--dry-run" -> print command w/o execution.
      data-root:
        type: string
        description: Root directory containing validation data in CI server.
        default: "/home/validation/data/v2/"

jobs:
  Perf-Accuracy-Benchmark:
    strategy:
      fail-fast: false # let every task finish even if one fails
      matrix:
        include:
          - task-short: "ano"
            task: "anomaly"
          - task-short: "cls"
            task: "classification"
          - task-short: "det"
            task: "detection"
          - task-short: "isg"
            task: "instance_segmentation"
          - task-short: "ssg"
            task: "semantic_segmentation"
          - task-short: "vsp"
            task: "visual_prompting"
    name: Perf-Accuracy-Benchmark-${{ matrix.task-short }}
    runs-on: [self-hosted, linux, x64, dmount]
    timeout-minutes: 8640
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
      - name: Install Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      - name: Install tox
        run: python -m pip install tox
      - name: Run Performance Test
        # NOTE(security): ${{ inputs.pytest-args }} is expanded directly into the
        # shell command (script-injection surface). workflow_dispatch limits who
        # can trigger this, but consider passing it through an `env:` variable.
        run: >
          tox -vv -e perf-benchmark -- tests/perf/test_${{ matrix.task }}.py ${{ inputs.pytest-args }}
          --benchmark-type accuracy
          --model-category ${{ inputs.model-category }}
          --data-root ${{ inputs.data-root }}
          --data-size ${{ inputs.data-size }}
          --num-repeat ${{ inputs.num-repeat }}
          --num-epoch ${{ inputs.num-epoch }}
          --eval-upto ${{ inputs.eval-upto }}
          --summary-csv .tox/perf-accuracy-benchmark-${{ matrix.task-short }}.csv
          --mlflow-tracking-uri ${{ vars.MLFLOW_TRACKING_SERVER_URI }}
          --user-name ${{ github.triggering_actor }}
      - name: Upload test results
        uses: actions/upload-artifact@v3
        with:
          name: perf-accuracy-benchmark-${{ matrix.task-short }}
          path: .tox/perf-*.csv
        # Use always() to always run this step to publish test results when there are test failures
        if: ${{ always() }}
80 changes: 80 additions & 0 deletions .github/workflows/perf_efficiency.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
---
# Manually-triggered efficiency benchmark: runs the whole perf test directory
# in a single job and uploads the CSV summary as an artifact.
name: Perf-Efficiency Benchmark

on:
  workflow_dispatch: # run on request (no need for PR)
    inputs:
      model-category:
        type: choice
        description: Model category to run benchmark
        options:
          - default # speed, balance, accuracy models only
          - all # default + other models
        default: default
      data-size:
        type: choice
        description: Dataset size to run benchmark
        options:
          - small
          - medium
          - large
          - all
        default: medium
      num-repeat:
        # workflow_dispatch inputs are strings; quote the default so a YAML
        # parser does not type it as an integer.
        type: string
        description: Overrides default per-data-size number of repeat setting
        default: "1"
      num-epoch:
        type: string
        description: Overrides default per-model number of epoch setting
        default: "2"
      eval-upto:
        type: choice
        description: The last operation to evaluate. 'optimize' means all.
        options:
          - train
          - export
          - optimize
        default: optimize
      pytest-args:
        type: string
        description: |
          Additional perf-benchmark pytest arguments.
          "-k detection" -> detection task only
          "--dry-run" -> print command w/o execution.
      data-root:
        type: string
        description: Root directory containing validation data in CI server.
        default: "/home/validation/data/v2/"

jobs:
  Perf-Efficiency-Benchmark:
    name: Perf-Efficiency-Benchmark-all
    runs-on: [self-hosted, linux, x64, dmount]
    timeout-minutes: 8640
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
      - name: Install Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      - name: Install tox
        run: python -m pip install tox
      - name: Run Performance Test
        # NOTE(security): ${{ inputs.pytest-args }} is expanded directly into the
        # shell command (script-injection surface). workflow_dispatch limits who
        # can trigger this, but consider passing it through an `env:` variable.
        run: >
          tox -vv -e perf-benchmark -- tests/perf ${{ inputs.pytest-args }}
          --benchmark-type efficiency
          --model-category ${{ inputs.model-category }}
          --data-root ${{ inputs.data-root }}
          --data-size ${{ inputs.data-size }}
          --num-repeat ${{ inputs.num-repeat }}
          --num-epoch ${{ inputs.num-epoch }}
          --eval-upto ${{ inputs.eval-upto }}
          --summary-csv .tox/perf-efficiency-benchmark-all.csv
          --mlflow-tracking-uri ${{ vars.MLFLOW_TRACKING_SERVER_URI }}
          --user-name ${{ github.triggering_actor }}
      - name: Upload test results
        uses: actions/upload-artifact@v3
        with:
          name: perf-efficiency-benchmark-all
          path: .tox/perf-*.csv
        # Use always() to always run this step to publish test results when there are test failures
        if: ${{ always() }}
31 changes: 0 additions & 31 deletions .github/workflows/perf_test.yaml

This file was deleted.

4 changes: 4 additions & 0 deletions tests/perf/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

"""OTX performance benchmark tests."""
Loading

0 comments on commit ace7f64

Please sign in to comment.