From ec0f90658ea6d817ceb9609241149429fcd2147c Mon Sep 17 00:00:00 2001
From: Eunwoo Shin
Date: Thu, 30 May 2024 21:07:52 +0900
Subject: [PATCH] Make Perf test available to load previous Perf test to skip training stage (#3556)

* symlink to relative path
* skip training if prev perf result exists
* implement missing part
* align with pre-commit
* update test code
* fix typo
* change arg name to resume-from
* revert checkpoint symlink
---
 src/otx/cli/cli.py                       |   2 +-
 tests/conftest.py                        |   6 +
 tests/perf/benchmark.py                  | 175 +++++++++++++++--------
 tests/perf/conftest.py                   |  12 ++
 tests/perf/test_action.py                |  10 ++
 tests/perf/test_anomaly.py               |  15 ++
 tests/perf/test_classification.py        |  15 ++
 tests/perf/test_detection.py             |   5 +
 tests/perf/test_instance_segmentation.py |  10 ++
 tests/perf/test_semantic_segmentation.py |   5 +
 tests/perf/test_visual_prompting.py      |  10 ++
 11 files changed, 205 insertions(+), 60 deletions(-)

diff --git a/src/otx/cli/cli.py b/src/otx/cli/cli.py
index 4694275c0cd..83da4f45023 100644
--- a/src/otx/cli/cli.py
+++ b/src/otx/cli/cli.py
@@ -492,7 +492,7 @@ def update_latest(self, work_dir: Path) -> None:
         cache_dir = latest_dir / self.subcommand
         if cache_dir.exists():
             cache_dir.unlink()
-        cache_dir.symlink_to(work_dir)
+        cache_dir.symlink_to(Path("..") / work_dir.relative_to(work_dir.parent))
 
     def set_seed(self) -> None:
         """Set the random seed for reproducibility.
diff --git a/tests/conftest.py b/tests/conftest.py
index 4ef8c4059fd..2d4c9d484b8 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -119,6 +119,12 @@ def pytest_addoption(parser: pytest.Parser):
         "`pip install otx[full]@https://github.com/openvinotoolkit/training_extensions.git@{otx_ref}` will be executed before run, "
         "and reverted after run. Works only for v2.x assuming CLI compatibility.",
     )
+    parser.addoption(
+        "--resume-from",
+        type=str,
+        help="Previous performance test directory which contains execution results. "
+        "If training was already done in the previous performance test, training is skipped and the previous result is reused.",
+    )
     parser.addoption(
         "--open-subprocess",
         action="store_true",
diff --git a/tests/perf/benchmark.py b/tests/perf/benchmark.py
index d590ba993a2..ac575529c9f 100644
--- a/tests/perf/benchmark.py
+++ b/tests/perf/benchmark.py
@@ -7,11 +7,12 @@
 
 import gc
 import logging
+import shutil
 import subprocess
 from dataclasses import dataclass
 from pathlib import Path
 from time import time
-from typing import Any
+from typing import Any, Literal
 
 import numpy as np
 import pandas as pd
@@ -132,6 +133,7 @@ def run(
         model: Model,
         dataset: Dataset,
         criteria: list[Criterion],
+        resume_from: Path | None = None,
     ) -> pd.DataFrame | None:
         """Run configured benchmark with given dataset and model and return the result.
 
@@ -139,6 +141,9 @@ def run(
             model (Model): Target model settings
             dataset (Dataset): Target dataset settings
            criteria (list[Criterion]): Target criteria settings
+            resume_from (Path | None, optional):
+                Previous performance directory to load. If training was already done in the previous
+                performance test, training is skipped and the previous result is reused.
 
         Returns:
             pd.DataFrame | None: Table with benchmark metrics
@@ -168,6 +173,13 @@ def run(
             tags["seed"] = str(seed)
 
             # Train & test
+            copied_train_dir = None
+            if (
+                resume_from is not None
+                and (prev_train_dir := self._find_corresponding_dir(resume_from, tags)) is not None
+            ):
+                copied_train_dir = self._copy_prev_train_dir(prev_train_dir, sub_work_dir)
+
             command = [
                 "otx",
                 "train",
@@ -189,13 +201,19 @@ def run(
                 command.extend(["--deterministic", str(self.deterministic)])
             if self.num_epoch > 0:
                 command.extend(["--max_epochs", str(self.num_epoch)])
-            start_time = time()
-            self._run_command(command)
-            extra_metrics = {"train/e2e_time": time() - start_time}
-            self._rename_raw_data(
-                work_dir=sub_work_dir / ".latest" / "train",
-                replaces={"train_": "train/", "{pre}": "train/"},
-            )
+            extra_metrics = {}
+            if copied_train_dir is not None:
+                command.append("--print_config")
+                with (copied_train_dir / "configs.yaml").open("w") as f:
+                    self._run_command(command, stdout=f)  # replace the previous configs.yaml with the new one
+            else:
+                start_time = time()
+                self._run_command(command)
+                extra_metrics["train/e2e_time"] = time() - start_time
+                self._rename_raw_data(
+                    work_dir=sub_work_dir / ".latest" / "train",
+                    replaces={"train_": "train/", "{pre}": "train/"},
+                )
             self._log_metrics(
                 work_dir=sub_work_dir / ".latest" / "train",
                 tags=tags,
@@ -203,21 +221,7 @@ def run(
                 extra_metrics=extra_metrics,
             )
 
-            command = [
-                "otx",
-                "test",
-                "--work_dir",
-                str(sub_work_dir),
-            ]
-            for key, value in dataset.extra_overrides.get("test", {}).items():
-                command.append(f"--{key}")
-                command.append(str(value))
-            self._run_command(command)
-            self._rename_raw_data(
-                work_dir=sub_work_dir / ".latest" / "test",
-                replaces={"test_": "test/", "{pre}": "test/"},
-            )
-            self._log_metrics(work_dir=sub_work_dir / ".latest" / "test", tags=tags, criteria=criteria)
+            self._run_test(sub_work_dir, dataset, tags, criteria, what2test="train")
 
             # Export & test
             if self.eval_upto in ["export", "optimize"]:
@@ -236,24 +240,14 @@ def run(
                 if not exported_model_path.exists():
                     exported_model_path = sub_work_dir / ".latest" / "export" / "exported_model_decoder.xml"
 
-                command = [  # NOTE: not working for h_label_cls. to be fixed
-                    "otx",
-                    "test",
-                    "--checkpoint",
-                    str(exported_model_path),
-                    "--work_dir",
-                    str(sub_work_dir),
-                ]
-                for key, value in dataset.extra_overrides.get("test", {}).items():
-                    command.append(f"--{key}")
-                    command.append(str(value))
-                self._run_command(command)
-
-                self._rename_raw_data(
-                    work_dir=sub_work_dir / ".latest" / "test",
-                    replaces={"test": "export", "{pre}": "export/"},
+                self._run_test(
+                    sub_work_dir,
+                    dataset,
+                    tags,
+                    criteria,
+                    checkpoint=exported_model_path,
+                    what2test="export",
                 )
-                self._log_metrics(work_dir=sub_work_dir / ".latest" / "test", tags=tags, criteria=criteria)
 
             # Optimize & test
             if self.eval_upto == "optimize":
@@ -274,24 +268,14 @@ def run(
                 if not optimized_model_path.exists():
                     optimized_model_path = sub_work_dir / ".latest" / "optimize" / "optimized_model_decoder.xml"
 
-                command = [
-                    "otx",
-                    "test",
-                    "--checkpoint",
-                    str(optimized_model_path),
-                    "--work_dir",
-                    str(sub_work_dir),
-                ]
-                for key, value in dataset.extra_overrides.get("test", {}).items():
-                    command.append(f"--{key}")
-                    command.append(str(value))
-                self._run_command(command)
-
-                self._rename_raw_data(
-                    work_dir=sub_work_dir / ".latest" / "test",
-                    replaces={"test": "optimize", "{pre}": "optimize/"},
+                self._run_test(
+                    sub_work_dir,
+                    dataset,
+                    tags,
+                    criteria,
+                    checkpoint=optimized_model_path,
+                    what2test="optimize",
                 )
-                self._log_metrics(work_dir=sub_work_dir / ".latest" / "test", tags=tags, criteria=criteria)
 
             # Force memory clean up
             gc.collect()
@@ -308,10 +292,83 @@ def run(
         result = summary.average(result, keys=["task", "model", "data_group", "data"])  # Average out seeds
         return result.set_index(["task", "model", "data_group", "data"])
 
-    def _run_command(self, command: list[str]) -> None:
+    def _find_corresponding_dir(self, resume_from: Path, tags: dict[str, str]) -> Path | None:
+        for csv_file in resume_from.rglob("benchmark.raw.csv"):
+            raw_data = pd.read_csv(csv_file)
+            if (
+                "train/epoch" in raw_data.columns  # check it's csv of train result
+                and all(  # check meta info is same
+                    str(raw_data.iloc[0].get(key, "NOT_IN_CSV")) == tags.get(key, "NOT_IN_TAG")
+                    for key in ["data_group", "data", "model", "task", "seed"]
+                )
+            ):
+                return csv_file.parent
+        return None
+
+    def _copy_prev_train_dir(self, prev_train_dir: Path, work_dir: Path) -> Path:
+        work_dir.mkdir(parents=True, exist_ok=True)
+        new_train_dir = work_dir / prev_train_dir.name
+        shutil.copytree(prev_train_dir, new_train_dir, ignore_dangling_symlinks=True)
+        cache_dir = work_dir / ".latest" / "train"
+        cache_dir.parent.mkdir(exist_ok=True)
+        cache_dir.symlink_to(Path("..") / new_train_dir.relative_to(work_dir))
+
+        return new_train_dir
+
+    def _run_test(
+        self,
+        work_dir: Path | str,
+        dataset: Dataset,
+        tags: dict[str, str],
+        criteria: list[Criterion],
+        checkpoint: Path | str | None = None,
+        what2test: Literal["train", "export", "optimize"] = "train",
+    ) -> None:
+        """Run otx test and update the result csv file to align its indices with the current task."""
+        replace_map = {
+            "train": {"test_": "test/", "{pre}": "export/"},
+            "export": {"test": "export", "{pre}": "export/"},
+            "optimize": {"test": "optimize", "{pre}": "optimize/"},
+        }
+
+        command = [
+            "otx",
+            "test",
+            "--work_dir",
+            str(work_dir),
+        ]
+        if checkpoint is not None:
+            command.extend(["--checkpoint", str(checkpoint)])
+        for key, value in dataset.extra_overrides.get("test", {}).items():
+            command.append(f"--{key}")
+            command.append(str(value))
+
+        start_time = time()
+        self._run_command(command)
+        extra_metrics = {f"test({what2test})/e2e_time": time() - start_time}
+
+        self._rename_raw_data(
+            work_dir=work_dir / ".latest" / "test",
+            replaces=replace_map[what2test],
+        )
+        self._log_metrics(
+            work_dir=work_dir / ".latest" / "test",
+            tags=tags,
+            criteria=criteria,
+            extra_metrics=extra_metrics,
+        )
+
+    def _run_command(self, command: list[str], **kwargs) -> None:
+        """Run command using 'subprocess.run'.
+
+        Args:
+            command (list[str]): command to execute.
+            kwargs: keyword arguments passed to 'subprocess.run'.
+        """
         print(" ".join(command))
+        kwargs["check"] = True
         if not self.dry_run:
-            subprocess.run(command, check=True)  # noqa: S603
+            subprocess.run(command, **kwargs)  # noqa: S603, PLW1510
 
     def _log_metrics(
         self,
diff --git a/tests/perf/conftest.py b/tests/perf/conftest.py
index e690f3d8d16..6a0904ac6d9 100644
--- a/tests/perf/conftest.py
+++ b/tests/perf/conftest.py
@@ -245,6 +245,16 @@ def fxt_tags(fxt_user_name: str, fxt_version_tags: dict[str, str]) -> dict[str,
     return tags
 
 
+@pytest.fixture(scope="session")
+def fxt_resume_from(request: pytest.FixtureRequest) -> Path | None:
+    resume_from = request.config.getoption("--resume-from")
+    if resume_from is not None:
+        resume_from = Path(resume_from)
+    msg = f"{resume_from = }"
+    log.info(msg)
+    return resume_from
+
+
 @pytest.fixture()
 def fxt_benchmark(
     fxt_data_root: Path,
@@ -356,11 +366,13 @@ def _test_perf(
         dataset: Benchmark.Dataset,
         benchmark: Benchmark,
         criteria: list[Benchmark.Criterion],
+        resume_from: Path | None,
     ) -> None:
         result = benchmark.run(
             model=model,
             dataset=dataset,
             criteria=criteria,
+            resume_from=resume_from,
         )
         benchmark.check(
             result=result,
diff --git a/tests/perf/test_action.py b/tests/perf/test_action.py
index 96c6595cbef..bba1ab52ea6 100644
--- a/tests/perf/test_action.py
+++ b/tests/perf/test_action.py
@@ -70,6 +70,9 @@ class TestPerfActionClassification(PerfTestBase):
         Benchmark.Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
+        Benchmark.Criterion(name="test(train)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
     ]
 
     @pytest.mark.parametrize(
@@ -89,12 +92,14 @@ def test_perf(
         fxt_model: Benchmark.Model,
         fxt_dataset: Benchmark.Dataset,
         fxt_benchmark: Benchmark,
+        fxt_resume_from: Path | None,
     ):
         self._test_perf(
             model=fxt_model,
             dataset=fxt_dataset,
             benchmark=fxt_benchmark,
             criteria=self.BENCHMARK_CRITERIA,
+            resume_from=fxt_resume_from,
         )
 
 
@@ -154,6 +159,9 @@ class TestPerfActionDetection(PerfTestBase):
         Benchmark.Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
+        Benchmark.Criterion(name="test(train)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
     ]
 
     @pytest.mark.parametrize(
@@ -173,10 +181,12 @@ def test_perf(
         fxt_model: Benchmark.Model,
         fxt_dataset: Benchmark.Dataset,
         fxt_benchmark: Benchmark,
+        fxt_resume_from: Path | None,
     ):
         self._test_perf(
             model=fxt_model,
             dataset=fxt_dataset,
             benchmark=fxt_benchmark,
             criteria=self.BENCHMARK_CRITERIA,
+            resume_from=fxt_resume_from,
         )
diff --git a/tests/perf/test_anomaly.py b/tests/perf/test_anomaly.py
index 63883e068bf..cea8ff3d804 100644
--- a/tests/perf/test_anomaly.py
+++ b/tests/perf/test_anomaly.py
@@ -57,6 +57,9 @@ class TestPerfAnomalyClassification(PerfTestBase):
         Benchmark.Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
+        Benchmark.Criterion(name="test(train)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
     ]
 
     @pytest.mark.parametrize(
@@ -76,12 +79,14 @@ def test_perf(
         fxt_model: Benchmark.Model,
         fxt_dataset: Benchmark.Dataset,
         fxt_benchmark: Benchmark,
+        fxt_resume_from: Path | None,
     ):
         self._test_perf(
             model=fxt_model,
             dataset=fxt_dataset,
             benchmark=fxt_benchmark,
             criteria=self.BENCHMARK_CRITERIA,
+            resume_from=fxt_resume_from,
         )
 
 
@@ -129,6 +134,9 @@ class TestPerfAnomalyDetection(PerfTestBase):
         Benchmark.Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
+        Benchmark.Criterion(name="test(train)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
     ]
 
     @pytest.mark.parametrize(
@@ -148,12 +156,14 @@ def test_perf(
         fxt_model: Benchmark.Model,
         fxt_dataset: Benchmark.Dataset,
         fxt_benchmark: Benchmark,
+        fxt_resume_from: Path | None,
     ):
         self._test_perf(
             model=fxt_model,
             dataset=fxt_dataset,
             benchmark=fxt_benchmark,
             criteria=self.BENCHMARK_CRITERIA,
+            resume_from=fxt_resume_from,
         )
 
 
@@ -201,6 +211,9 @@ class TestPerfAnomalySegmentation(PerfTestBase):
         Benchmark.Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
+        Benchmark.Criterion(name="test(train)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
     ]
 
     @pytest.mark.parametrize(
@@ -220,10 +233,12 @@ def test_perf(
         fxt_model: Benchmark.Model,
         fxt_dataset: Benchmark.Dataset,
         fxt_benchmark: Benchmark,
+        fxt_resume_from: Path | None,
     ):
         self._test_perf(
             model=fxt_model,
             dataset=fxt_dataset,
             benchmark=fxt_benchmark,
             criteria=self.BENCHMARK_CRITERIA,
+            resume_from=fxt_resume_from,
         )
diff --git a/tests/perf/test_classification.py b/tests/perf/test_classification.py
index 3f2924d2db9..212c4bcbd65 100644
--- a/tests/perf/test_classification.py
+++ b/tests/perf/test_classification.py
@@ -61,6 +61,9 @@ class TestPerfSingleLabelClassification(PerfTestBase):
         Benchmark.Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
+        Benchmark.Criterion(name="test(train)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
     ]
 
     @pytest.mark.parametrize(
@@ -80,12 +83,14 @@ def test_perf(
         fxt_model: Benchmark.Model,
         fxt_dataset: Benchmark.Dataset,
         fxt_benchmark: Benchmark,
+        fxt_resume_from: Path | None,
     ):
         self._test_perf(
             model=fxt_model,
             dataset=fxt_dataset,
             benchmark=fxt_benchmark,
             criteria=self.BENCHMARK_CRITERIA,
+            resume_from=fxt_resume_from,
         )
 
 
@@ -136,6 +141,9 @@ class TestPerfMultiLabelClassification(PerfTestBase):
         Benchmark.Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
+        Benchmark.Criterion(name="test(train)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
     ]
 
     @pytest.mark.parametrize(
@@ -155,12 +163,14 @@ def test_perf(
         fxt_model: Benchmark.Model,
         fxt_dataset: Benchmark.Dataset,
         fxt_benchmark: Benchmark,
+        fxt_resume_from: Path | None,
     ):
         self._test_perf(
             model=fxt_model,
             dataset=fxt_dataset,
             benchmark=fxt_benchmark,
             criteria=self.BENCHMARK_CRITERIA,
+            resume_from=fxt_resume_from,
         )
 
 
@@ -205,6 +215,9 @@ class TestPerfHierarchicalLabelClassification(PerfTestBase):
         Benchmark.Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
+        Benchmark.Criterion(name="test(train)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
     ]
 
     @pytest.mark.parametrize(
@@ -224,10 +237,12 @@ def test_perf(
         fxt_model: Benchmark.Model,
         fxt_dataset: Benchmark.Dataset,
         fxt_benchmark: Benchmark,
+        fxt_resume_from: Path | None,
     ):
         self._test_perf(
             model=fxt_model,
             dataset=fxt_dataset,
             benchmark=fxt_benchmark,
             criteria=self.BENCHMARK_CRITERIA,
+            resume_from=fxt_resume_from,
         )
diff --git a/tests/perf/test_detection.py b/tests/perf/test_detection.py
index 045badf4606..9ae73f7c932 100644
--- a/tests/perf/test_detection.py
+++ b/tests/perf/test_detection.py
@@ -84,6 +84,9 @@ class TestPerfObjectDetection(PerfTestBase):
         Benchmark.Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
+        Benchmark.Criterion(name="test(train)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
     ]
 
     @pytest.mark.parametrize(
@@ -103,10 +106,12 @@ def test_perf(
         fxt_model: Benchmark.Model,
         fxt_dataset: Benchmark.Dataset,
         fxt_benchmark: Benchmark,
+        fxt_resume_from: Path | None,
     ):
         self._test_perf(
             model=fxt_model,
             dataset=fxt_dataset,
             benchmark=fxt_benchmark,
             criteria=self.BENCHMARK_CRITERIA,
+            resume_from=fxt_resume_from,
         )
diff --git a/tests/perf/test_instance_segmentation.py b/tests/perf/test_instance_segmentation.py
index 288b2a48732..1a2e6342118 100644
--- a/tests/perf/test_instance_segmentation.py
+++ b/tests/perf/test_instance_segmentation.py
@@ -89,6 +89,9 @@ class TestPerfInstanceSegmentation(PerfTestBase):
         Benchmark.Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
+        Benchmark.Criterion(name="test(train)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
     ]
 
     @pytest.mark.parametrize(
@@ -108,12 +111,14 @@ def test_perf(
         fxt_model: Benchmark.Model,
         fxt_dataset: Benchmark.Dataset,
         fxt_benchmark: Benchmark,
+        fxt_resume_from: Path | None,
     ):
         self._test_perf(
             model=fxt_model,
             dataset=fxt_dataset,
             benchmark=fxt_benchmark,
             criteria=self.BENCHMARK_CRITERIA,
+            resume_from=fxt_resume_from,
         )
 
 
@@ -177,6 +182,9 @@ class TestPerfTilingInstanceSegmentation(PerfTestBase):
         Benchmark.Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
+        Benchmark.Criterion(name="test(train)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
     ]
 
     @pytest.mark.parametrize(
@@ -196,10 +204,12 @@ def test_perf(
         fxt_model: Benchmark.Model,
         fxt_dataset: Benchmark.Dataset,
         fxt_benchmark: Benchmark,
+        fxt_resume_from: Path | None,
     ):
         self._test_perf(
             model=fxt_model,
             dataset=fxt_dataset,
             benchmark=fxt_benchmark,
             criteria=self.BENCHMARK_CRITERIA,
+            resume_from=fxt_resume_from,
         )
diff --git a/tests/perf/test_semantic_segmentation.py b/tests/perf/test_semantic_segmentation.py
index 1cd5fe7a968..bf644258d3c 100644
--- a/tests/perf/test_semantic_segmentation.py
+++ b/tests/perf/test_semantic_segmentation.py
@@ -63,6 +63,9 @@ class TestPerfSemanticSegmentation(PerfTestBase):
         Benchmark.Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
+        Benchmark.Criterion(name="test(train)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
     ]
 
     @pytest.mark.parametrize(
@@ -82,10 +85,12 @@ def test_perf(
         fxt_model: Benchmark.Model,
         fxt_dataset: Benchmark.Dataset,
         fxt_benchmark: Benchmark,
+        fxt_resume_from: Path | None,
     ):
         self._test_perf(
             model=fxt_model,
             dataset=fxt_dataset,
             benchmark=fxt_benchmark,
             criteria=self.BENCHMARK_CRITERIA,
+            resume_from=fxt_resume_from,
         )
diff --git a/tests/perf/test_visual_prompting.py b/tests/perf/test_visual_prompting.py
index c2d0a2f4766..0aae2d8c29b 100644
--- a/tests/perf/test_visual_prompting.py
+++ b/tests/perf/test_visual_prompting.py
@@ -58,6 +58,9 @@ class TestPerfVisualPrompting(PerfTestBase):
         Benchmark.Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
+        Benchmark.Criterion(name="test(train)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
     ]
 
     @pytest.mark.parametrize(
@@ -77,12 +80,14 @@ def test_perf(
         fxt_model: Benchmark.Model,
         fxt_dataset: Benchmark.Dataset,
         fxt_benchmark: Benchmark,
+        fxt_resume_from: Path | None,
     ):
         self._test_perf(
             model=fxt_model,
             dataset=fxt_dataset,
             benchmark=fxt_benchmark,
             criteria=self.BENCHMARK_CRITERIA,
+            resume_from=fxt_resume_from,
         )
 
 
@@ -119,6 +124,9 @@ class TestPerfZeroShotVisualPrompting(PerfTestBase):
         Benchmark.Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
         Benchmark.Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
+        Benchmark.Criterion(name="test(train)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
+        Benchmark.Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
     ]
 
     @pytest.mark.parametrize(
@@ -138,10 +146,12 @@ def test_perf(
         fxt_model: Benchmark.Model,
        fxt_dataset: Benchmark.Dataset,
        fxt_benchmark: Benchmark,
+        fxt_resume_from: Path | None,
     ):
         self._test_perf(
             model=fxt_model,
             dataset=fxt_dataset,
             benchmark=fxt_benchmark,
             criteria=self.BENCHMARK_CRITERIA,
+            resume_from=fxt_resume_from,
         )
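
Note (editorial, not part of the patch): a minimal usage sketch for the option this patch introduces. With `--resume-from` registered in `tests/conftest.py`, pointing the perf suite at an earlier run's output directory lets `Benchmark.run()` locate a matching `benchmark.raw.csv` (same task, model, data_group, data, and seed), copy that training directory, and skip straight to the test/export/optimize stages. The path below is a placeholder, and any other options your perf setup normally requires still need to be passed as usual:

    pytest tests/perf/test_detection.py --resume-from /path/to/previous/perf/output_dir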