Add anomaly perf benchmark tests
goodsong81 committed Mar 19, 2024
1 parent 106c111 commit 287c53d
Showing 2 changed files with 260 additions and 8 deletions.
32 changes: 26 additions & 6 deletions tests/perf/benchmark.py
@@ -172,7 +172,7 @@ def run(
start_time = time()
self._run_command(command)
extra_metrics = {"train/e2e_time": time() - start_time}
- self._rename_raw_data(work_dir=sub_work_dir / ".latest" / "train", replaces={"epoch": "train/epoch"})
+ self._rename_raw_data(work_dir=sub_work_dir / ".latest" / "train", replaces={"train_": "train/", "{pre}": "train/"})
self._log_metrics(
work_dir=sub_work_dir / ".latest" / "train",
tags=tags,
@@ -187,6 +187,7 @@ def run(
str(sub_work_dir),
]
self._run_command(command)
self._rename_raw_data(work_dir=sub_work_dir / ".latest" / "test", replaces={"test_": "test/", "{pre}": "test/"})
self._log_metrics(work_dir=sub_work_dir / ".latest" / "test", tags=tags, criteria=criteria)

# Export & test
@@ -215,7 +216,7 @@ def run(
]
self._run_command(command)

- self._rename_raw_data(work_dir=sub_work_dir / ".latest" / "test", replaces={"test": "export"})
+ self._rename_raw_data(work_dir=sub_work_dir / ".latest" / "test", replaces={"test": "export", "{pre}": "export/"})
self._log_metrics(work_dir=sub_work_dir / ".latest" / "test", tags=tags, criteria=criteria)

# Optimize & test
@@ -250,7 +251,7 @@ def run(
]
self._run_command(command)

- self._rename_raw_data(work_dir=sub_work_dir / ".latest" / "test", replaces={"test": "optimize"})
+ self._rename_raw_data(work_dir=sub_work_dir / ".latest" / "test", replaces={"test": "optimize", "{pre}": "optimize/"})
self._log_metrics(work_dir=sub_work_dir / ".latest" / "test", tags=tags, criteria=criteria)

# Force memory clean up
@@ -310,11 +311,24 @@ def _log_metrics(
metrics.to_csv(work_dir / "benchmark.raw.csv", index=False)

def _rename_raw_data(self, work_dir: Path, replaces: dict[str, str]) -> None:
replaces = {**self.NAME_MAPPING, **replaces}
def _rename_col(col_name: str) -> str:
for src_str, dst_str in replaces.items():
if src_str == "{pre}":
if not col_name.startswith(dst_str):
col_name = dst_str + col_name
elif src_str == "{post}":
if not col_name.endswith(dst_str):
col_name = col_name + dst_str
else:
col_name = col_name.replace(src_str, dst_str)
return col_name

csv_files = work_dir.glob("**/metrics.csv")
for csv_file in csv_files:
data = pd.read_csv(csv_file)
- for src_str, dst_str in replaces.items():
-     data.columns = data.columns.str.replace(src_str, dst_str)
+ data = data.rename(columns=_rename_col)  # Column names
+ data = data.replace(replaces)  # Values
data.to_csv(csv_file, index=False)
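Taken in isolation, the renaming introduced above works as follows. This is a minimal sketch of the same logic with hypothetical column names: plain keys are substring replacements, while the `{pre}` placeholder only prepends the prefix when the column does not already start with it (`{post}` appends analogously).

```python
# Standalone sketch of the _rename_raw_data column handling; the column names
# below are made up for illustration.
import pandas as pd

def rename_col(col_name: str, replaces: dict[str, str]) -> str:
    for src_str, dst_str in replaces.items():
        if src_str == "{pre}":
            if not col_name.startswith(dst_str):
                col_name = dst_str + col_name  # add missing prefix
        elif src_str == "{post}":
            if not col_name.endswith(dst_str):
                col_name = col_name + dst_str  # add missing suffix
        else:
            col_name = col_name.replace(src_str, dst_str)  # plain substring swap
    return col_name

raw = pd.DataFrame(columns=["test_image_F1Score", "iter_time", "test/epoch"])
replaces = {"test_": "test/", "{pre}": "test/"}
print([rename_col(c, replaces) for c in raw.columns])
# ['test/image_F1Score', 'test/iter_time', 'test/epoch']
```

This way every per-stage metric ends up under a common "train/", "test/", "export/" or "optimize/" prefix, which is what the criterion names in the new test file key on.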

@staticmethod
@@ -338,7 +352,7 @@ def load_result(result_path: Path) -> pd.DataFrame | None:
return pd.concat(results, ignore_index=True).set_index(["task", "model", "data_group", "data"])

@staticmethod
- def average_result(data: pd.DataFrame, keys: list[str]) -> pd.DataFrame:
+ def average_result(data: pd.DataFrame, keys: list[str]) -> pd.DataFrame | None:
"""Average result w.r.t. given keys
Args:
@@ -348,6 +362,9 @@ def average_result(data: pd.DataFrame, keys: list[str]) -> pd.DataFrame:
Returns:
pd.DataFrame: Averaged result table
"""
if data is None:
return None

# Flatten index
index_names = data.index.names
column_names = data.columns
@@ -391,3 +408,6 @@ def check(self, result: pd.DataFrame, criteria: list[Criterion]):

for criterion in criteria:
criterion(result_entry, target_entry)

NAME_MAPPING: dict[str, str] = {
}
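The `check()` hunk above hands each `Criterion` a result entry and a target entry. The actual comparison is implemented elsewhere in the perf suite; the sketch below only illustrates one plausible reading of the `compare`/`margin` fields used by the new anomaly criteria and should not be taken as the real implementation.

```python
# Hypothetical illustration of a margin-based criterion check. The actual
# Benchmark.Criterion lives elsewhere in tests/perf and may differ.
from dataclasses import dataclass

@dataclass
class Criterion:
    name: str      # metric column, e.g. "train/e2e_time" or "test/image_F1Score"
    summary: str   # how repeats are aggregated before the check ("max", "mean", ...)
    compare: str   # "<" = lower is better, ">" = higher is better
    margin: float  # relative tolerance around the target value

    def __call__(self, result: float, target: float) -> None:
        if self.compare == "<":
            assert result <= target * (1.0 + self.margin), self.name
        elif self.compare == ">":
            assert result >= target * (1.0 - self.margin), self.name

# e.g. a 10% slowdown budget on end-to-end training time:
Criterion("train/e2e_time", "max", "<", 0.1)(result=95.0, target=100.0)
```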
236 changes: 234 additions & 2 deletions tests/perf/test_anomaly.py
@@ -5,11 +5,243 @@

from __future__ import annotations

from pathlib import Path

import pytest

from .benchmark import Benchmark
from .conftest import PerfTestBase


class TestPerfAnomalyClassification(PerfTestBase):
"""Benchmark anomaly classification."""

- def test_dummay(self):
-     pass
MODEL_TEST_CASES = [ # noqa: RUF012
Benchmark.Model(task="anomaly_classification", name="padim", category="speed"),
Benchmark.Model(task="anomaly_classification", name="stfpm", category="accuracy"),
]

DATASET_TEST_CASES = [
Benchmark.Dataset(
name=f"mvtec_bottle_small_{idx}",
path=Path("anomaly/mvtec/bottle_small") / f"{idx}",
group="small",
data_format="mvtec",
num_classes=2,
num_repeat=3,
extra_overrides={},
)
for idx in (1, 2, 3)
] + [
Benchmark.Dataset(
name="mvtec_wood_medium",
path=Path("anomaly/mvtec/wood_medium"),
group="medium",
data_format="mvtec",
num_classes=2,
num_repeat=3,
extra_overrides={},
),
Benchmark.Dataset(
name="mvtec_hazelnut_large",
path=Path("anomaly/mvtec/hazelnut_large"),
group="large",
data_format="mvtec",
num_classes=2,
num_repeat=3,
extra_overrides={},
),
]

BENCHMARK_CRITERIA = [ # noqa: RUF012
Benchmark.Criterion(name="train/epoch", summary="max", compare="<", margin=0.1),
Benchmark.Criterion(name="train/e2e_time", summary="max", compare="<", margin=0.1),
Benchmark.Criterion(name="test/image_F1Score", summary="max", compare=">", margin=0.1),
Benchmark.Criterion(name="export/image_F1Score", summary="max", compare=">", margin=0.1),
Benchmark.Criterion(name="optimize/image_F1Score", summary="max", compare=">", margin=0.1),
Benchmark.Criterion(name="train/iter_time", summary="mean", compare="<", margin=0.1),
Benchmark.Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
Benchmark.Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
Benchmark.Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
]

@pytest.mark.parametrize(
"fxt_model",
MODEL_TEST_CASES,
ids=lambda model: model.name,
indirect=True,
)
@pytest.mark.parametrize(
"fxt_dataset",
DATASET_TEST_CASES,
ids=lambda dataset: dataset.name,
indirect=True,
)
def test_perf(
self,
fxt_model: Benchmark.Model,
fxt_dataset: Benchmark.Dataset,
fxt_benchmark: Benchmark,
):
self._test_perf(
model=fxt_model,
dataset=fxt_dataset,
benchmark=fxt_benchmark,
criteria=self.BENCHMARK_CRITERIA,
)
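Both `parametrize` decorators use `indirect=True`, so each `Benchmark.Model` / `Benchmark.Dataset` case is routed through a fixture of the same name in `tests/perf/conftest.py` rather than being passed to the test directly. That conftest is not part of this diff; the snippet below only sketches the general pytest pattern these tests rely on, with assumed fixture bodies.

```python
# Sketch of the indirect-parametrization pattern (assumed fixture bodies);
# the real fxt_model / fxt_dataset / fxt_benchmark fixtures are defined in
# tests/perf/conftest.py, which is not shown in this commit.
import pytest

@pytest.fixture
def fxt_model(request: pytest.FixtureRequest):
    # request.param is the Benchmark.Model entry selected from MODEL_TEST_CASES.
    return request.param

@pytest.fixture
def fxt_dataset(request: pytest.FixtureRequest):
    # request.param is the Benchmark.Dataset entry selected from DATASET_TEST_CASES.
    return request.param
```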


class TestPerfAnomalyDetection(PerfTestBase):
"""Benchmark anomaly detection."""

MODEL_TEST_CASES = [ # noqa: RUF012
Benchmark.Model(task="anomaly_detection", name="padim", category="speed"),
Benchmark.Model(task="anomaly_detection", name="stfpm", category="accuracy"),
]

DATASET_TEST_CASES = [
Benchmark.Dataset(
name=f"mvtec_bottle_small_{idx}",
path=Path("anomaly/mvtec/bottle_small") / f"{idx}",
group="small",
data_format="mvtec",
num_classes=2,
num_repeat=3,
extra_overrides={},
)
for idx in (1, 2, 3)
] + [
Benchmark.Dataset(
name="mvtec_wood_medium",
path=Path("anomaly/mvtec/wood_medium"),
group="medium",
data_format="mvtec",
num_classes=2,
num_repeat=3,
extra_overrides={},
),
Benchmark.Dataset(
name="mvtec_hazelnut_large",
path=Path("anomaly/mvtec/hazelnut_large"),
group="large",
data_format="mvtec",
num_classes=2,
num_repeat=3,
extra_overrides={},
),
]

BENCHMARK_CRITERIA = [ # noqa: RUF012
Benchmark.Criterion(name="train/epoch", summary="max", compare="<", margin=0.1),
Benchmark.Criterion(name="train/e2e_time", summary="max", compare="<", margin=0.1),
Benchmark.Criterion(name="test/image_F1Score", summary="max", compare=">", margin=0.1),
Benchmark.Criterion(name="export/image_F1Score", summary="max", compare=">", margin=0.1),
Benchmark.Criterion(name="optimize/image_F1Score", summary="max", compare=">", margin=0.1),
Benchmark.Criterion(name="train/iter_time", summary="mean", compare="<", margin=0.1),
Benchmark.Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
Benchmark.Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
Benchmark.Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
]

@pytest.mark.parametrize(
"fxt_model",
MODEL_TEST_CASES,
ids=lambda model: model.name,
indirect=True,
)
@pytest.mark.parametrize(
"fxt_dataset",
DATASET_TEST_CASES,
ids=lambda dataset: dataset.name,
indirect=True,
)
def test_perf(
self,
fxt_model: Benchmark.Model,
fxt_dataset: Benchmark.Dataset,
fxt_benchmark: Benchmark,
):
self._test_perf(
model=fxt_model,
dataset=fxt_dataset,
benchmark=fxt_benchmark,
criteria=self.BENCHMARK_CRITERIA,
)


class TestPerfAnomalySegmentation(PerfTestBase):
"""Benchmark anomaly segmentation."""

MODEL_TEST_CASES = [ # noqa: RUF012
Benchmark.Model(task="anomaly_segmentation", name="padim", category="speed"),
Benchmark.Model(task="anomaly_segmentation", name="stfpm", category="accuracy"),
]

DATASET_TEST_CASES = [
Benchmark.Dataset(
name=f"mvtec_bottle_small_{idx}",
path=Path("anomaly/mvtec/bottle_small") / f"{idx}",
group="small",
data_format="mvtec",
num_classes=2,
num_repeat=3,
extra_overrides={},
)
for idx in (1, 2, 3)
] + [
Benchmark.Dataset(
name="mvtec_wood_medium",
path=Path("anomaly/mvtec/wood_medium"),
group="medium",
data_format="mvtec",
num_classes=2,
num_repeat=3,
extra_overrides={},
),
Benchmark.Dataset(
name="mvtec_hazelnut_large",
path=Path("anomaly/mvtec/hazelnut_large"),
group="large",
data_format="mvtec",
num_classes=2,
num_repeat=3,
extra_overrides={},
),
]

BENCHMARK_CRITERIA = [ # noqa: RUF012
Benchmark.Criterion(name="train/epoch", summary="max", compare="<", margin=0.1),
Benchmark.Criterion(name="train/e2e_time", summary="max", compare="<", margin=0.1),
Benchmark.Criterion(name="test/pixel_F1Score", summary="max", compare=">", margin=0.1),
Benchmark.Criterion(name="export/pixel_F1Score", summary="max", compare=">", margin=0.1),
Benchmark.Criterion(name="optimize/pixel_F1Score", summary="max", compare=">", margin=0.1),
Benchmark.Criterion(name="train/iter_time", summary="mean", compare="<", margin=0.1),
Benchmark.Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
Benchmark.Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
Benchmark.Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
]

@pytest.mark.parametrize(
"fxt_model",
MODEL_TEST_CASES,
ids=lambda model: model.name,
indirect=True,
)
@pytest.mark.parametrize(
"fxt_dataset",
DATASET_TEST_CASES,
ids=lambda dataset: dataset.name,
indirect=True,
)
def test_perf(
self,
fxt_model: Benchmark.Model,
fxt_dataset: Benchmark.Dataset,
fxt_benchmark: Benchmark,
):
self._test_perf(
model=fxt_model,
dataset=fxt_dataset,
benchmark=fxt_benchmark,
criteria=self.BENCHMARK_CRITERIA,
)
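All three classes share the same model/dataset grid and differ only in the task name and in using pixel-level rather than image-level F1 for segmentation. Assuming the perf suite is collected like a regular pytest package, a single model can be selected with a keyword filter; any project-specific options (dataset roots, result directories) are handled by `tests/perf/conftest.py` and may also be required.

```python
# One way to run only the new anomaly benchmarks for the "padim" cases,
# equivalent to `pytest tests/perf/test_anomaly.py -k padim` on the command
# line. Extra perf-suite options from tests/perf/conftest.py may be needed.
import pytest

pytest.main(["tests/perf/test_anomaly.py", "-k", "padim"])
```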
