From 8f9f8676284c288ce8df7b13784ed95f74bf12eb Mon Sep 17 00:00:00 2001
From: Yunchu Lee
Date: Mon, 8 Jan 2024 16:38:15 +0900
Subject: [PATCH] enable perf result logging to mlflow server

---
 .github/workflows/run_tests_in_tox.yml |  3 +
 requirements/dev.txt                   |  1 +
 tests/conftest.py                      | 15 ++++
 tests/perf/conftest.py                 | 97 ++++++++++++++++++++++++--
 tox.ini                                |  4 ++
 5 files changed, 114 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/run_tests_in_tox.yml b/.github/workflows/run_tests_in_tox.yml
index 40556df609d..3abdc92e8fc 100644
--- a/.github/workflows/run_tests_in_tox.yml
+++ b/.github/workflows/run_tests_in_tox.yml
@@ -51,6 +51,9 @@ jobs:
       - name: Install dependencies
         run: python -m pip install -r requirements/dev.txt
       - name: Run Tests
+        env:
+          MLFLOW_TRACKING_SERVER_URI: ${{ vars.MLFLOW_TRACKING_SERVER_URI }}
+          BENCHMARK_RESULTS_CLEAR: ${{ vars.BENCHMARK_RESULTS_CLEAR }}
         run: tox -vv -e tests-${{ inputs.toxenv-task }}-${{ inputs.toxenv-pyver }}-${{ inputs.toxenv-ptver }} -- ${{ inputs.tests-dir }}
       - name: Upload test results
         uses: actions/upload-artifact@v3
diff --git a/requirements/dev.txt b/requirements/dev.txt
index 3966fdcf396..1645d008dcd 100644
--- a/requirements/dev.txt
+++ b/requirements/dev.txt
@@ -10,3 +10,4 @@ onnx==1.13.0
 onnxruntime==1.14.1
 pytest-csv==3.0.*
 tox==4.11.*
+mlflow==2.9.*
diff --git a/tests/conftest.py b/tests/conftest.py
index 4ae77116996..e0e6956e841 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -13,6 +13,7 @@
     otx_pytest_addoption_insertion,
 )
 from .unit.api.fixtures.general import label_schema_example  # noqa: F401
+import mlflow
 
 pytest_plugins = get_pytest_plugins_from_otx()  # noqa: F405
 
@@ -93,3 +94,17 @@ def manage_tm_config_for_testing():
 
     if created_cfg_dir:
         os.rmdir(cfg_dir)
+
+
+@pytest.fixture(autouse=True, scope="session")
+def init_mlflow_tracking():
+    uri = os.environ.get("MLFLOW_TRACKING_SERVER_URI", "http://localhost:8080")
+    mlflow.set_tracking_uri(uri=uri)
+
+    yield
+
+
+@pytest.fixture(scope="session")
+def fxt_mlflow_client():
+    uri = os.environ.get("MLFLOW_TRACKING_SERVER_URI", "http://localhost:8080")
+    return mlflow.MlflowClient(uri)
diff --git a/tests/perf/conftest.py b/tests/perf/conftest.py
index d4be1d7e5ef..02d2962c053 100644
--- a/tests/perf/conftest.py
+++ b/tests/perf/conftest.py
@@ -2,15 +2,21 @@
 # SPDX-License-Identifier: Apache-2.0
 
-import pytest
 import os
+import shutil
 import subprocess
-import yaml
-from pathlib import Path
-from typing import List
 from datetime import datetime
+from pathlib import Path
+from typing import Dict, List
+
+import mlflow
+import pandas as pd
+import pytest
+import yaml
+
+from otx import __version__ as VERSION
+from otx.api.entities.model_template import ModelCategory, ModelTemplate
 
-from otx.api.entities.model_template import ModelTemplate, ModelCategory
 
 from .benchmark import OTXBenchmark
 
 
@@ -139,8 +145,79 @@ def fxt_benchmark(request: pytest.FixtureRequest, fxt_output_root: Path) -> OTXB
     return benchmark
 
 
+def logging_perf_results_to_mlflow(version: str, git_hash: str, results: pd.DataFrame, client: "MlflowClient"):
+    class DummyDatasetSource(mlflow.data.DatasetSource):
+        @staticmethod
+        def _get_source_type():
+            return "dummy"
+
+    class DummyDataset(mlflow.data.Dataset):
+        def _to_dict(self, base_dict):
+            return {
+                "name": base_dict["name"],
+                "digest": base_dict["digest"],
+                "source": base_dict["source"],
+                "source_type": base_dict["source_type"],
+            }
+
+    exp_name = "OTX Performance Benchmark"
+    exp = client.get_experiment_by_name(exp_name)
+    if exp is None:
+        exp_id = client.create_experiment(exp_name, tags={"Project": "OpenVINO Training Extensions"})
+    else:
+        exp_id = exp.experiment_id
+
+    mlflow.set_experiment(experiment_id=exp_id)
+
+    rows = results.to_dict(orient="records")
+    for row in rows:
+        task = row.pop("task")
+        model = row.pop("model")
+        data = row.pop("data")
+        data = os.path.dirname(data)
+        data_sz = row.pop("data_size")
+        benchmark = row.pop("benchmark")
+        runs = client.search_runs(
+            exp_id,
+            filter_string=f"tags.task LIKE '%{task}%' AND "
+            f"tags.model LIKE '%{model}%' AND "
+            f"tags.data LIKE '%{data}%' AND "
+            f"tags.benchmark LIKE '%{benchmark}%'",
+        )
+        run = None
+        is_new_run = True
+        run_name = f"[{benchmark}] {task} | {model}"
+        if len(runs) == 0:
+            run = client.create_run(exp_id, run_name=run_name)
+        else:
+            is_new_run = False
+            run = runs[0]
+
+        with mlflow.start_run(run_id=run.info.run_id):
+            if is_new_run:
+                mlflow.set_tag("task", task)
+                mlflow.set_tag("model", model)
+                mlflow.set_tag("data", data)
+                mlflow.set_tag("benchmark", benchmark)
+                dat_src = DummyDatasetSource()
+                dataset = DummyDataset(dat_src, data, data_sz)
+                mlflow.log_input(dataset)
+            mlflow.set_tag("version", version)
+            mlflow.set_tag("git-hash", git_hash)
+            for k, v in row.items():
+                if isinstance(v, int) or isinstance(v, float):
+                    k = k.replace("(", "_")
+                    k = k.replace(")", "")
+                    k = k.replace("%", "percentage")
+                    history = client.get_metric_history(run.info.run_id, k)
+                    step = 0
+                    if len(history) > 0:
+                        step = history[-1].step + 1
+                    mlflow.log_metric(k, v, step=step)
+
+
 @pytest.fixture(scope="session", autouse=True)
-def fxt_benchmark_summary(request: pytest.FixtureRequest, fxt_output_root: Path):
+def fxt_benchmark_summary(request: pytest.FixtureRequest, fxt_output_root: Path, fxt_mlflow_client):
     """Summarize all results at the end of test session."""
     yield
     all_results = OTXBenchmark.load_result(fxt_output_root)
@@ -152,3 +229,11 @@ def fxt_benchmark_summary(request: pytest.FixtureRequest, fxt_output_root: Path)
     output_path = fxt_output_root / "benchmark-summary.csv"
     all_results.to_csv(output_path, index=False)
     print(f" -> Saved to {output_path}.")
+
+    # Log the summarized results to the MLflow server
+    version = VERSION
+    git_hash = str(fxt_output_root).split("-")[-1]
+    logging_perf_results_to_mlflow(version, git_hash, all_results, fxt_mlflow_client)
+
+    if os.environ.get("BENCHMARK_RESULTS_CLEAR", False):
+        shutil.rmtree(fxt_output_root)
diff --git a/tox.ini b/tox.ini
index 1d38a6e391b..aef83193ed0 100644
--- a/tox.ini
+++ b/tox.ini
@@ -63,6 +63,10 @@ commands =
 deps =
     {[testenv]deps}
     -r{toxinidir}/requirements/dev.txt
+passenv =
+    {[testenv]passenv}
+    MLFLOW_TRACKING_SERVER_URI
+    BENCHMARK_RESULTS_CLEAR
 commands =
     python -m pytest -ra --showlocals --csv={toxworkdir}/{envname}.csv {posargs:tests/integration/{[testenv]test_dir}}
 
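
Editor's note: the following minimal sketch is not part of the patch. It is one
hypothetical way to sanity-check what fxt_benchmark_summary logs, by reading
the runs back from the tracking server. It assumes the same
MLFLOW_TRACKING_SERVER_URI default used by the fixtures above and the
experiment name created in logging_perf_results_to_mlflow(), and uses only
documented MlflowClient calls.

    import os

    import mlflow

    uri = os.environ.get("MLFLOW_TRACKING_SERVER_URI", "http://localhost:8080")
    client = mlflow.MlflowClient(uri)
    exp = client.get_experiment_by_name("OTX Performance Benchmark")
    if exp is not None:
        # Each benchmark case is one run, keyed by the task/model/data/benchmark
        # tags; repeated sessions append new steps to each metric history.
        for run in client.search_runs(experiment_ids=[exp.experiment_id]):
            print(run.info.run_name, run.data.tags.get("git-hash"), run.data.metrics)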