Disable Resnext101_ATSS model on XPU (#3514)

* disable resnext101_atss on XPU * give detailed xpu device info to perf tag * revert debug code --------- Co-authored-by: kirill prokofiev <kirill.prokofiev@intel.com>
openvinotoolkit · May 17, 2024 · 7d89f03 · 7d89f03
1 parent e9c3c9b
commit 7d89f03
Show file tree

Hide file tree

Showing 8 changed files with 55 additions and 12 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -347,9 +347,9 @@ max-returns = 10
 convention = "google"
 
 [tool.pytest.ini_options]
-# TODO: Add cpu when OTX can run integration test parallelly for each task.
 markers = [
-    "gpu: mark tests which require NVIDIA GPU device",
-    # "cpu: mark tests which require CPU device",
+    "gpu",  # mark tests which require NVIDIA GPU
+    "cpu",
+    "xpu",  # mark tests which require Intel dGPU
 ]
 python_files = "tests/**/*.py"
diff --git a/src/otx/algo/detection/atss.py b/src/otx/algo/detection/atss.py
@@ -40,6 +40,7 @@
 if TYPE_CHECKING:
     from lightning.pytorch.cli import LRSchedulerCallable, OptimizerCallable
     from torch import Tensor, nn
+    from typing_extensions import Self
 
     from otx.core.metrics import MetricCallable
 
@@ -362,3 +363,11 @@ def _build_model(self, num_classes: int) -> SingleStageDetector:
             test_cfg=test_cfg,
         )
         return SingleStageDetector(backbone, bbox_head, neck=neck, train_cfg=train_cfg, test_cfg=test_cfg)
+
+    def to(self, *args, **kwargs) -> Self:
+        """Delegate to the parent ``to()``, then reject XPU devices.
+
+        Args:
+            *args: Positional arguments forwarded unchanged to ``super().to()``.
+            **kwargs: Keyword arguments forwarded unchanged to ``super().to()``.
+
+        Returns:
+            The value returned by ``super().to()``.
+
+        Raises:
+            RuntimeError: If ``self.device.type`` is ``"xpu"`` once the parent
+                call has returned.
+        """
+        # The check runs after the parent call, so it inspects ``self.device``
+        # as it is once that call has returned.
+        ret = super().to(*args, **kwargs)
+        if self.device.type == "xpu":
+            msg = f"{type(self).__name__} doesn't support XPU."
+            raise RuntimeError(msg)
+        return ret
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -133,6 +133,13 @@ def pytest_addoption(parser: pytest.Parser):
         type=str,
         help="Task type of OTX to use test.",
     )
+    parser.addoption(
+        "--device",
+        action="store",
+        default="gpu",
+        type=str,
+        help="Which device to use.",
+    )
 
 
 @pytest.fixture(scope="session")
@@ -344,10 +351,9 @@ def fxt_clean_up_mem_cache():
     MemCacheHandlerSingleton.delete()
 
 
-# TODO(Jaeguk): Add cpu param when OTX can run integration test parallelly for each task.
-@pytest.fixture(scope="module", params=[pytest.param("gpu", marks=pytest.mark.gpu)])
+@pytest.fixture(scope="session")
 def fxt_accelerator(request: pytest.FixtureRequest) -> str:
-    return request.param
+    return request.config.getoption("--device", "gpu")
 
 
 @pytest.fixture(params=set(OTXTaskType) - {OTXTaskType.DETECTION_SEMI_SL})

diff --git a/tests/perf/conftest.py b/tests/perf/conftest.py
@@ -227,19 +227,19 @@ def fxt_dataset(request: pytest.FixtureRequest, fxt_data_group) -> Benchmark.Dat
 
 
 @pytest.fixture(scope="session")
-def fxt_tags(fxt_user_name: str, fxt_version_tags: dict[str, str]) -> dict[str, str]:
+def fxt_tags(fxt_user_name: str, fxt_version_tags: dict[str, str], fxt_accelerator: str) -> dict[str, str]:
     """Tag fields to record the machine and user executing this perf test."""
     tags = {
         **fxt_version_tags,
         "user_name": fxt_user_name,
         "machine_name": platform.node(),
         "cpu_info": get_cpu_info()["brand_raw"],
-        "accelerator_info": subprocess.check_output(
-            ["nvidia-smi", "-L"],  # noqa: S603, S607
-        )
-        .decode()
-        .strip(),
     }
+    if fxt_accelerator == "gpu":
+        tags["accelerator_info"] = subprocess.check_output(["nvidia-smi", "-L"]).decode().strip()  # noqa: S603, S607
+    elif fxt_accelerator == "xpu":
+        raw = subprocess.check_output(args=["xpu-smi", "discovery", "--dump", "1,2"]).decode().strip()
+        tags["accelerator_info"] = "\n".join([ret.replace('"', "").replace(",", " : ") for ret in raw.split("\n")[1:]])
     msg = f"{tags = }"
     log.info(msg)
     return tags

diff --git a/tests/perf/test_classification.py b/tests/perf/test_classification.py
@@ -80,7 +80,11 @@ def test_perf(
         fxt_model: Benchmark.Model,
         fxt_dataset: Benchmark.Dataset,
         fxt_benchmark: Benchmark,
+        fxt_accelerator: str,
     ):
+        if fxt_model.name == "dino_v2" and fxt_accelerator == "xpu":
+            pytest.skip(f"{fxt_model.name} doesn't support {fxt_accelerator}.")
+
         self._test_perf(
             model=fxt_model,
             dataset=fxt_dataset,
@@ -155,7 +159,11 @@ def test_perf(
         fxt_model: Benchmark.Model,
         fxt_dataset: Benchmark.Dataset,
         fxt_benchmark: Benchmark,
+        fxt_accelerator: str,
     ):
+        if fxt_model.name == "dino_v2" and fxt_accelerator == "xpu":
+            pytest.skip(f"{fxt_model.name} doesn't support {fxt_accelerator}.")
+
         self._test_perf(
             model=fxt_model,
             dataset=fxt_dataset,
@@ -224,7 +232,11 @@ def test_perf(
         fxt_model: Benchmark.Model,
         fxt_dataset: Benchmark.Dataset,
         fxt_benchmark: Benchmark,
+        fxt_accelerator: str,
     ):
+        if fxt_model.name == "dino_v2" and fxt_accelerator == "xpu":
+            pytest.skip(f"{fxt_model.name} doesn't support {fxt_accelerator}.")
+
         self._test_perf(
             model=fxt_model,
             dataset=fxt_dataset,

diff --git a/tests/perf/test_detection.py b/tests/perf/test_detection.py
@@ -103,7 +103,11 @@ def test_perf(
         fxt_model: Benchmark.Model,
         fxt_dataset: Benchmark.Dataset,
         fxt_benchmark: Benchmark,
+        fxt_accelerator: str,
     ):
+        if fxt_model.name == "atss_resnext101" and fxt_accelerator == "xpu":
+            pytest.skip(f"{fxt_model.name} doesn't support {fxt_accelerator}.")
+
         self._test_perf(
             model=fxt_model,
             dataset=fxt_dataset,

diff --git a/tests/perf/test_instance_segmentation.py b/tests/perf/test_instance_segmentation.py
@@ -108,7 +108,11 @@ def test_perf(
         fxt_model: Benchmark.Model,
         fxt_dataset: Benchmark.Dataset,
         fxt_benchmark: Benchmark,
+        fxt_accelerator: str,
     ):
+        if fxt_model.name == "maskrcnn_r50" and fxt_accelerator == "xpu":
+            pytest.skip(f"{fxt_model.name} doesn't support {fxt_accelerator}.")
+
         self._test_perf(
             model=fxt_model,
             dataset=fxt_dataset,
@@ -196,7 +200,11 @@ def test_perf(
         fxt_model: Benchmark.Model,
         fxt_dataset: Benchmark.Dataset,
         fxt_benchmark: Benchmark,
+        fxt_accelerator: str,
     ):
+        if fxt_model.name == "maskrcnn_r50" and fxt_accelerator == "xpu":
+            pytest.skip(f"{fxt_model.name} doesn't support {fxt_accelerator}.")
+
         self._test_perf(
             model=fxt_model,
             dataset=fxt_dataset,

diff --git a/tests/perf/test_semantic_segmentation.py b/tests/perf/test_semantic_segmentation.py
@@ -82,7 +82,11 @@ def test_perf(
         fxt_model: Benchmark.Model,
         fxt_dataset: Benchmark.Dataset,
         fxt_benchmark: Benchmark,
+        fxt_accelerator: str,
     ):
+        if fxt_model.name == "dino_v2" and fxt_accelerator == "xpu":
+            pytest.skip(f"{fxt_model.name} doesn't support {fxt_accelerator}.")
+
         self._test_perf(
             model=fxt_model,
             dataset=fxt_dataset,