Commit: Samples tests refactoring
mryzhov committed Jan 31, 2025
1 parent 3b13a40 commit cafc9b1
Showing 8 changed files with 269 additions and 42 deletions.
66 changes: 25 additions & 41 deletions .github/workflows/linux.yml
@@ -83,9 +83,10 @@ jobs:
    runs-on: aks-linux-4-cores-16gb
    container:
      image: openvinogithubactions.azurecr.io/ov_build/ubuntu_22_04_x64:${{ needs.openvino_download.outputs.docker_tag }}
      volumes:
        - /mount:/mount
-     options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING -v ${{ github.workspace }}:${{ github.workspace }}
+       - ${{ github.workspace }}:${{ github.workspace }}
+     options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING
    env:
      CMAKE_GENERATOR: Unix Makefiles
      OV_INSTALL_DIR: ${{ github.workspace }}/ov
@@ -316,17 +317,25 @@ jobs:
        working-directory: ${{ env.SRC_DIR }}

  genai_samples_tests:
-   name: Samples Tests - ${{ matrix.build-type }}
+   name: Sample ${{ matrix.test.name }} (${{ matrix.build-type }})
    strategy:
      fail-fast: false
      matrix:
        build-type: [Release]
-   needs: [ openvino_download, genai_build_cmake, genai_build_wheel, genai_build_samples ]
+       test:
+         - name: 'LLM'
+           marker: 'llm'
+           cmd: 'tests/python_tests/samples'
+         - name: 'Whisper'
+           marker: 'whisper'
+           cmd: 'tests/python_tests/samples'
+
+   needs: [ openvino_download, genai_build_cmake, genai_build_wheel, genai_build_samples ]
    timeout-minutes: 45
    defaults:
      run:
        shell: bash
-   runs-on: aks-linux-2-cores-8gb
+   runs-on: aks-linux-8-cores-32gb
    container:
      image: openvinogithubactions.azurecr.io/ov_test/ubuntu_22_04_x64:${{ needs.openvino_download.outputs.docker_tag }}
      volumes:
@@ -338,7 +347,8 @@ jobs:
      SRC_DIR: ${{ github.workspace }}/src
      BUILD_DIR: ${{ github.workspace }}/build
      MODELS_DIR: ${{ github.workspace }}/models
+     TEMP_DIR: ${{ github.workspace }}/temp

    steps:
      - name: Clone openvino.genai
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -362,43 +372,17 @@ jobs:
      - name: Install Wheels
        uses: ./src/.github/actions/install_wheel
        with:
-         packages: "openvino;openvino_tokenizers[transformers];openvino_genai"
+         packages: "openvino;openvino_tokenizers[transformers];openvino_genai[testing]"
          requirements_files: "${{ env.SRC_DIR }}/samples/requirements.txt"
          local_wheel_dir: ${{ env.INSTALL_DIR }}/wheels

-     - name: Download & convert Models and data
-       run: |
-         mkdir -p ${{ env.MODELS_DIR }}
-         optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 ${{ env.MODELS_DIR }}/TinyLlama-1.1B-Chat-v1.0
-         optimum-cli export openvino --trust-remote-code --model openai/whisper-tiny ${{ env.MODELS_DIR }}/whisper-tiny
-         wget https://storage.openvinotoolkit.org/models_contrib/speech/2021.2/librispeech_s5/how_are_you_doing_today.wav -O ${{ env.MODELS_DIR }}/how_are_you_doing_today.wav
-     - name: Test multinomial_causal_lm.py
-       if: ${{ 'Release' == matrix.build-type }} # Python bindings can be built in Release only
-       timeout-minutes: 1
-       run: ${{ env.INSTALL_DIR }}/samples/python/text_generation/multinomial_causal_lm.py ./TinyLlama-1.1B-Chat-v1.0/ 0
-       working-directory: ${{ env.MODELS_DIR }}
-
-     - name: Test whisper_speech_recognition.py
-       if: ${{ 'Release' == matrix.build-type }} # Python bindings can be built in Release only
-       timeout-minutes: 1
-       run: ${{ env.INSTALL_DIR }}/samples/python/whisper_speech_recognition/whisper_speech_recognition.py ./whisper-tiny/ how_are_you_doing_today.wav
-       working-directory: ${{ env.MODELS_DIR }}
-
-     - name: C++ Tests Prerequisites
-       run: python -m pip uninstall openvino openvino-tokenizers openvino-genai -y
-
-     - name: Test greedy_causal_lm
-       run: |
-         source ${{ env.INSTALL_DIR }}/setupvars.sh
-         ${{ env.INSTALL_DIR }}/samples_bin/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ ""
-       working-directory: ${{ env.MODELS_DIR }}
-
-     - name: Test whisper_speech_recognition
-       run: |
-         source ${{ env.INSTALL_DIR }}/setupvars.sh
-         ${{ env.INSTALL_DIR }}/samples_bin/whisper_speech_recognition ./whisper-tiny/ how_are_you_doing_today.wav
-       working-directory: ${{ env.MODELS_DIR }}
-
+     - name: Test Samples (Python and C++)
+       run: python -m pytest -vv -s ${{ env.SRC_DIR }}/${{ matrix.test.cmd }} -m "${{ env.TEST_MARKERS }}"
+       env:
+         LD_LIBRARY_PATH: "${{ env.INSTALL_DIR }}/runtime/lib/intel64:${{ env.INSTALL_DIR }}/runtime/3rdparty/tbb/lib:$LD_LIBRARY_PATH" # Required for C++ samples
+         SAMPLES_PY_DIR: "${{ env.INSTALL_DIR }}/samples/python"
+         SAMPLES_CPP_DIR: "${{ env.INSTALL_DIR }}/samples_bin"
+         TEST_MARKERS: ${{ (matrix.build-type == 'Release') && matrix.test.marker || format('{0} and cpp', matrix.test.marker) }}

  genai_build_nodejs_bindings:
    name: Build Node.js bindings
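
Note on the TEST_MARKERS line above: it uses the GitHub Actions `cond && a || b` ternary idiom. A minimal Python sketch of how it resolves per matrix entry (the helper name is illustrative, not part of the workflow):

# Python bindings are built only in Release, so non-Release runs restrict
# the marker selection to tests that are also marked `cpp`.
def resolve_test_markers(build_type: str, marker: str) -> str:
    return marker if build_type == "Release" else f"{marker} and cpp"

assert resolve_test_markers("Release", "llm") == "llm"
assert resolve_test_markers("Debug", "whisper") == "whisper and cpp"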
4 changes: 3 additions & 1 deletion pyproject.toml
@@ -32,6 +32,8 @@ classifiers = [
dependencies = [
    "openvino_tokenizers~=2025.1.0.0.dev"
]
+[project.optional-dependencies]
+testing = ["pytest>=6.0"]

[tool.py-build-cmake.module]
directory = "src/python"
@@ -63,4 +65,4 @@ build-backend = "py_build_cmake.build"
markers = [
    "nightly",
    "precommit: (deselect with '-m \"precommit\"')",
-]
+]
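
The new `[project.optional-dependencies]` table is what lets the workflow's Install Wheels step request `openvino_genai[testing]`. A sketch of the equivalent local install, assuming a directory of pre-built wheels (the path is an assumption, not taken from the workflow):

# Install openvino_genai from a local wheel directory with the `testing`
# extra enabled, pulling in pytest>=6.0 alongside the package.
import subprocess
import sys

subprocess.run(
    [sys.executable, "-m", "pip", "install",
     "--find-links", "/tmp/wheels",  # corresponds to local_wheel_dir above
     "openvino_genai[testing]"],
    check=True,
)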
12 changes: 12 additions & 0 deletions tests/python_tests/pytest.ini
@@ -1,8 +1,20 @@
[pytest]

markers =
+    ; The following markers are defined for categorizing tests:
+    ; precommit - Tests that should be run before committing code.
+    ; nightly - Tests that are run as part of the nightly build process.
+    ; real_models - Tests that involve real model execution.
+    ; llm - Tests related to large language models.
+    ; whisper - Tests related to the Whisper model.
+    ; cpp - Tests that involve C++ code.
+    ; py - Tests that involve Python code.
    precommit
    nightly
    real_models
+    llm
+    whisper
+    cpp
+    py

addopts = -m precommit
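
Because `addopts = -m precommit` is the default, the explicit `-m` that the workflow passes on the command line takes precedence for marker selection (for single-value options, later flags win). A sketch of the two expressions the new matrix produces for the LLM suite, run from the repository root:

# Release runs select all `llm` tests; non-Release runs narrow the
# selection to `llm and cpp`, i.e. C++-only samples.
import subprocess

for markers in ("llm", "llm and cpp"):
    subprocess.run(
        ["python", "-m", "pytest", "-vv", "-s",
         "tests/python_tests/samples", "-m", markers],
        check=True,
    )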
96 changes: 96 additions & 0 deletions tests/python_tests/samples/conftest.py
@@ -0,0 +1,96 @@
import subprocess
import os
import tempfile
import pytest
import shutil
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Define model names and directories
MODELS = {
    "TinyLlama-1.1B-Chat-v1.0": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "TinyLlama-1.1B-intermediate-step-1431k-3T": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
    "WhisperTiny": "openai/whisper-tiny",
    "open_llama_3b_v2": "openlm-research/open_llama_3b_v2"
}

TEST_FILES = {
    "how_are_you_doing_today.wav": "https://storage.openvinotoolkit.org/models_contrib/speech/2021.2/librispeech_s5/how_are_you_doing_today.wav",
    "adapter_model.safetensors": "https://huggingface.co/smangrul/tinyllama_lora_sql/resolve/main/adapter_model.safetensors"
}

TEMP_DIR = os.environ.get("TEMP_DIR", tempfile.mkdtemp())
MODELS_DIR = os.path.join(TEMP_DIR, "test_models")
TEST_DATA = os.path.join(TEMP_DIR, "test_data")

SAMPLES_PY_DIR = os.environ.get("SAMPLES_PY_DIR", os.getcwd())
SAMPLES_CPP_DIR = os.environ.get("SAMPLES_CPP_DIR", os.getcwd())

# A shared fixture to hold data across tests
@pytest.fixture(scope="session")
def shared_data():
    return {}

@pytest.fixture(scope="session", autouse=True)
def setup_and_teardown(request):
    """Fixture to set up and tear down the temporary directories."""
    logger.info(f"Creating directories: {MODELS_DIR} and {TEST_DATA}")
    os.makedirs(MODELS_DIR, exist_ok=True)
    os.makedirs(TEST_DATA, exist_ok=True)
    yield
    if not os.environ.get("TEMP_DIR"):
        logger.info(f"Removing temporary directory: {TEMP_DIR}")
        shutil.rmtree(TEMP_DIR)
    else:
        logger.info(f"Skipping cleanup of temporary directory: {TEMP_DIR}")

@pytest.fixture(scope="session")
def convert_model(request):
    """Fixture to convert the model once per session."""
    params = request.param
    model_id = params.get("model_id")
    extra_args = params.get("extra_args", [])
    model_name = MODELS[model_id]
    model_path = os.path.join(MODELS_DIR, model_name)
    logger.info(f"Preparing model: {model_name}")
    # Convert the model if not already converted
    if not os.path.exists(model_path):
        logger.info(f"Converting model: {model_name}")
        command = [
            "optimum-cli", "export", "openvino",
            "--model", model_name, model_path
        ]
        if extra_args:
            command.extend(extra_args)
        result = subprocess.run(command, check=True)
        assert result.returncode == 0, f"Model {model_name} conversion failed"
    yield model_path
    # Clean up the model after tests
    if os.path.exists(model_path):
        logger.info(f"Removing converted model: {model_path}")
        shutil.rmtree(model_path)

@pytest.fixture(scope="session")
def download_test_content(request):
    """Download the test content from the given URL and return the file path."""
    file_url = request.param
    file_name = os.path.basename(file_url)
    file_path = os.path.join(TEST_DATA, file_name)
    if not os.path.exists(file_path):
        logger.info(f"Downloading test content from {file_url}...")
        result = subprocess.run(
            ["wget", file_url, "-O", file_path],
            check=True
        )
        assert result.returncode == 0, "Failed to download test content"
        logger.info(f"Downloaded test content to {file_path}")
    else:
        logger.info(f"Test content already exists at {file_path}")
    yield file_path
    # Clean up the test content after tests
    if os.path.exists(file_path):
        logger.info(f"Removing test content: {file_path}")
        os.remove(file_path)
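
The fixtures above are driven by indirect parametrization: the dict (or URL) passed via `@pytest.mark.parametrize(..., indirect=True)` arrives as `request.param`, and the fixture yields a ready-to-use path. A minimal sketch of a new sample test built on them (the sample binary name is hypothetical):

import os
import subprocess

import pytest

from conftest import SAMPLES_CPP_DIR


@pytest.mark.llm
@pytest.mark.cpp
@pytest.mark.parametrize("convert_model", [
    {"model_id": "TinyLlama-1.1B-Chat-v1.0"}  # a key of MODELS
], indirect=True)
def test_cpp_sample_example(convert_model):
    # convert_model is the model path yielded by the session-scoped fixture
    sample = os.path.join(SAMPLES_CPP_DIR, "example_sample")  # hypothetical binary
    subprocess.run([sample, convert_model, "Hello"], check=True)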
42 changes: 42 additions & 0 deletions tests/python_tests/samples/test_greedy_causal_lm.py
@@ -0,0 +1,42 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os
import subprocess
import pytest
from conftest import TEST_FILES, SAMPLES_PY_DIR, SAMPLES_CPP_DIR

# Greedy causal LM samples

@pytest.mark.llm
@pytest.mark.cpp
@pytest.mark.parametrize("convert_model", [
    {"model_id": "TinyLlama-1.1B-Chat-v1.0"}
], indirect=["convert_model"])
@pytest.mark.parametrize("sample_args", [""])
def test_cpp_sample_greedy_causal_lm_tiny_llama(convert_model, sample_args):
    cpp_sample = os.path.join(SAMPLES_CPP_DIR, 'greedy_causal_lm')
    exit_code = subprocess.run([cpp_sample, convert_model, sample_args], check=True).returncode
    assert exit_code == 0, "C++ sample execution failed"

@pytest.mark.llm
@pytest.mark.cpp
@pytest.mark.parametrize("convert_model", [
    {"model_id": "open_llama_3b_v2"}
], indirect=["convert_model"])
@pytest.mark.parametrize("sample_args", ["return 0"])
def test_cpp_sample_greedy_causal_lm_open_llama(convert_model, sample_args):
    cpp_sample = os.path.join(SAMPLES_CPP_DIR, 'greedy_causal_lm')
    exit_code = subprocess.run([cpp_sample, convert_model, sample_args], check=True).returncode
    assert exit_code == 0, "C++ sample execution failed"

# text_generation sample
@pytest.mark.llm
@pytest.mark.py
@pytest.mark.parametrize("convert_model", [
    {"model_id": "TinyLlama-1.1B-intermediate-step-1431k-3T"}  # convert_model expects a dict, not a bare string
], indirect=["convert_model"])
@pytest.mark.parametrize("sample_args", ["How to create a table with two columns, one of them has type float, another one has type int?"])
@pytest.mark.parametrize("download_test_content", [TEST_FILES["adapter_model.safetensors"]], indirect=True)
def test_python_sample_text_generation(convert_model, download_test_content, sample_args):
    script = os.path.join(SAMPLES_PY_DIR, "text_generation/lora.py")
    result = subprocess.run(["python", script, convert_model, download_test_content, sample_args], check=True)
    assert result.returncode == 0, f"Script execution failed for model {convert_model}"
42 changes: 42 additions & 0 deletions tests/python_tests/samples/test_multinomial_causal_lm.py
@@ -0,0 +1,42 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os
import subprocess
import pytest
from conftest import SAMPLES_PY_DIR, SAMPLES_CPP_DIR

# multinomial_causal_lm sample

@pytest.mark.llm
@pytest.mark.py
@pytest.mark.parametrize("convert_model", [
    {"model_id": "TinyLlama-1.1B-Chat-v1.0"},
], indirect=["convert_model"])
@pytest.mark.parametrize("sample_args", ["0"])
def test_python_sample_multinomial_causal_lm_tiny_llama(convert_model, sample_args):
    script = os.path.join(SAMPLES_PY_DIR, "text_generation/multinomial_causal_lm.py")
    result = subprocess.run(["python", script, convert_model, sample_args], check=True)
    assert result.returncode == 0, f"Script execution failed for model {convert_model} with argument {sample_args}"

@pytest.mark.llm
@pytest.mark.py
@pytest.mark.parametrize("convert_model", [
    {"model_id": "open_llama_3b_v2"},
], indirect=["convert_model"])
@pytest.mark.parametrize("sample_args", ["a", "return 0"])
def test_python_sample_multinomial_causal_lm_open_llama(convert_model, sample_args, shared_data):
    script = os.path.join(SAMPLES_PY_DIR, "text_generation/multinomial_causal_lm.py")
    # capture_output is required: without it result.stdout is None and the
    # diff test below would have nothing to compare
    result = subprocess.run(["python", script, convert_model, sample_args],
                            capture_output=True, text=True, check=True)
    assert result.returncode == 0, f"Script execution failed for model {convert_model} with argument {sample_args}"
    shared_data.setdefault("multinomial_causal_lm", {}).setdefault("py", {}).setdefault("open_llama_3b_v2", {})[sample_args] = result.stdout

@pytest.mark.llm
@pytest.mark.cpp
@pytest.mark.py
def test_sample_multinomial_causal_lm_diff(shared_data):
    py_result = shared_data.get("multinomial_causal_lm", {}).get("py", {}).get("open_llama_3b_v2", {}).get("return 0")
    cpp_result = shared_data.get("multinomial_causal_lm", {}).get("cpp", {}).get("open_llama_3b_v2", {}).get("return 0")
    if not py_result or not cpp_result:
        pytest.skip("Skipping because one of the prior tests was skipped or failed.")
    assert py_result == cpp_result, "Results should match"
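
The diff test above reads a "cpp" slot that nothing in this commit populates, so it currently always skips. A sketch of the C++ counterpart that would fill it (not part of this commit; the binary name mirrors the other C++ samples and is an assumption, it relies on the imports already present in this file, and it would need to be defined before the diff test so it runs first):

@pytest.mark.llm
@pytest.mark.cpp
@pytest.mark.parametrize("convert_model", [
    {"model_id": "open_llama_3b_v2"},
], indirect=["convert_model"])
@pytest.mark.parametrize("sample_args", ["return 0"])
def test_cpp_sample_multinomial_causal_lm_open_llama(convert_model, sample_args, shared_data):
    cpp_sample = os.path.join(SAMPLES_CPP_DIR, 'multinomial_causal_lm')  # hypothetical binary name
    result = subprocess.run([cpp_sample, convert_model, sample_args],
                            capture_output=True, text=True, check=True)
    # Store the output under the "cpp" slot that the diff test compares
    shared_data.setdefault("multinomial_causal_lm", {}).setdefault("cpp", {}).setdefault("open_llama_3b_v2", {})[sample_args] = result.stdout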
21 changes: 21 additions & 0 deletions tests/python_tests/samples/test_text_generation.py
@@ -0,0 +1,21 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os
import subprocess
import pytest
from conftest import TEST_FILES, SAMPLES_PY_DIR, SAMPLES_CPP_DIR

# text_generation sample

@pytest.mark.llm
@pytest.mark.py
@pytest.mark.parametrize("convert_model", [
    {"model_id": "TinyLlama-1.1B-intermediate-step-1431k-3T", "extra_args": ["--trust-remote-code"]}
], indirect=["convert_model"])
@pytest.mark.parametrize("sample_args", ["How to create a table with two columns, one of them has type float, another one has type int?"])
@pytest.mark.parametrize("download_test_content", [TEST_FILES["adapter_model.safetensors"]], indirect=True)
def test_python_sample_text_generation(convert_model, download_test_content, sample_args):
    script = os.path.join(SAMPLES_PY_DIR, "text_generation/lora.py")
    result = subprocess.run(["python", script, convert_model, download_test_content, sample_args], check=True)
    assert result.returncode == 0, f"Script execution failed for model {convert_model}"
28 changes: 28 additions & 0 deletions tests/python_tests/samples/test_whisper_speech_recognition.py
@@ -0,0 +1,28 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os
import subprocess
import pytest
from conftest import TEST_FILES, SAMPLES_PY_DIR, SAMPLES_CPP_DIR

# whisper_speech_recognition sample

@pytest.mark.whisper
@pytest.mark.py
@pytest.mark.parametrize("convert_model", [{"model_id": "WhisperTiny", "extra_args": ["--trust-remote-code"]}],
                         indirect=True, ids=lambda p: f"model={p['model_id']}")
@pytest.mark.parametrize("download_test_content", [TEST_FILES["how_are_you_doing_today.wav"]], indirect=True)
def test_python_sample_whisper_speech_recognition(convert_model, download_test_content):
    script = os.path.join(SAMPLES_PY_DIR, "whisper_speech_recognition/whisper_speech_recognition.py")
    result = subprocess.run(["python", script, convert_model, download_test_content], check=True)
    assert result.returncode == 0, f"Script execution failed for model {convert_model}"

@pytest.mark.whisper
@pytest.mark.cpp
@pytest.mark.parametrize("convert_model", [{"model_id": "WhisperTiny"}],
                         indirect=True, ids=lambda p: f"model={p['model_id']}")
@pytest.mark.parametrize("download_test_content", [TEST_FILES["how_are_you_doing_today.wav"]], indirect=True)
def test_cpp_sample_whisper_speech_recognition(convert_model, download_test_content):
    cpp_sample = os.path.join(SAMPLES_CPP_DIR, 'whisper_speech_recognition')
    exit_code = subprocess.run([cpp_sample, convert_model, download_test_content], check=True).returncode
    assert exit_code == 0, "C++ sample execution failed"
