From 6e935064dbe36d97f7bf1695c11e6de3e4ba086b Mon Sep 17 00:00:00 2001
From: Zach Kimberg
Date: Tue, 9 Jul 2024 12:56:53 -0700
Subject: [PATCH 1/2] [CI] Add integration pytest marks

This adds marks to the pytest integration suite. They can be used to
select groups of tests beyond the existing class-based selection.
Specifically, they are useful for running all of the tests for a
particular feature, backend, or instance type.
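For example, to run only the vLLM LoRA tests:

    python -m pytest tests.py -m "vllm and lora"

or all tests for the lmi_dist backend:

    python -m pytest tests.py -m lmi_dist

The mark names come from the pytest.ini added below; combining them
with "and"/"or"/"not" is standard pytest marker-expression syntax.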
---
 .github/workflows/integration_execute.yml | 16 ++++++--
 tests/integration/pytest.ini              | 14 +++++++
 tests/integration/tests.py                | 47 +++++++++++++++--------
 3 files changed, 57 insertions(+), 20 deletions(-)
 create mode 100644 tests/integration/pytest.ini

diff --git a/.github/workflows/integration_execute.yml b/.github/workflows/integration_execute.yml
index efdf85218..586e0fbb1 100644
--- a/.github/workflows/integration_execute.yml
+++ b/.github/workflows/integration_execute.yml
@@ -4,8 +4,13 @@ on:
   workflow_dispatch:
     inputs:
       test:
-        description: 'Which test to run. Should be a class in tests.py or TestClass::test_fun_name'
-        required: true
+        description: 'Which test to run as a pytest keyword expression. Should be a class in tests.py or TestClass::test_fun_name'
+        required: false
+        default: ""
+      mark:
+        description: 'Which tests to run as a pytest marker expression. Should be a mark from pytest.ini'
+        required: false
+        default: ""
       instance:
         description: 'Instance used for testing'
         required: true
@@ -72,6 +77,11 @@ jobs:
          sudo apt-get install python3 python-is-python3 python3-pip -y
       - name: Install pip dependencies
         run: pip3 install pytest requests "numpy<2" pillow huggingface_hub
+      - name: Install torch
+        # Use torch to get cuda capability of current device to selectively run tests
+        # Torch version doesn't really matter that much
+        run: |
+          pip3 install torch==2.3.0
       - name: install awscli
         run: |
           sudo apt-get update
@@ -96,7 +106,7 @@ jobs:
         env:
           TEST_DJL_VERSION: ${{ inputs.djl-version }}
         run: |
-          python -m pytest -k ${{ inputs.test }} tests.py
+          python -m pytest tests.py -k "${{ inputs.test }}" -m "${{ inputs.mark }}"
       - name: Cleanup
         working-directory: tests/integration
         run: |
diff --git a/tests/integration/pytest.ini b/tests/integration/pytest.ini
new file mode 100644
index 000000000..1a822ab95
--- /dev/null
+++ b/tests/integration/pytest.ini
@@ -0,0 +1,14 @@
+[pytest]
+markers =
+    gpu: Runs on any gpu machine
+    gpu_4: Runs on a machine with at least 4 gpus (includes gpu mark)
+    inf: Runs on inferentia
+    aarch64: Runs on aarch64
+    cpu: Tests cpu
+
+    hf: Tests the hf accelerate backend
+    lmi_dist: Tests the lmi backend
+    vllm: Tests the vllm backend
+    trtllm: Tests the trtllm backend
+
+    lora: Tests lora feature
diff --git a/tests/integration/tests.py b/tests/integration/tests.py
index d405f9392..94f2a194b 100644
--- a/tests/integration/tests.py
+++ b/tests/integration/tests.py
@@ -84,8 +84,8 @@ def launch(self, env_vars=None, container=None, cmd=None):
                        capture_output=True)
 
 
+@pytest.mark.cpu
 class TestCpuFull:
-    # Runs on cpu
     def test_python_model(self):
         with Runner('cpu-full', 'python_model', download=True) as r:
             r.launch(
@@ -110,9 +110,9 @@ def test_python_dynamic_batch(self):
         os.system("./test_client.sh image/jpg models/kitten.jpg")
 
 
+@pytest.mark.cpu
 @pytest.mark.parametrize('arch', ["cpu", "cpu-full"])
 class TestCpuBoth:
-    # Runs on cpu
     def test_pytorch(self, arch):
         with Runner(arch, 'pytorch', download=True) as r:
             r.launch(
@@ -166,8 +166,9 @@ def test_tensorflow_binary(self, arch):
         os.system("./test_client.sh tensor/ndlist 1,224,224,3")
 
 
+@pytest.mark.gpu
+@pytest.mark.gpu_4
 class TestGpu:
-    # Runs on any gpu instance
     def test_python_model(self):
         with Runner('pytorch-gpu', 'python_model', download=True) as r:
             r.launch(
@@ -185,8 +186,8 @@ def test_pytorch(self):
         os.system("./test_client.sh image/jpg models/kitten.jpg")
 
 
+@pytest.mark.aarch64
 class TestAarch64:
-    # Runs on aarch64
     def test_pytorch(self):
         with Runner('aarch64', 'pytorch_model', download=True) as r:
             r.launch(
@@ -204,8 +205,9 @@ def test_onnx(self):
         os.system("./test_client.sh image/jpg models/kitten.jpg")
 
 
+@pytest.mark.hf
+@pytest.mark.gpu_4
 class TestHfHandler:
-    # Runs on g5.12xl
     def test_gpt_neo(self):
         with Runner('lmi', 'test_gpt4all_lora') as r:
             prepare.build_hf_handler_model("gpt-neo-2.7b")
@@ -249,8 +251,9 @@ def test_streaming_t5_large(self):
         client.run("huggingface t5-large".split())
 
 
+@pytest.mark.trtllm
+@pytest.mark.gpu_4
 class TestTrtLlmHandler1:
-    # Runs on g5.12xl
     def test_llama2_13b_tp4(self):
         with Runner('tensorrt-llm', 'llama2-13b') as r:
             prepare.build_trtllm_handler_model("llama2-13b")
@@ -294,8 +297,9 @@ def test_santacoder(self):
         client.run("trtllm santacoder".split())
 
 
+@pytest.mark.trtllm
+@pytest.mark.gpu_4
 class TestTrtLlmHandler2:
-    # Runs on g5.12xl
     def test_llama2_7b_hf_smoothquant(self):
         with Runner('tensorrt-llm', 'llama2-7b-smoothquant') as r:
             prepare.build_trtllm_handler_model("llama2-7b-smoothquant")
@@ -327,8 +331,9 @@ def test_llama2_7b_chat(self):
         client.run("trtllm_chat llama2-7b-chat".split())
 
 
+@pytest.mark.lmi_dist
+@pytest.mark.gpu_4
 class TestSchedulerSingleGPU:
-    # Runs on g5.12xl
 
     def test_gpt2(self):
         with Runner('lmi', 'gpt2') as r:
@@ -343,8 +348,9 @@ def test_bllm(self):
         rb_client.run("scheduler_single_gpu bloom-560m".split())
 
 
+@pytest.mark.lmi_dist
+@pytest.mark.gpu_4
 class TestSchedulerMultiGPU:
-    # Runs on g5.12xl
 
     def test_gptj_6b(self):
         with Runner('lmi', 'gpt-j-6b') as r:
@@ -353,8 +359,9 @@ def test_gptj_6b(self):
         rb_client.run("scheduler_multi_gpu gpt-j-6b".split())
 
 
+@pytest.mark.lmi_dist
+@pytest.mark.gpu_4
 class TestLmiDist1:
-    # Runs on g5.12xl
 
     def test_gpt_neox_20b(self):
         with Runner('lmi', 'gpt-neox-20b') as r:
@@ -421,8 +428,9 @@ def test_llama2_tiny_autoawq(self):
             os.system('sudo rm -rf models')
 
 
+@pytest.mark.lmi_dist
+@pytest.mark.gpu_4
 class TestLmiDist2:
-    # Runs on g5.12xl
 
     def test_gpt_neox_20b(self):
         with Runner('lmi', 'octocoder') as r:
@@ -479,8 +487,9 @@ def test_llama2_7b_chat(self):
         client.run("lmi_dist_chat llama2-7b-chat".split())
 
 
+@pytest.mark.vllm
+@pytest.mark.gpu_4
 class TestVllm1:
-    # Runs on g5.12xl
 
     def test_gpt_neox_20b(self):
         with Runner('lmi', 'gpt-neox-20b') as r:
@@ -527,8 +536,10 @@ def test_qwen2_7b_fp8(self):
         client.run("vllm qwen2-7b-fp8".split())
 
 
+@pytest.mark.vllm
+@pytest.mark.lora
+@pytest.mark.gpu_4
 class TestVllmLora:
-    # Runs on g5.12xl
 
     def test_lora_unmerged(self):
         with Runner('lmi', 'llama-7b-unmerged-lora') as r:
@@ -567,8 +578,10 @@ def test_lora_llama3_8b(self):
         client.run("vllm_adapters llama3-8b-unmerged-lora".split())
 
 
+@pytest.mark.lmi_dist
+@pytest.mark.lora
+@pytest.mark.gpu_4
 class TestLmiDistLora:
-    # Runs on g5.12xl
 
     def test_lora_unmerged(self):
         with Runner('lmi', 'llama-7b-unmerged-lora') as r:
@@ -610,8 +623,8 @@ def test_lora_llama3_8b(self):
         client.run("lmi_dist_adapters llama3-8b-unmerged-lora".split())
 
 
+@pytest.mark.inf
 class TestNeuronx1:
-    # Runs on inf2.24xl
 
     def test_python_mode(self):
         with Runner('pytorch-inf2', 'test_python_mode', download=True) as r:
@@ -687,8 +700,8 @@ def test_partition(self, model):
         os.system('sudo rm -rf models')
 
 
+@pytest.mark.inf
 class TestNeuronx2:
-    # Runs on inf2.24xl
 
     def test_stream_opt(self):
         with Runner('pytorch-inf2', 'opt-1.3b-streaming') as r:
@@ -728,8 +741,8 @@ def test_stable_diffusion_xl(self):
             "neuron-stable-diffusion stable-diffusion-xl-neuron".split())
 
 
+@pytest.mark.inf
 class TestNeuronxRollingBatch:
-    # Runs on inf2.24xl
 
     def test_llama_7b(self):
         with Runner('pytorch-inf2', 'llama-7b-rb') as r:
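Note: with the new mark input, a manual run of this workflow can select
tests by marker expression rather than only by keyword. A hypothetical
dispatch via the GitHub CLI (the instance value is illustrative, and any
other required inputs such as djl-version are omitted here):

    gh workflow run integration_execute.yml -f mark=lmi_dist -f instance=g5.12xlarge

Leaving both test and mark at their empty defaults is intended to run
the full suite, since empty -k and -m expressions apply no filter.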
From e1e4964ee051e208fd9c2369862b61e54ee420a3 Mon Sep 17 00:00:00 2001
From: Zach Kimberg
Date: Mon, 15 Jul 2024 13:21:06 -0700
Subject: [PATCH 2/2] Formatter

---
 tests/integration/tests.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/integration/tests.py b/tests/integration/tests.py
index 94f2a194b..1690083f7 100644
--- a/tests/integration/tests.py
+++ b/tests/integration/tests.py
@@ -86,6 +86,7 @@ def launch(self, env_vars=None, container=None, cmd=None):
 
 @pytest.mark.cpu
 class TestCpuFull:
+
     def test_python_model(self):
         with Runner('cpu-full', 'python_model', download=True) as r:
             r.launch(
@@ -113,6 +114,7 @@ def test_python_dynamic_batch(self):
 @pytest.mark.cpu
 @pytest.mark.parametrize('arch', ["cpu", "cpu-full"])
 class TestCpuBoth:
+
     def test_pytorch(self, arch):
         with Runner(arch, 'pytorch', download=True) as r:
             r.launch(
@@ -169,6 +171,7 @@ def test_tensorflow_binary(self, arch):
 @pytest.mark.gpu
 @pytest.mark.gpu_4
 class TestGpu:
+
     def test_python_model(self):
         with Runner('pytorch-gpu', 'python_model', download=True) as r:
             r.launch(
@@ -188,6 +191,7 @@ def test_pytorch(self):
 
 @pytest.mark.aarch64
 class TestAarch64:
+
     def test_pytorch(self):
         with Runner('aarch64', 'pytorch_model', download=True) as r:
             r.launch(
@@ -208,6 +212,7 @@ def test_onnx(self):
 @pytest.mark.hf
 @pytest.mark.gpu_4
 class TestHfHandler:
+
     def test_gpt_neo(self):
         with Runner('lmi', 'test_gpt4all_lora') as r:
             prepare.build_hf_handler_model("gpt-neo-2.7b")
@@ -254,6 +259,7 @@ def test_streaming_t5_large(self):
 @pytest.mark.trtllm
 @pytest.mark.gpu_4
 class TestTrtLlmHandler1:
+
     def test_llama2_13b_tp4(self):
         with Runner('tensorrt-llm', 'llama2-13b') as r:
             prepare.build_trtllm_handler_model("llama2-13b")
@@ -300,6 +306,7 @@ def test_santacoder(self):
 @pytest.mark.trtllm
 @pytest.mark.gpu_4
 class TestTrtLlmHandler2:
+
     def test_llama2_7b_hf_smoothquant(self):
         with Runner('tensorrt-llm', 'llama2-7b-smoothquant') as r:
             prepare.build_trtllm_handler_model("llama2-7b-smoothquant")
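A note on the torch install added in patch 1: the patch only installs
torch, but the comment states the intent of using the cuda capability of
the current device to selectively run tests. A minimal sketch of how that
could look (hypothetical, not part of these patches; the helper name and
the 8.9 threshold are illustrative):

    import pytest
    import torch


    def cuda_capability():
        # (major, minor) compute capability of device 0, or (0, 0) if no GPU
        if not torch.cuda.is_available():
            return (0, 0)
        return torch.cuda.get_device_capability(0)


    # Example gate: fp8 tests such as test_qwen2_7b_fp8 could be skipped on
    # hardware below the assumed Ada/Hopper capability threshold of 8.9.
    requires_fp8 = pytest.mark.skipif(
        cuda_capability() < (8, 9),
        reason="fp8 requires cuda compute capability >= 8.9")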