[CI] Add integration pytest marks #2176

Merged Jul 16, 2024 (2 commits)
16 changes: 13 additions & 3 deletions .github/workflows/integration_execute.yml
@@ -4,8 +4,13 @@ on:
   workflow_dispatch:
     inputs:
       test:
-        description: 'Which test to run. Should be a class in tests.py or TestClass::test_fun_name'
-        required: true
+        description: 'Which test to run as a pytest keyword expression. Should be a class in tests.py or TestClass::test_fun_name'
+        required: false
+        default: ""
+      mark:
+        description: 'Which tests to run as a pytest marker expression. Should be a mark from pytest.ini'
+        required: false
+        default: ""
       instance:
         description: 'Instance used for testing'
         required: true
@@ -72,6 +77,11 @@ jobs:
         sudo apt-get install python3 python-is-python3 python3-pip -y
     - name: Install pip dependencies
       run: pip3 install pytest requests "numpy<2" pillow huggingface_hub
+    - name: Install torch
+      # Use torch to get cuda capability of current device to selectively run tests
+      # Torch version doesn't really matter that much
+      run: |
+        pip3 install torch==2.3.0
     - name: install awscli
       run: |
         sudo apt-get update
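
Note: torch is installed here purely for device introspection, not for serving. A minimal sketch of how a CUDA capability check can selectively skip tests, assuming a hypothetical require_compute_capability helper (illustrative only, not code from this PR):

import pytest
import torch


def require_compute_capability(min_major):
    # torch.cuda.get_device_capability() returns a (major, minor) tuple
    # for the active GPU, e.g. (7, 5) on a T4 or (8, 0) on an A100.
    if not torch.cuda.is_available():
        return pytest.mark.skip(reason="no CUDA device present")
    major, _minor = torch.cuda.get_device_capability()
    return pytest.mark.skipif(
        major < min_major,
        reason=f"requires compute capability >= {min_major}.0")


@require_compute_capability(9)  # e.g. fp8 kernels need sufficiently new hardware
def test_fp8_placeholder():
    pass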
@@ -96,7 +106,7 @@ jobs:
       env:
         TEST_DJL_VERSION: ${{ inputs.djl-version }}
       run: |
-        python -m pytest -k ${{ inputs.test }} tests.py
+        python -m pytest tests.py -k "${{ inputs.test }}" -m "${{ inputs.mark }}"
     - name: Cleanup
       working-directory: tests/integration
       run: |
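
The quoting change matters: both inputs now default to the empty string, and pytest applies -k/-m filtering only when the expression is non-empty, so a dispatch with both fields left blank still runs the whole suite. A rough local equivalent of this step, with example expressions that are not from this PR:

import sys

import pytest

# -k takes a keyword expression over test/class names; -m takes a marker
# expression over the marks declared in pytest.ini.
sys.exit(pytest.main([
    "tests.py",
    "-k", "TestGpu or test_python_model",
    "-m", "gpu and not lora",
]))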
14 changes: 14 additions & 0 deletions tests/integration/pytest.ini
@@ -0,0 +1,14 @@
+[pytest]
+markers =
+    gpu: Runs on any gpu machine
+    gpu_4: Runs on a machine with at least 4 gpus (includes gpu mark)
+    inf: Runs on inferentia
+    aarch64: Runs on aarch64
+    cpu: Tests cpu
+
+    hf: Tests the hf accelerate backend
+    lmi_dist: Tests the lmi backend
+    vllm: Tests the vllm backend
+    trtllm: Tests the trtllm backend
+
+    lora: Tests lora feature
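
pytest.ini alone only declares marker names; the "(includes gpu mark)" note on gpu_4 needs code to enforce. One possible implementation, sketched as an assumption rather than taken from this PR, is a collection hook in tests/integration/conftest.py:

import pytest


def pytest_collection_modifyitems(config, items):
    # Make every gpu_4 test also selectable with `-m gpu`.
    for item in items:
        if item.get_closest_marker("gpu_4") is not None:
            item.add_marker(pytest.mark.gpu)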
54 changes: 37 additions & 17 deletions tests/integration/tests.py
@@ -84,8 +84,9 @@ def launch(self, env_vars=None, container=None, cmd=None):
                               capture_output=True)


+@pytest.mark.cpu
 class TestCpuFull:
     # Runs on cpu

     def test_python_model(self):
         with Runner('cpu-full', 'python_model', download=True) as r:
             r.launch(
@@ -110,9 +111,10 @@ def test_python_dynamic_batch(self):
         os.system("./test_client.sh image/jpg models/kitten.jpg")


+@pytest.mark.cpu
 @pytest.mark.parametrize('arch', ["cpu", "cpu-full"])
 class TestCpuBoth:
     # Runs on cpu

     def test_pytorch(self, arch):
         with Runner(arch, 'pytorch', download=True) as r:
             r.launch(
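
Class-level marks compose with parametrize: every generated variant inherits the class's marks. A minimal, self-contained illustration (TestDemo is hypothetical, not part of this diff):

import pytest


@pytest.mark.cpu
@pytest.mark.parametrize('arch', ["cpu", "cpu-full"])
class TestDemo:

    def test_arch(self, arch):
        # Collected as test_arch[cpu] and test_arch[cpu-full]; both are
        # selected by `-m cpu` because the class mark propagates.
        assert arch in ("cpu", "cpu-full")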
@@ -166,8 +168,10 @@ def test_tensorflow_binary(self, arch):
         os.system("./test_client.sh tensor/ndlist 1,224,224,3")


+@pytest.mark.gpu
+@pytest.mark.gpu_4
 class TestGpu:
     # Runs on any gpu instance

     def test_python_model(self):
         with Runner('pytorch-gpu', 'python_model', download=True) as r:
             r.launch(
@@ -185,8 +189,9 @@ def test_pytorch(self):
         os.system("./test_client.sh image/jpg models/kitten.jpg")


+@pytest.mark.aarch64
 class TestAarch64:
     # Runs on aarch64

     def test_pytorch(self):
         with Runner('aarch64', 'pytorch_model', download=True) as r:
             r.launch(
@@ -204,8 +209,10 @@ def test_onnx(self):
         os.system("./test_client.sh image/jpg models/kitten.jpg")


+@pytest.mark.hf
+@pytest.mark.gpu_4
 class TestHfHandler:
     # Runs on g5.12xl

     def test_gpt_neo(self):
         with Runner('lmi', 'test_gpt4all_lora') as r:
             prepare.build_hf_handler_model("gpt-neo-2.7b")
@@ -249,8 +256,10 @@ def test_streaming_t5_large(self):
         client.run("huggingface t5-large".split())


+@pytest.mark.trtllm
+@pytest.mark.gpu_4
 class TestTrtLlmHandler1:
     # Runs on g5.12xl

     def test_llama2_13b_tp4(self):
         with Runner('tensorrt-llm', 'llama2-13b') as r:
             prepare.build_trtllm_handler_model("llama2-13b")
@@ -294,8 +303,10 @@ def test_santacoder(self):
         client.run("trtllm santacoder".split())


+@pytest.mark.trtllm
+@pytest.mark.gpu_4
 class TestTrtLlmHandler2:
     # Runs on g5.12xl

     def test_llama2_7b_hf_smoothquant(self):
         with Runner('tensorrt-llm', 'llama2-7b-smoothquant') as r:
             prepare.build_trtllm_handler_model("llama2-7b-smoothquant")
@@ -327,8 +338,9 @@ def test_llama2_7b_chat(self):
         client.run("trtllm_chat llama2-7b-chat".split())


+@pytest.mark.lmi_dist
+@pytest.mark.gpu_4
 class TestSchedulerSingleGPU:
     # Runs on g5.12xl

     def test_gpt2(self):
         with Runner('lmi', 'gpt2') as r:
@@ -343,8 +355,9 @@ def test_bllm(self):
         rb_client.run("scheduler_single_gpu bloom-560m".split())


+@pytest.mark.lmi_dist
+@pytest.mark.gpu_4
 class TestSchedulerMultiGPU:
     # Runs on g5.12xl

     def test_gptj_6b(self):
         with Runner('lmi', 'gpt-j-6b') as r:
@@ -353,8 +366,9 @@ def test_gptj_6b(self):
         rb_client.run("scheduler_multi_gpu gpt-j-6b".split())


+@pytest.mark.lmi_dist
+@pytest.mark.gpu_4
 class TestLmiDist1:
     # Runs on g5.12xl

     def test_gpt_neox_20b(self):
         with Runner('lmi', 'gpt-neox-20b') as r:
@@ -421,8 +435,9 @@ def test_llama2_tiny_autoawq(self):
         os.system('sudo rm -rf models')


+@pytest.mark.lmi_dist
+@pytest.mark.gpu_4
 class TestLmiDist2:
     # Runs on g5.12xl

     def test_gpt_neox_20b(self):
         with Runner('lmi', 'octocoder') as r:
@@ -479,8 +494,9 @@ def test_llama2_7b_chat(self):
         client.run("lmi_dist_chat llama2-7b-chat".split())


+@pytest.mark.vllm
+@pytest.mark.gpu_4
 class TestVllm1:
     # Runs on g5.12xl

     def test_gpt_neox_20b(self):
         with Runner('lmi', 'gpt-neox-20b') as r:
@@ -527,8 +543,10 @@ def test_qwen2_7b_fp8(self):
         client.run("vllm qwen2-7b-fp8".split())


+@pytest.mark.vllm
+@pytest.mark.lora
+@pytest.mark.gpu_4
 class TestVllmLora:
     # Runs on g5.12xl

     def test_lora_unmerged(self):
         with Runner('lmi', 'llama-7b-unmerged-lora') as r:
@@ -567,8 +585,10 @@ def test_lora_llama3_8b(self):
         client.run("vllm_adapters llama3-8b-unmerged-lora".split())


+@pytest.mark.lmi_dist
+@pytest.mark.lora
+@pytest.mark.gpu_4
 class TestLmiDistLora:
     # Runs on g5.12xl

     def test_lora_unmerged(self):
         with Runner('lmi', 'llama-7b-unmerged-lora') as r:
@@ -610,8 +630,8 @@ def test_lora_llama3_8b(self):
         client.run("lmi_dist_adapters llama3-8b-unmerged-lora".split())


+@pytest.mark.inf
 class TestNeuronx1:
     # Runs on inf2.24xl

     def test_python_mode(self):
         with Runner('pytorch-inf2', 'test_python_mode', download=True) as r:
@@ -687,8 +707,8 @@ def test_partition(self, model):
         os.system('sudo rm -rf models')


+@pytest.mark.inf
 class TestNeuronx2:
     # Runs on inf2.24xl

     def test_stream_opt(self):
         with Runner('pytorch-inf2', 'opt-1.3b-streaming') as r:
@@ -728,8 +748,8 @@ def test_stable_diffusion_xl(self):
             "neuron-stable-diffusion stable-diffusion-xl-neuron".split())


+@pytest.mark.inf
 class TestNeuronxRollingBatch:
     # Runs on inf2.24xl

     def test_llama_7b(self):
         with Runner('pytorch-inf2', 'llama-7b-rb') as r:
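
With marks on every class, a dispatch can target a hardware slice instead of enumerating tests. A hedged sketch using the GitHub REST API workflow-dispatch endpoint (the repo slug, ref, and input values here are assumptions, and the workflow's other required inputs still apply):

import requests

# POST /repos/{owner}/{repo}/actions/workflows/{workflow_file}/dispatches
resp = requests.post(
    "https://api.github.com/repos/deepjavalibrary/djl-serving"
    "/actions/workflows/integration_execute.yml/dispatches",
    headers={
        "Authorization": "Bearer <token>",
        "Accept": "application/vnd.github+json",
    },
    json={
        "ref": "master",
        "inputs": {"mark": "inf", "instance": "inf2.24xl"},
    },
)
resp.raise_for_status()  # GitHub returns 204 No Content on success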