From 6e935064dbe36d97f7bf1695c11e6de3e4ba086b Mon Sep 17 00:00:00 2001
From: Zach Kimberg
Date: Tue, 9 Jul 2024 12:56:53 -0700
Subject: [PATCH 1/2] [CI] Add integration pytest marks

This adds marks to the pytest integration suite. They can be used to
select groups of tests beyond the existing class-based selection.
Specifically, they are useful for running all of the tests for a
particular feature, backend, or instance type.
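For example, to run only the vLLM LoRA tests:

    python -m pytest tests.py -m "vllm and lora"

or all tests for the lmi_dist backend:

    python -m pytest tests.py -m lmi_dist

The mark names come from the pytest.ini added below; combining them
with "and"/"or"/"not" is standard pytest marker-expression syntax.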
---
 .github/workflows/integration_execute.yml | 16 ++++++--
 tests/integration/pytest.ini              | 14 +++++++
 tests/integration/tests.py                | 47 +++++++++++++++--------
 3 files changed, 57 insertions(+), 20 deletions(-)
 create mode 100644 tests/integration/pytest.ini

diff --git a/.github/workflows/integration_execute.yml b/.github/workflows/integration_execute.yml
index efdf85218..586e0fbb1 100644
--- a/.github/workflows/integration_execute.yml
+++ b/.github/workflows/integration_execute.yml
@@ -4,8 +4,13 @@ on:
   workflow_dispatch:
     inputs:
       test:
-        description: 'Which test to run. Should be a class in tests.py or TestClass::test_fun_name'
-        required: true
+        description: 'Which test to run as a pytest keyword expression. Should be a class in tests.py or TestClass::test_fun_name'
+        required: false
+        default: ""
+      mark:
+        description: 'Which tests to run as a pytest marker expression. Should be a mark from pytest.ini'
+        required: false
+        default: ""
       instance:
         description: 'Instance used for testing'
         required: true
@@ -72,6 +77,11 @@ jobs:
          sudo apt-get install python3 python-is-python3 python3-pip -y
       - name: Install pip dependencies
         run: pip3 install pytest requests "numpy<2" pillow huggingface_hub
+      - name: Install torch
+        # Use torch to get cuda capability of current device to selectively run tests
+        # Torch version doesn't really matter that much
+        run: |
+          pip3 install torch==2.3.0
       - name: install awscli
         run: |
           sudo apt-get update
@@ -96,7 +106,7 @@ jobs:
         env:
           TEST_DJL_VERSION: ${{ inputs.djl-version }}
         run: |
-          python -m pytest -k ${{ inputs.test }} tests.py
+          python -m pytest tests.py -k "${{ inputs.test }}" -m "${{ inputs.mark }}"
       - name: Cleanup
         working-directory: tests/integration
         run: |
diff --git a/tests/integration/pytest.ini b/tests/integration/pytest.ini
new file mode 100644
index 000000000..1a822ab95
--- /dev/null
+++ b/tests/integration/pytest.ini
@@ -0,0 +1,14 @@
+[pytest]
+markers =
+    gpu: Runs on any gpu machine
+    gpu_4: Runs on a machine with at least 4 gpus (includes gpu mark)
+    inf: Runs on inferentia
+    aarch64: Runs on aarch64
+    cpu: Tests cpu
+
+    hf: Tests the hf accelerate backend
+    lmi_dist: Tests the lmi backend
+    vllm: Tests the vllm backend
+    trtllm: Tests the trtllm backend
+
+    lora: Tests lora feature
diff --git a/tests/integration/tests.py b/tests/integration/tests.py
index d405f9392..94f2a194b 100644
--- a/tests/integration/tests.py
+++ b/tests/integration/tests.py
@@ -84,8 +84,8 @@ def launch(self, env_vars=None, container=None, cmd=None):
                        capture_output=True)
 
 
+@pytest.mark.cpu
 class TestCpuFull:
-    # Runs on cpu
     def test_python_model(self):
         with Runner('cpu-full', 'python_model', download=True) as r:
             r.launch(
@@ -110,9 +110,9 @@ def test_python_dynamic_batch(self):
         os.system("./test_client.sh image/jpg models/kitten.jpg")
 
 
+@pytest.mark.cpu
 @pytest.mark.parametrize('arch', ["cpu", "cpu-full"])
 class TestCpuBoth:
-    # Runs on cpu
     def test_pytorch(self, arch):
         with Runner(arch, 'pytorch', download=True) as r:
             r.launch(
@@ -166,8 +166,9 @@ def test_tensorflow_binary(self, arch):
         os.system("./test_client.sh tensor/ndlist 1,224,224,3")
 
 
+@pytest.mark.gpu
+@pytest.mark.gpu_4
 class TestGpu:
-    # Runs on any gpu instance
     def test_python_model(self):
         with Runner('pytorch-gpu', 'python_model', download=True) as r:
             r.launch(
@@ -185,8 +186,8 @@ def test_pytorch(self):
         os.system("./test_client.sh image/jpg models/kitten.jpg")
 
 
+@pytest.mark.aarch64
 class TestAarch64:
-    # Runs on aarch64
     def test_pytorch(self):
         with Runner('aarch64', 'pytorch_model', download=True) as r:
             r.launch(
@@ -204,8 +205,9 @@ def test_onnx(self):
         os.system("./test_client.sh image/jpg models/kitten.jpg")
 
 
+@pytest.mark.hf
+@pytest.mark.gpu_4
 class TestHfHandler:
-    # Runs on g5.12xl
     def test_gpt_neo(self):
         with Runner('lmi', 'test_gpt4all_lora') as r:
             prepare.build_hf_handler_model("gpt-neo-2.7b")
@@ -249,8 +251,9 @@ def test_streaming_t5_large(self):
         client.run("huggingface t5-large".split())
 
 
+@pytest.mark.trtllm
+@pytest.mark.gpu_4
 class TestTrtLlmHandler1:
-    # Runs on g5.12xl
     def test_llama2_13b_tp4(self):
         with Runner('tensorrt-llm', 'llama2-13b') as r:
             prepare.build_trtllm_handler_model("llama2-13b")
@@ -294,8 +297,9 @@ def test_santacoder(self):
         client.run("trtllm santacoder".split())
 
 
+@pytest.mark.trtllm
+@pytest.mark.gpu_4
 class TestTrtLlmHandler2:
-    # Runs on g5.12xl
     def test_llama2_7b_hf_smoothquant(self):
         with Runner('tensorrt-llm', 'llama2-7b-smoothquant') as r:
             prepare.build_trtllm_handler_model("llama2-7b-smoothquant")
@@ -327,8 +331,9 @@ def test_llama2_7b_chat(self):
         client.run("trtllm_chat llama2-7b-chat".split())
 
 
+@pytest.mark.lmi_dist
+@pytest.mark.gpu_4
 class TestSchedulerSingleGPU:
-    # Runs on g5.12xl
 
     def test_gpt2(self):
         with Runner('lmi', 'gpt2') as r:
@@ -343,8 +348,9 @@ def test_bllm(self):
         rb_client.run("scheduler_single_gpu bloom-560m".split())
 
 
+@pytest.mark.lmi_dist
+@pytest.mark.gpu_4
 class TestSchedulerMultiGPU:
-    # Runs on g5.12xl
 
     def test_gptj_6b(self):
         with Runner('lmi', 'gpt-j-6b') as r:
@@ -353,8 +359,9 @@ def test_gptj_6b(self):
         rb_client.run("scheduler_multi_gpu gpt-j-6b".split())
 
 
+@pytest.mark.lmi_dist
+@pytest.mark.gpu_4
 class TestLmiDist1:
-    # Runs on g5.12xl
 
     def test_gpt_neox_20b(self):
         with Runner('lmi', 'gpt-neox-20b') as r:
@@ -421,8 +428,9 @@ def test_llama2_tiny_autoawq(self):
             os.system('sudo rm -rf models')
 
 
+@pytest.mark.lmi_dist
+@pytest.mark.gpu_4
 class TestLmiDist2:
-    # Runs on g5.12xl
 
     def test_gpt_neox_20b(self):
         with Runner('lmi', 'octocoder') as r:
@@ -479,8 +487,9 @@ def test_llama2_7b_chat(self):
         client.run("lmi_dist_chat llama2-7b-chat".split())
 
 
+@pytest.mark.vllm
+@pytest.mark.gpu_4
 class TestVllm1:
-    # Runs on g5.12xl
 
     def test_gpt_neox_20b(self):
         with Runner('lmi', 'gpt-neox-20b') as r:
@@ -527,8 +536,10 @@ def test_qwen2_7b_fp8(self):
         client.run("vllm qwen2-7b-fp8".split())
 
 
+@pytest.mark.vllm
+@pytest.mark.lora
+@pytest.mark.gpu_4
 class TestVllmLora:
-    # Runs on g5.12xl
 
     def test_lora_unmerged(self):
         with Runner('lmi', 'llama-7b-unmerged-lora') as r:
@@ -567,8 +578,10 @@ def test_lora_llama3_8b(self):
         client.run("vllm_adapters llama3-8b-unmerged-lora".split())
 
 
+@pytest.mark.lmi_dist
+@pytest.mark.lora
+@pytest.mark.gpu_4
 class TestLmiDistLora:
-    # Runs on g5.12xl
 
     def test_lora_unmerged(self):
         with Runner('lmi', 'llama-7b-unmerged-lora') as r:
@@ -610,8 +623,8 @@ def test_lora_llama3_8b(self):
         client.run("lmi_dist_adapters llama3-8b-unmerged-lora".split())
 
 
+@pytest.mark.inf
 class TestNeuronx1:
-    # Runs on inf2.24xl
 
     def test_python_mode(self):
         with Runner('pytorch-inf2', 'test_python_mode', download=True) as r:
@@ -687,8 +700,8 @@ def test_partition(self, model):
         os.system('sudo rm -rf models')
 
 
+@pytest.mark.inf
 class TestNeuronx2:
-    # Runs on inf2.24xl
 
     def test_stream_opt(self):
         with Runner('pytorch-inf2', 'opt-1.3b-streaming') as r:
@@ -728,8 +741,8 @@ def test_stable_diffusion_xl(self):
             "neuron-stable-diffusion stable-diffusion-xl-neuron".split())
 
 
+@pytest.mark.inf
 class TestNeuronxRollingBatch:
-    # Runs on inf2.24xl
 
     def test_llama_7b(self):
         with Runner('pytorch-inf2', 'llama-7b-rb') as r:
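Note: with the new mark input, a manual run of this workflow can select
tests by marker expression rather than only by keyword. A hypothetical
dispatch via the GitHub CLI (the instance value is illustrative, and any
other required inputs such as djl-version are omitted here):

    gh workflow run integration_execute.yml -f mark=lmi_dist -f instance=g5.12xlarge

Leaving both test and mark at their empty defaults is intended to run
the full suite, since empty -k and -m expressions apply no filter.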
From e1e4964ee051e208fd9c2369862b61e54ee420a3 Mon Sep 17 00:00:00 2001
From: Zach Kimberg
Date: Mon, 15 Jul 2024 13:21:06 -0700
Subject: [PATCH 2/2] Formatter

---
 tests/integration/tests.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/integration/tests.py b/tests/integration/tests.py
index 94f2a194b..1690083f7 100644
--- a/tests/integration/tests.py
+++ b/tests/integration/tests.py
@@ -86,6 +86,7 @@ def launch(self, env_vars=None, container=None, cmd=None):
 
 @pytest.mark.cpu
 class TestCpuFull:
+
     def test_python_model(self):
         with Runner('cpu-full', 'python_model', download=True) as r:
             r.launch(
@@ -113,6 +114,7 @@ def test_python_dynamic_batch(self):
 @pytest.mark.cpu
 @pytest.mark.parametrize('arch', ["cpu", "cpu-full"])
 class TestCpuBoth:
+
     def test_pytorch(self, arch):
         with Runner(arch, 'pytorch', download=True) as r:
             r.launch(
@@ -169,6 +171,7 @@ def test_tensorflow_binary(self, arch):
 @pytest.mark.gpu
 @pytest.mark.gpu_4
 class TestGpu:
+
     def test_python_model(self):
         with Runner('pytorch-gpu', 'python_model', download=True) as r:
             r.launch(
@@ -188,6 +191,7 @@ def test_pytorch(self):
 
 @pytest.mark.aarch64
 class TestAarch64:
+
     def test_pytorch(self):
         with Runner('aarch64', 'pytorch_model', download=True) as r:
             r.launch(
@@ -208,6 +212,7 @@ def test_onnx(self):
 @pytest.mark.hf
 @pytest.mark.gpu_4
 class TestHfHandler:
+
     def test_gpt_neo(self):
         with Runner('lmi', 'test_gpt4all_lora') as r:
             prepare.build_hf_handler_model("gpt-neo-2.7b")
@@ -254,6 +259,7 @@ def test_streaming_t5_large(self):
 @pytest.mark.trtllm
 @pytest.mark.gpu_4
 class TestTrtLlmHandler1:
+
     def test_llama2_13b_tp4(self):
         with Runner('tensorrt-llm', 'llama2-13b') as r:
             prepare.build_trtllm_handler_model("llama2-13b")
@@ -300,6 +306,7 @@ def test_santacoder(self):
 @pytest.mark.trtllm
 @pytest.mark.gpu_4
 class TestTrtLlmHandler2:
+
     def test_llama2_7b_hf_smoothquant(self):
         with Runner('tensorrt-llm', 'llama2-7b-smoothquant') as r:
             prepare.build_trtllm_handler_model("llama2-7b-smoothquant")
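A note on the torch install added in patch 1: the patch only installs
torch, but the comment states the intent of using the cuda capability of
the current device to selectively run tests. A minimal sketch of how that
could look (hypothetical, not part of these patches; the helper name and
the 8.9 threshold are illustrative):

    import pytest
    import torch


    def cuda_capability():
        # (major, minor) compute capability of device 0, or (0, 0) if no GPU
        if not torch.cuda.is_available():
            return (0, 0)
        return torch.cuda.get_device_capability(0)


    # Example gate: fp8 tests such as test_qwen2_7b_fp8 could be skipped on
    # hardware below the assumed Ada/Hopper capability threshold of 8.9.
    requires_fp8 = pytest.mark.skipif(
        cuda_capability() < (8, 9),
        reason="fp8 requires cuda compute capability >= 8.9")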