[CI] Add integration pytest marks #2176

Merged Jul 16, 2024 (2 commits)
16 changes: 13 additions & 3 deletions .github/workflows/integration_execute.yml
@@ -4,8 +4,13 @@ on:
   workflow_dispatch:
     inputs:
       test:
-        description: 'Which test to run. Should be a class in tests.py or TestClass::test_fun_name'
-        required: true
+        description: 'Which test to run as a pytest keyword expression. Should be a class in tests.py or TestClass::test_fun_name'
+        required: false
+        default: ""
+      mark:
+        description: 'Which tests to run as a pytest marker expression. Should be a mark from pytest.ini'
+        required: false
+        default: ""
       instance:
         description: 'Instance used for testing'
         required: true
@@ -72,6 +77,11 @@ jobs:
         sudo apt-get install python3 python-is-python3 python3-pip -y
     - name: Install pip dependencies
       run: pip3 install pytest requests "numpy<2" pillow huggingface_hub
+    - name: Install torch
+      # Use torch to get cuda capability of current device to selectively run tests
+      # Torch version doesn't really matter that much
+      run: |
+        pip3 install torch==2.3.0
     - name: install awscli
       run: |
         sudo apt-get update
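
Note: torch is installed here purely for device introspection, not for serving. A minimal sketch of how a CUDA capability check can selectively skip tests, assuming a hypothetical require_compute_capability helper (illustrative only, not code from this PR):

import pytest
import torch


def require_compute_capability(min_major):
    # torch.cuda.get_device_capability() returns a (major, minor) tuple
    # for the active GPU, e.g. (7, 5) on a T4 or (8, 0) on an A100.
    if not torch.cuda.is_available():
        return pytest.mark.skip(reason="no CUDA device present")
    major, _minor = torch.cuda.get_device_capability()
    return pytest.mark.skipif(
        major < min_major,
        reason=f"requires compute capability >= {min_major}.0")


@require_compute_capability(9)  # e.g. fp8 kernels need sufficiently new hardware
def test_fp8_placeholder():
    pass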
@@ -96,7 +106,7 @@ jobs:
       env:
         TEST_DJL_VERSION: ${{ inputs.djl-version }}
       run: |
-        python -m pytest -k ${{ inputs.test }} tests.py
+        python -m pytest tests.py -k "${{ inputs.test }}" -m "${{ inputs.mark }}"
     - name: Cleanup
       working-directory: tests/integration
       run: |
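
The quoting change matters: both inputs now default to the empty string, and pytest applies -k/-m filtering only when the expression is non-empty, so a dispatch with both fields left blank still runs the whole suite. A rough local equivalent of this step, with example expressions that are not from this PR:

import sys

import pytest

# -k takes a keyword expression over test/class names; -m takes a marker
# expression over the marks declared in pytest.ini.
sys.exit(pytest.main([
    "tests.py",
    "-k", "TestGpu or test_python_model",
    "-m", "gpu and not lora",
]))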
14 changes: 14 additions & 0 deletions tests/integration/pytest.ini
@@ -0,0 +1,14 @@
+[pytest]
+markers =
+    gpu: Runs on any gpu machine
+    gpu_4: Runs on a machine with at least 4 gpus (includes gpu mark)
+    inf: Runs on inferentia
+    aarch64: Runs on aarch64
+    cpu: Tests cpu
+
+    hf: Tests the hf accelerate backend
+    lmi_dist: Tests the lmi backend
+    vllm: Tests the vllm backend
+    trtllm: Tests the trtllm backend
+
+    lora: Tests lora feature
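
pytest.ini alone only declares marker names; the "(includes gpu mark)" note on gpu_4 needs code to enforce. One possible implementation, sketched as an assumption rather than taken from this PR, is a collection hook in tests/integration/conftest.py:

import pytest


def pytest_collection_modifyitems(config, items):
    # Make every gpu_4 test also selectable with `-m gpu`.
    for item in items:
        if item.get_closest_marker("gpu_4") is not None:
            item.add_marker(pytest.mark.gpu)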
54 changes: 37 additions & 17 deletions tests/integration/tests.py
@@ -84,8 +84,9 @@ def launch(self, env_vars=None, container=None, cmd=None):
                               capture_output=True)


+@pytest.mark.cpu
 class TestCpuFull:
     # Runs on cpu

     def test_python_model(self):
         with Runner('cpu-full', 'python_model', download=True) as r:
             r.launch(
@@ -110,9 +111,10 @@ def test_python_dynamic_batch(self):
         os.system("./test_client.sh image/jpg models/kitten.jpg")


+@pytest.mark.cpu
 @pytest.mark.parametrize('arch', ["cpu", "cpu-full"])
 class TestCpuBoth:
     # Runs on cpu

     def test_pytorch(self, arch):
         with Runner(arch, 'pytorch', download=True) as r:
             r.launch(
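
Class-level marks compose with parametrize: every generated variant inherits the class's marks. A minimal, self-contained illustration (TestDemo is hypothetical, not part of this diff):

import pytest


@pytest.mark.cpu
@pytest.mark.parametrize('arch', ["cpu", "cpu-full"])
class TestDemo:

    def test_arch(self, arch):
        # Collected as test_arch[cpu] and test_arch[cpu-full]; both are
        # selected by `-m cpu` because the class mark propagates.
        assert arch in ("cpu", "cpu-full")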
@@ -166,8 +168,10 @@ def test_tensorflow_binary(self, arch):
         os.system("./test_client.sh tensor/ndlist 1,224,224,3")


+@pytest.mark.gpu
+@pytest.mark.gpu_4
 class TestGpu:
     # Runs on any gpu instance

     def test_python_model(self):
         with Runner('pytorch-gpu', 'python_model', download=True) as r:
             r.launch(
@@ -185,8 +189,9 @@ def test_pytorch(self):
         os.system("./test_client.sh image/jpg models/kitten.jpg")


+@pytest.mark.aarch64
 class TestAarch64:
     # Runs on aarch64

     def test_pytorch(self):
         with Runner('aarch64', 'pytorch_model', download=True) as r:
             r.launch(
@@ -204,8 +209,10 @@ def test_onnx(self):
         os.system("./test_client.sh image/jpg models/kitten.jpg")


+@pytest.mark.hf
+@pytest.mark.gpu_4
 class TestHfHandler:
     # Runs on g5.12xl

     def test_gpt_neo(self):
         with Runner('lmi', 'test_gpt4all_lora') as r:
             prepare.build_hf_handler_model("gpt-neo-2.7b")
@@ -249,8 +256,10 @@ def test_streaming_t5_large(self):
         client.run("huggingface t5-large".split())


+@pytest.mark.trtllm
+@pytest.mark.gpu_4
 class TestTrtLlmHandler1:
     # Runs on g5.12xl

     def test_llama2_13b_tp4(self):
         with Runner('tensorrt-llm', 'llama2-13b') as r:
             prepare.build_trtllm_handler_model("llama2-13b")
@@ -294,8 +303,10 @@ def test_santacoder(self):
         client.run("trtllm santacoder".split())


+@pytest.mark.trtllm
+@pytest.mark.gpu_4
 class TestTrtLlmHandler2:
     # Runs on g5.12xl

     def test_llama2_7b_hf_smoothquant(self):
         with Runner('tensorrt-llm', 'llama2-7b-smoothquant') as r:
             prepare.build_trtllm_handler_model("llama2-7b-smoothquant")
@@ -327,8 +338,9 @@ def test_llama2_7b_chat(self):
         client.run("trtllm_chat llama2-7b-chat".split())


+@pytest.mark.lmi_dist
+@pytest.mark.gpu_4
 class TestSchedulerSingleGPU:
     # Runs on g5.12xl

     def test_gpt2(self):
         with Runner('lmi', 'gpt2') as r:
@@ -343,8 +355,9 @@ def test_bllm(self):
         rb_client.run("scheduler_single_gpu bloom-560m".split())


+@pytest.mark.lmi_dist
+@pytest.mark.gpu_4
 class TestSchedulerMultiGPU:
     # Runs on g5.12xl

     def test_gptj_6b(self):
         with Runner('lmi', 'gpt-j-6b') as r:
@@ -353,8 +366,9 @@ def test_gptj_6b(self):
         rb_client.run("scheduler_multi_gpu gpt-j-6b".split())


+@pytest.mark.lmi_dist
+@pytest.mark.gpu_4
 class TestLmiDist1:
     # Runs on g5.12xl

     def test_gpt_neox_20b(self):
         with Runner('lmi', 'gpt-neox-20b') as r:
@@ -421,8 +435,9 @@ def test_llama2_tiny_autoawq(self):
         os.system('sudo rm -rf models')


+@pytest.mark.lmi_dist
+@pytest.mark.gpu_4
 class TestLmiDist2:
     # Runs on g5.12xl

     def test_gpt_neox_20b(self):
         with Runner('lmi', 'octocoder') as r:
@@ -479,8 +494,9 @@ def test_llama2_7b_chat(self):
         client.run("lmi_dist_chat llama2-7b-chat".split())


+@pytest.mark.vllm
+@pytest.mark.gpu_4
 class TestVllm1:
     # Runs on g5.12xl

     def test_gpt_neox_20b(self):
         with Runner('lmi', 'gpt-neox-20b') as r:
@@ -527,8 +543,10 @@ def test_qwen2_7b_fp8(self):
         client.run("vllm qwen2-7b-fp8".split())


+@pytest.mark.vllm
+@pytest.mark.lora
+@pytest.mark.gpu_4
 class TestVllmLora:
     # Runs on g5.12xl

     def test_lora_unmerged(self):
         with Runner('lmi', 'llama-7b-unmerged-lora') as r:
@@ -567,8 +585,10 @@ def test_lora_llama3_8b(self):
         client.run("vllm_adapters llama3-8b-unmerged-lora".split())


+@pytest.mark.lmi_dist
+@pytest.mark.lora
+@pytest.mark.gpu_4
 class TestLmiDistLora:
     # Runs on g5.12xl

     def test_lora_unmerged(self):
         with Runner('lmi', 'llama-7b-unmerged-lora') as r:
@@ -610,8 +630,8 @@ def test_lora_llama3_8b(self):
         client.run("lmi_dist_adapters llama3-8b-unmerged-lora".split())


+@pytest.mark.inf
 class TestNeuronx1:
     # Runs on inf2.24xl

     def test_python_mode(self):
         with Runner('pytorch-inf2', 'test_python_mode', download=True) as r:
@@ -687,8 +707,8 @@ def test_partition(self, model):
         os.system('sudo rm -rf models')


+@pytest.mark.inf
 class TestNeuronx2:
     # Runs on inf2.24xl

     def test_stream_opt(self):
         with Runner('pytorch-inf2', 'opt-1.3b-streaming') as r:
@@ -728,8 +748,8 @@ def test_stable_diffusion_xl(self):
             "neuron-stable-diffusion stable-diffusion-xl-neuron".split())


+@pytest.mark.inf
 class TestNeuronxRollingBatch:
     # Runs on inf2.24xl

     def test_llama_7b(self):
         with Runner('pytorch-inf2', 'llama-7b-rb') as r:
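
With marks on every class, a dispatch can target a hardware slice instead of enumerating tests. A hedged sketch using the GitHub REST API workflow-dispatch endpoint (the repo slug, ref, and input values here are assumptions, and the workflow's other required inputs still apply):

import requests

# POST /repos/{owner}/{repo}/actions/workflows/{workflow_file}/dispatches
resp = requests.post(
    "https://api.github.com/repos/deepjavalibrary/djl-serving"
    "/actions/workflows/integration_execute.yml/dispatches",
    headers={
        "Authorization": "Bearer <token>",
        "Accept": "application/vnd.github+json",
    },
    json={
        "ref": "master",
        "inputs": {"mark": "inf", "instance": "inf2.24xl"},
    },
)
resp.raise_for_status()  # GitHub returns 204 No Content on success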