Commit 34095b7: add deepseek-r1-distill integration test
siddvenk committed Jan 28, 2025 (parent: 21b54d1)
Showing 4 changed files with 24 additions and 2 deletions.
@@ -51,14 +51,16 @@ def parse_chat_completions_request_vllm(
     exclude = {"messages"}
     param = chat_params.model_dump(exclude_none=True, exclude=exclude)
 
     # TODO - figure out what we need to pass for given format
     content_format = resolve_chat_template_content_format(
         chat_template=None,
         given_format="auto",
         tokenizer=tokenizer,
     )
 
     conversation, mm_data = parse_chat_messages(
-        chat_params.messages, rolling_batch.get_model_config(), tokenizer, content_format)
+        chat_params.messages, rolling_batch.get_model_config(), tokenizer,
+        content_format)
 
     prompt_data: Union[str, List[int]]
     if is_mistral_tokenizer:
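
For context on the touched code: the "auto" content format decides whether vLLM's parse_chat_messages expects message content as a plain string or as a list of OpenAI-style parts. A minimal sketch of that resolution, assuming tokenizer, model_config, and messages are already in scope as they are in the real handler (the import path matches vLLM releases current around the time of this commit and has moved between versions):

from vllm.entrypoints.chat_utils import (parse_chat_messages,
                                         resolve_chat_template_content_format)

# "auto" resolves to "string" or "openai" depending on the chat template:
#   "string": {"role": "user", "content": "Hello"}
#   "openai": {"role": "user", "content": [{"type": "text", "text": "Hello"}]}
content_format = resolve_chat_template_content_format(
    chat_template=None,   # fall back to the tokenizer's built-in template
    given_format="auto",
    tokenizer=tokenizer,
)
conversation, mm_data = parse_chat_messages(
    messages, model_config, tokenizer, content_format)
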
12 changes: 11 additions & 1 deletion tests/integration/llm/client.py
@@ -566,6 +566,11 @@ def get_model_name():
         "batch_size": [1, 4],
         "seq_length": [256],
     },
+    "deepseek-r1-llama": {
+        "batch_size": [1, 4],
+        "seq_length": [256],
+        "tokenizer": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+    },
 }
 
 vllm_neo_model_spec = {
@@ -597,7 +602,12 @@ def get_model_name():
         "batch_size": [1, 4],
         "seq_length": [256],
         "tokenizer": "TheBloke/Llama-2-7B-Chat-fp16"
-    }
+    },
+    "deepseek-r1-llama": {
+        "batch_size": [1, 4],
+        "seq_length": [256],
+        "tokenizer": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+    },
 }
 
 lmi_dist_aiccl_model_spec = {
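
The new entry mirrors its neighbors, so the test client needs no other changes. As a rough illustration only (run_spec and send_request below are hypothetical helpers, not the actual client code), a spec of this shape would typically be consumed like this:

from transformers import AutoTokenizer

def run_spec(spec, send_request):
    # Hypothetical sketch: exercise every configured batch size and
    # sequence length, then use the spec's tokenizer to bound the
    # length of each generated output.
    tokenizer = AutoTokenizer.from_pretrained(spec["tokenizer"])
    for batch_size in spec["batch_size"]:
        for seq_length in spec["seq_length"]:
            outputs = send_request("Deep learning is", batch_size, seq_length)
            for text in outputs:
                assert len(tokenizer.encode(text)) <= seq_length
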
3 changes: 3 additions & 0 deletions tests/integration/llm/prepare.py
@@ -1056,6 +1056,9 @@
         "option.max_model_len": 8192,
         "option.max_rolling_batch_size": 16,
         "option.enforce_eager": True,
     },
+    "deepseek-r1-llama": {
+        "option.model_id": "s3://djl-llm/deepseek-r1-distill-llama-8b/"
+    }
 }
 
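
These dict keys are written through to the model's serving.properties, so the new entry amounts to pointing the handler at the S3 artifact and accepting the harness defaults for everything else. A minimal sketch of that rendering (the actual file-writing details of prepare.py are assumed, not shown in this diff):

spec = {"option.model_id": "s3://djl-llm/deepseek-r1-distill-llama-8b/"}
with open("serving.properties", "w") as f:
    for key, value in spec.items():
        f.write(f"{key}={value}\n")
# Resulting line: option.model_id=s3://djl-llm/deepseek-r1-distill-llama-8b/
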
7 changes: 7 additions & 0 deletions tests/integration/tests.py
@@ -596,6 +596,13 @@ def test_llama2_7b_chat(self):
             r.launch()
             client.run("vllm_chat llama2-7b-chat".split())
 
+    def test_deepseek_r1_distill_llama_8b(self):
+        with Runner('lmi', 'deepseek-r1-llama') as r:
+            prepare.build_vllm_model('deepseek-r1-llama')
+            r.launch()
+            client.run("vllm deepseek-r1-llama".split())
+            client.run("vllm_chat deepseek-r1-llama".split())
+
     @pytest.mark.skipif(not is_applicable_cuda_capability(89),
                         reason="Unsupported CUDA capability")
     def test_qwen2_7b_fp8(self):
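
Once in place, the new case can be selected on its own with pytest's keyword filter, e.g.:

    pytest tests/integration/tests.py -k test_deepseek_r1_distill_llama_8b

(The surrounding test class and any required fixtures are not shown in this excerpt, so additional setup may apply.)
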
