From 34095b7b0cb69295699dd9cd9bf5ac3f9fd9c60d Mon Sep 17 00:00:00 2001
From: Siddharth Venkatesan
Date: Tue, 28 Jan 2025 10:35:03 -0800
Subject: [PATCH] add deepseek-r1-distill integration test

---
 .../djl_python/chat_completions/vllm_chat_utils.py |  4 +++-
 tests/integration/llm/client.py                    | 12 +++++++++++-
 tests/integration/llm/prepare.py                   |  3 +++
 tests/integration/tests.py                         |  7 +++++++
 4 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/engines/python/setup/djl_python/chat_completions/vllm_chat_utils.py b/engines/python/setup/djl_python/chat_completions/vllm_chat_utils.py
index bb1b15872..dea00555d 100644
--- a/engines/python/setup/djl_python/chat_completions/vllm_chat_utils.py
+++ b/engines/python/setup/djl_python/chat_completions/vllm_chat_utils.py
@@ -51,6 +51,7 @@ def parse_chat_completions_request_vllm(
     exclude = {"messages"}
     param = chat_params.model_dump(exclude_none=True, exclude=exclude)
 
+    # TODO - figure out what we need to pass for given format
     content_format = resolve_chat_template_content_format(
         chat_template=None,
         given_format="auto",
@@ -58,7 +59,8 @@ def parse_chat_completions_request_vllm(
     )
 
     conversation, mm_data = parse_chat_messages(
-        chat_params.messages, rolling_batch.get_model_config(), tokenizer, content_format)
+        chat_params.messages, rolling_batch.get_model_config(), tokenizer,
+        content_format)
 
     prompt_data: Union[str, List[int]]
     if is_mistral_tokenizer:
diff --git a/tests/integration/llm/client.py b/tests/integration/llm/client.py
index 821f55d12..55f0967d5 100644
--- a/tests/integration/llm/client.py
+++ b/tests/integration/llm/client.py
@@ -566,6 +566,11 @@ def get_model_name():
         "batch_size": [1, 4],
         "seq_length": [256],
     },
+    "deepseek-r1-llama": {
+        "batch_size": [1, 4],
+        "seq_length": [256],
+        "tokenizer": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+    },
 }
 
 vllm_neo_model_spec = {
@@ -597,7 +602,12 @@ def get_model_name():
         "batch_size": [1, 4],
         "seq_length": [256],
         "tokenizer": "TheBloke/Llama-2-7B-Chat-fp16"
-    }
+    },
+    "deepseek-r1-llama": {
+        "batch_size": [1, 4],
+        "seq_length": [256],
+        "tokenizer": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+    },
 }
 
 lmi_dist_aiccl_model_spec = {
diff --git a/tests/integration/llm/prepare.py b/tests/integration/llm/prepare.py
index 1142a1b7f..861484b85 100644
--- a/tests/integration/llm/prepare.py
+++ b/tests/integration/llm/prepare.py
@@ -1056,6 +1056,9 @@
         "option.max_model_len": 8192,
         "option.max_rolling_batch_size": 16,
         "option.enforce_eager": True,
+    },
+    "deepseek-r1-llama": {
+        "option.model_id": "s3://djl-llm/deepseek-r1-distill-llama-8b/"
     }
 }
 
diff --git a/tests/integration/tests.py b/tests/integration/tests.py
index d25e3b46b..59f0d8558 100644
--- a/tests/integration/tests.py
+++ b/tests/integration/tests.py
@@ -596,6 +596,13 @@ def test_llama2_7b_chat(self):
             r.launch()
             client.run("vllm_chat llama2-7b-chat".split())
 
+    def test_deepseek_r1_distill_llama_8b(self):
+        with Runner('lmi', 'deepseek-r1-llama') as r:
+            prepare.build_vllm_model('deepseek-r1-llama')
+            r.launch()
+            client.run("vllm deepseek-r1-llama".split())
+            client.run("vllm_chat deepseek-r1-llama".split())
+
     @pytest.mark.skipif(not is_applicable_cuda_capability(89),
                         reason="Unsupported CUDA capability")
     def test_qwen2_7b_fp8(self):
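
Note (reviewer sketch, not part of the commit): the new test case follows the same Runner/prepare/client pattern as test_llama2_7b_chat above. The sketch below shows how the "deepseek-r1-llama" key added to prepare.py and client.py is consumed by the test; the import paths are assumptions, everything else reuses only names that appear in the patch.

# Standalone sketch of what the added integration test does; illustrative only.
# Import paths are assumptions -- tests.py already has its own Runner, prepare,
# and client imports, and the body below mirrors the added test method.
import llm.client as client      # assumed path; uses the new "deepseek-r1-llama" spec entries
import llm.prepare as prepare    # assumed path; uses the new option.model_id entry

from tests import Runner         # assumed path; context manager that starts/stops the LMI container

with Runner('lmi', 'deepseek-r1-llama') as r:
    # prepare.py entry: model configuration pointing at
    # option.model_id=s3://djl-llm/deepseek-r1-distill-llama-8b/
    prepare.build_vllm_model('deepseek-r1-llama')
    r.launch()
    # client.py entries drive the request/validation loop: batch sizes 1 and 4,
    # seq_length 256, tokenizer deepseek-ai/DeepSeek-R1-Distill-Llama-8B
    client.run("vllm deepseek-r1-llama".split())       # text generation checks
    client.run("vllm_chat deepseek-r1-llama".split())  # chat completions checks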