Commit 34095b7: add deepseek-r1-distill integration test
siddvenk committed Jan 28, 2025 (parent: 21b54d1)
Showing 4 changed files with 24 additions and 2 deletions.
@@ -51,14 +51,16 @@ def parse_chat_completions_request_vllm(
     exclude = {"messages"}
     param = chat_params.model_dump(exclude_none=True, exclude=exclude)
 
     # TODO - figure out what we need to pass for given format
     content_format = resolve_chat_template_content_format(
         chat_template=None,
         given_format="auto",
         tokenizer=tokenizer,
     )
 
     conversation, mm_data = parse_chat_messages(
-        chat_params.messages, rolling_batch.get_model_config(), tokenizer, content_format)
+        chat_params.messages, rolling_batch.get_model_config(), tokenizer,
+        content_format)
 
     prompt_data: Union[str, List[int]]
     if is_mistral_tokenizer:
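
For context on the touched code: the "auto" content format decides whether vLLM's parse_chat_messages expects message content as a plain string or as a list of OpenAI-style parts. A minimal sketch of that resolution, assuming tokenizer, model_config, and messages are already in scope as they are in the real handler (the import path matches vLLM releases current around the time of this commit and has moved between versions):

from vllm.entrypoints.chat_utils import (parse_chat_messages,
                                         resolve_chat_template_content_format)

# "auto" resolves to "string" or "openai" depending on the chat template:
#   "string": {"role": "user", "content": "Hello"}
#   "openai": {"role": "user", "content": [{"type": "text", "text": "Hello"}]}
content_format = resolve_chat_template_content_format(
    chat_template=None,   # fall back to the tokenizer's built-in template
    given_format="auto",
    tokenizer=tokenizer,
)
conversation, mm_data = parse_chat_messages(
    messages, model_config, tokenizer, content_format)
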
12 changes: 11 additions & 1 deletion tests/integration/llm/client.py
@@ -566,6 +566,11 @@ def get_model_name():
         "batch_size": [1, 4],
         "seq_length": [256],
     },
+    "deepseek-r1-llama": {
+        "batch_size": [1, 4],
+        "seq_length": [256],
+        "tokenizer": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+    },
 }
 
 vllm_neo_model_spec = {
@@ -597,7 +602,12 @@ def get_model_name():
         "batch_size": [1, 4],
         "seq_length": [256],
         "tokenizer": "TheBloke/Llama-2-7B-Chat-fp16"
-    }
+    },
+    "deepseek-r1-llama": {
+        "batch_size": [1, 4],
+        "seq_length": [256],
+        "tokenizer": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+    },
 }
 
 lmi_dist_aiccl_model_spec = {
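
The new entry mirrors its neighbors, so the test client needs no other changes. As a rough illustration only (run_spec and send_request below are hypothetical helpers, not the actual client code), a spec of this shape would typically be consumed like this:

from transformers import AutoTokenizer

def run_spec(spec, send_request):
    # Hypothetical sketch: exercise every configured batch size and
    # sequence length, then use the spec's tokenizer to bound the
    # length of each generated output.
    tokenizer = AutoTokenizer.from_pretrained(spec["tokenizer"])
    for batch_size in spec["batch_size"]:
        for seq_length in spec["seq_length"]:
            outputs = send_request("Deep learning is", batch_size, seq_length)
            for text in outputs:
                assert len(tokenizer.encode(text)) <= seq_length
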
3 changes: 3 additions & 0 deletions tests/integration/llm/prepare.py
@@ -1056,6 +1056,9 @@
         "option.max_model_len": 8192,
         "option.max_rolling_batch_size": 16,
         "option.enforce_eager": True,
     },
+    "deepseek-r1-llama": {
+        "option.model_id": "s3://djl-llm/deepseek-r1-distill-llama-8b/"
+    }
 }
 
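
These dict keys are written through to the model's serving.properties, so the new entry amounts to pointing the handler at the S3 artifact and accepting the harness defaults for everything else. A minimal sketch of that rendering (the actual file-writing details of prepare.py are assumed, not shown in this diff):

spec = {"option.model_id": "s3://djl-llm/deepseek-r1-distill-llama-8b/"}
with open("serving.properties", "w") as f:
    for key, value in spec.items():
        f.write(f"{key}={value}\n")
# Resulting line: option.model_id=s3://djl-llm/deepseek-r1-distill-llama-8b/
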
7 changes: 7 additions & 0 deletions tests/integration/tests.py
@@ -596,6 +596,13 @@ def test_llama2_7b_chat(self):
             r.launch()
             client.run("vllm_chat llama2-7b-chat".split())
 
+    def test_deepseek_r1_distill_llama_8b(self):
+        with Runner('lmi', 'deepseek-r1-llama') as r:
+            prepare.build_vllm_model('deepseek-r1-llama')
+            r.launch()
+            client.run("vllm deepseek-r1-llama".split())
+            client.run("vllm_chat deepseek-r1-llama".split())
+
     @pytest.mark.skipif(not is_applicable_cuda_capability(89),
                         reason="Unsupported CUDA capability")
     def test_qwen2_7b_fp8(self):
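
Once in place, the new case can be selected on its own with pytest's keyword filter, e.g.:

    pytest tests/integration/tests.py -k test_deepseek_r1_distill_llama_8b

(The surrounding test class and any required fixtures are not shown in this excerpt, so additional setup may apply.)
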
