diff --git a/serving/docker/lmi-container-requirements-common.txt b/serving/docker/lmi-container-requirements-common.txt
index e0da26dbc..4b9134c01 100644
--- a/serving/docker/lmi-container-requirements-common.txt
+++ b/serving/docker/lmi-container-requirements-common.txt
@@ -1,6 +1,6 @@
-peft
+peft==0.13.2
 protobuf==3.20.3
-transformers>=4.45.2
+transformers==4.45.2
 hf-transfer
 zstandard
 datasets==3.0.1
@@ -23,8 +23,8 @@ onnx
 sentence_transformers
 onnxruntime-gpu==1.20.0
 autoawq==0.2.5
-tokenizers>=0.20.3
-pydantic>=2.9.2
+tokenizers==0.20.3
+pydantic==2.9.2
 optimum==1.23.2
 torch==2.5.1
 torchvision==0.20.1
diff --git a/serving/docker/requirements-lmi.txt b/serving/docker/requirements-lmi.txt
index 387a70b43..c72c6c283 100644
--- a/serving/docker/requirements-lmi.txt
+++ b/serving/docker/requirements-lmi.txt
@@ -1,4 +1,4 @@
--r requirements-common.txt
+peft==0.13.2
 llmcompressor
 # flash infer kernels for vllm/lmi-dist
 https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.6/flashinfer-0.1.6+cu124torch2.4-cp311-cp311-linux_x86_64.whl
diff --git a/serving/docker/requirements-vllm.txt b/serving/docker/requirements-vllm.txt
index 7977615bd..8b1f0b1fa 100644
--- a/serving/docker/requirements-vllm.txt
+++ b/serving/docker/requirements-vllm.txt
@@ -1,3 +1,3 @@
--r requirements-common.txt
+peft==0.14.0
 llmcompressor
 vllm==0.7.1
\ No newline at end of file
diff --git a/tests/integration/llm/client.py b/tests/integration/llm/client.py
index b99111cfa..3aa3da0d3 100644
--- a/tests/integration/llm/client.py
+++ b/tests/integration/llm/client.py
@@ -1198,15 +1198,21 @@ def batch_generation(batch_size):
 def batch_generation_chat(batch_size):
     messages = [
         [{
-            "role": "system",
-            "content": "You are a helpful assistant."
+            "role": "user",
+            "content": "hello, can you help me?"
+        }, {
+            "role": "assistant",
+            "content": "Hi, what can i help you with today?"
         }, {
             "role": "user",
             "content": "What is deep learning?"
         }],
         [{
-            "role": "system",
-            "content": "You are a helpful assistant."
+            "role": "user",
+            "content": "hello, can you help me?"
+        }, {
+            "role": "assistant",
+            "content": "Hi, what can i help you with today?"
         }, {
             "role": "user",
             "content": "Who won the world series in 2020?"
@@ -1218,45 +1224,61 @@ def batch_generation_chat(batch_size):
             "content": "Where was it played?"
         }],
         [{
-            "role": "system",
-            "content": "You are a helpful assistant."
+            "role": "user",
+            "content": "hello, can you help me?"
+        }, {
+            "role": "assistant",
+            "content": "Hi, what can i help you with today?"
         }, {
             "role": "user",
             "content": "How do I build a car from cardboard and paper clips?"
         }],
         [{
-            "role": "system",
-            "content": "You are a helpful assistant."
+            "role": "user",
+            "content": "hello, can you help me?"
+        }, {
+            "role": "assistant",
+            "content": "Hi, what can i help you with today?"
         }, {
             "role": "user",
             "content": "Hello!"
         }],
         [{
-            "role": "system",
-            "content": "You are a helpful assistant."
+            "role": "user",
+            "content": "hello, can you help me?"
+        }, {
+            "role": "assistant",
+            "content": "Hi, what can i help you with today?"
         }, {
             "role": "user",
             "content": "Who are you?"
         }],
         [{
-            "role": "system",
-            "content": "You are a helpful assistant."
+            "role": "user",
+            "content": "hello, can you help me?"
+        }, {
+            "role": "assistant",
+            "content": "Hi, what can i help you with today?"
         }, {
             "role": "user",
             "content": "Hello world!"
         }],
         [{
-            "role":
-            "system",
-            "content":
-            "You're a helpful assistant! Answer the users question best you can."
+            "role": "user",
+            "content": "hello, can you help me?"
+        }, {
+            "role": "assistant",
+            "content": "Hi, what can i help you with today?"
         }, {
             "role": "user",
             "content": "What is the weather like in Brooklyn, New York?"
         }],
         [{
-            "role": "system",
-            "content": "You are a helpful assistant."
+            "role": "user",
+            "content": "hello, can you help me?"
+        }, {
+            "role": "assistant",
+            "content": "Hi, what can i help you with today?"
         }, {
             "role": "user",
diff --git a/tests/integration/llm/prepare.py b/tests/integration/llm/prepare.py
index 9b589e4ac..2a7f1f15c 100644
--- a/tests/integration/llm/prepare.py
+++ b/tests/integration/llm/prepare.py
@@ -763,7 +763,7 @@
         "option.tensor_parallel_degree": 4
     },
     "mistral-7b": {
-        "option.model_id": "s3://djl-llm/mistral-7b",
+        "option.model_id": "s3://djl-llm/mistral-7b-instruct-v03",
         "option.task": "text-generation",
         "option.tensor_parallel_degree": 4,
         "option.max_rolling_batch_size": 4