From 65469473ff1a40ba1f2a0592d4c46cb920198174 Mon Sep 17 00:00:00 2001
From: Roger Wang <136131678+ywang96@users.noreply.github.com>
Date: Thu, 12 Sep 2024 17:31:18 -0700
Subject: [PATCH] [Misc] Update Pixtral example (#8431)

---
 examples/offline_inference_pixtral.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/examples/offline_inference_pixtral.py b/examples/offline_inference_pixtral.py
index 738d890607e37..c12ff7021cf51 100644
--- a/examples/offline_inference_pixtral.py
+++ b/examples/offline_inference_pixtral.py
@@ -11,7 +11,7 @@
 # - Server:
 #
 # ```bash
-# vllm serve mistralai/Pixtral-12B-2409 --tokenizer_mode mistral --limit_mm_per_prompt 'image=4' --max_num_batched_tokens 16384
+# vllm serve mistralai/Pixtral-12B-2409 --tokenizer-mode mistral --limit-mm-per-prompt 'image=4' --max-model-len 16384
 # ```
 #
 # - Client:
@@ -45,6 +45,7 @@ def run_simple_demo():
     model_name = "mistralai/Pixtral-12B-2409"
     sampling_params = SamplingParams(max_tokens=8192)
 
+    # Lower max_num_seqs or max_model_len on low-VRAM GPUs.
     llm = LLM(model=model_name, tokenizer_mode="mistral")
 
     prompt = "Describe this image in one sentence."
@@ -83,7 +84,7 @@ def run_advanced_demo():
         model=model_name,
         tokenizer_mode="mistral",
         limit_mm_per_prompt={"image": max_img_per_msg},
-        max_num_batched_tokens=max_img_per_msg * max_tokens_per_img,
+        max_model_len=max_img_per_msg * max_tokens_per_img,
     )
 
     prompt = "Describe the following image."
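
For context, here is a minimal offline sketch exercising the constructor arguments as patched above (`max_model_len` in place of `max_num_batched_tokens`). The image URL, sampling settings, and the `max_img_per_msg` / `max_tokens_per_img` values are illustrative assumptions, not taken from the diff:

```python
from vllm import LLM, SamplingParams

# Assumed budgets for illustration; the diff only shows the expression
# max_img_per_msg * max_tokens_per_img, not the values themselves.
max_img_per_msg = 4
max_tokens_per_img = 4096

# Mirrors the patched LLM(...) call in run_advanced_demo().
llm = LLM(
    model="mistralai/Pixtral-12B-2409",
    tokenizer_mode="mistral",
    limit_mm_per_prompt={"image": max_img_per_msg},
    max_model_len=max_img_per_msg * max_tokens_per_img,
)

# Placeholder image URL; any reachable image works.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image in one sentence."},
            {"type": "image_url", "image_url": {"url": "https://example.com/duck.jpg"}},
        ],
    }
]

outputs = llm.chat(messages, sampling_params=SamplingParams(max_tokens=256))
print(outputs[0].outputs[0].text)
```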