From 981716cd66226d4c19ff9bfebee6bd654007578b Mon Sep 17 00:00:00 2001
From: Chen Zhang
Date: Fri, 17 Jan 2025 04:30:08 +0800
Subject: [PATCH] [Bugfix] Set enforce_eager automatically for mllama (#12127)

Signed-off-by: Chen Zhang
---
 examples/offline_inference/vision_language.py             | 1 -
 examples/offline_inference/vision_language_multi_image.py | 1 -
 vllm/config.py                                            | 8 +++++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/examples/offline_inference/vision_language.py b/examples/offline_inference/vision_language.py
index 8bc715a50e0db..69228bbf22949 100644
--- a/examples/offline_inference/vision_language.py
+++ b/examples/offline_inference/vision_language.py
@@ -325,7 +325,6 @@ def run_mllama(question: str, modality: str):
         model=model_name,
         max_model_len=4096,
         max_num_seqs=16,
-        enforce_eager=True,
         disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,
     )
 
diff --git a/examples/offline_inference/vision_language_multi_image.py b/examples/offline_inference/vision_language_multi_image.py
index 33ef5f316f040..cf3c5dd4e0a2c 100644
--- a/examples/offline_inference/vision_language_multi_image.py
+++ b/examples/offline_inference/vision_language_multi_image.py
@@ -186,7 +186,6 @@ def load_mllama(question, image_urls: List[str]) -> ModelRequestData:
         model=model_name,
         max_model_len=4096,
         max_num_seqs=16,
-        enforce_eager=True,
         limit_mm_per_prompt={"image": len(image_urls)},
     )
 
diff --git a/vllm/config.py b/vllm/config.py
index a5f2161068d2a..79754bd04102f 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -607,10 +607,12 @@ def _verify_cuda_graph(self) -> None:
         self.max_seq_len_to_capture = min(self.max_seq_len_to_capture,
                                           self.max_model_len)
 
-        if (self.hf_config.model_type == 'deepseek_v3'
+        MODEL_NOT_SUPPORT_CUDA_GRAPH = ['deepseek_v3', 'mllama']
+        if (self.hf_config.model_type in MODEL_NOT_SUPPORT_CUDA_GRAPH
                 and not self.enforce_eager):
-            logger.warning("CUDA graph is not supported for Deepseek V3 yet, "
-                           "fallback to the eager mode.")
+            logger.warning(
+                "CUDA graph is not supported for %s yet, fallback to the eager "
+                "mode.", self.hf_config.model_type)
             self.enforce_eager = True
 
     def _verify_bnb_config(self) -> None:
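
---
Usage note (illustrative sketch, not part of the applied patch): with this
change, callers no longer need to pass enforce_eager=True when loading an
mllama model; _verify_cuda_graph() detects the 'mllama' model type, logs a
warning, and enables eager mode itself. The checkpoint name below is an
assumption for illustration and does not appear in the patch.

    # Hypothetical example; the model name is assumed, not from the patch.
    from vllm import LLM

    # enforce_eager is omitted on purpose: vLLM warns that CUDA graphs are
    # not yet supported for mllama and sets enforce_eager = True internally.
    llm = LLM(
        model="meta-llama/Llama-3.2-11B-Vision-Instruct",
        max_model_len=4096,
        max_num_seqs=16,
    )

The same simplification applies to the two bundled examples above, which is
why the patch deletes the explicit enforce_eager=True lines from them.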