diff --git a/vllm/config.py b/vllm/config.py index 79754bd04102f..ac5a4c91b1738 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -3174,7 +3174,8 @@ def __post_init__(self): if self.compilation_config is None: self.compilation_config = CompilationConfig() - if envs.VLLM_USE_V1 and not self.model_config.enforce_eager: + if envs.VLLM_USE_V1 and self.model_config is not None and \ + not self.model_config.enforce_eager: # NOTE(woosuk): Currently, we use inductor because the piecewise # CUDA graphs do not work properly with the custom CUDA kernels. # FIXME(woosuk): Disable inductor to reduce the compilation time