From fef6ade63f4a3a4bd44ee26deb5aa9b75c9b3c1b Mon Sep 17 00:00:00 2001 From: sasha0552 Date: Wed, 30 Oct 2024 20:39:50 +0000 Subject: [PATCH] use xformers --- crash_repro.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crash_repro.py b/crash_repro.py index 4e2a49c0a3b80..6d6aa3f390c49 100644 --- a/crash_repro.py +++ b/crash_repro.py @@ -5,6 +5,7 @@ import torch +from tests.kernels.utils import override_backend_env_variable from vllm import LLM, SamplingParams, TokensPrompt from vllm.distributed import destroy_model_parallel, destroy_distributed_environment @@ -24,6 +25,8 @@ @pytest.fixture def llm(enable_chunked_prefill, enable_prefix_caching, use_v2_block_manager, monkeypatch): + override_backend_env_variable(monkeypatch, "XFORMERS") + if not use_v2_block_manager: monkeypatch.setenv("VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1", "1")