From 4de3a867fdd3f40190d887838b1f4c4a9b4be48f Mon Sep 17 00:00:00 2001 From: youkaichao Date: Thu, 25 Jul 2024 17:44:09 -0700 Subject: [PATCH] [ci][distributed] fix flaky tests (#6806) --- tests/distributed/test_pipeline_parallel.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/distributed/test_pipeline_parallel.py b/tests/distributed/test_pipeline_parallel.py index d666b8a1d44bd..5ff39ddfbf996 100644 --- a/tests/distributed/test_pipeline_parallel.py +++ b/tests/distributed/test_pipeline_parallel.py @@ -1,3 +1,10 @@ +""" +WARNING: This test runs in both single-node (4 GPUs) and multi-node + (2 nodes with 2 GPUs each) modes. If the test only uses 2 GPUs, it is + important to set the distributed backend to "mp" to avoid Ray scheduling + all workers in a node other than the head node, which can cause the test + to fail. +""" import os import pytest @@ -78,7 +85,7 @@ def test_pp_cudagraph(PP_SIZE, MODEL_NAME, ATTN_BACKEND): "--pipeline-parallel-size", str(PP_SIZE), "--distributed-executor-backend", - "ray", + "mp", ] os.environ["VLLM_ATTENTION_BACKEND"] = ATTN_BACKEND