diff --git a/tests/entrypoints/openai/test_basic.py b/tests/entrypoints/openai/test_basic.py
index 2c721d9ba7609..b3bbd10dfe879 100644
--- a/tests/entrypoints/openai/test_basic.py
+++ b/tests/entrypoints/openai/test_basic.py
@@ -1,9 +1,11 @@
+import socket
 from http import HTTPStatus
 
 import openai
 import pytest
 import requests
 
+from vllm import envs
 from vllm.version import __version__ as VLLM_VERSION
 
 from ...utils import RemoteOpenAIServer
@@ -59,3 +61,20 @@ async def test_log_metrics(client: openai.AsyncOpenAI):
     response = requests.get(base_url + "/metrics")
 
     assert response.status_code == HTTPStatus.OK
+
+
+@pytest.mark.asyncio
+async def test_frontend_multiprocessing_flag():
+    # Build the server without the flag that disables multiprocessing
+    with RemoteOpenAIServer("facebook/opt-125m", []), \
+            socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s, \
+            pytest.raises(OSError, match="Address already in use"):
+        # Ensure we see the backend RPC port in use
+        s.bind(("localhost", envs.VLLM_RPC_PORT))
+
+    # Build the server with the flag to disable frontend multiprocessing
+    with RemoteOpenAIServer("facebook/opt-125m",
+                            ["--disable-frontend-multiprocessing"]), \
+            socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        # Ensure the backend port is free -> no multiprocessing is happening
+        s.bind(("localhost", envs.VLLM_RPC_PORT))
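
Both blocks of the new test rely on the same check: a plain TCP bind probe. If the frontend spawned an RPC backend process, VLLM_RPC_PORT is already bound and the bind raises OSError ("Address already in use"); with --disable-frontend-multiprocessing the port stays free. A minimal standalone sketch of that probe is below; the helper name is hypothetical and not part of the diff, only socket and envs.VLLM_RPC_PORT come from it.

    import socket

    from vllm import envs


    def rpc_port_in_use(host: str = "localhost") -> bool:
        # Hypothetical helper, not part of the diff: try to bind the RPC port.
        # If the bind fails, the backend process spawned by frontend
        # multiprocessing (or some other process) is already holding it.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            try:
                s.bind((host, envs.VLLM_RPC_PORT))
            except OSError:
                return True
        return False

Under the test's assumptions, rpc_port_in_use() would be expected to return True while the server from the first block is running, and False for the server started with --disable-frontend-multiprocessing.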