Skip to content

Commit

Permalink
[Serve] Shorten proxy timeout to 10s (#36470)
Browse files Browse the repository at this point in the history
Add a new config, `PROXY_HEALTH_CHECK_TIMEOUT_S`, set to 10s and use it for the HTTP proxy health check in place of the 30s `DEFAULT_HEALTH_CHECK_TIMEOUT_S`, so that HTTP proxies have a faster recovery time.

Co-authored-by: Edward Oakes <ed.nmi.oakes@gmail.com>
  • Loading branch information
GeneDer and edoakes authored Jun 21, 2023
1 parent cc983fc commit 5059adb
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 6 deletions.
3 changes: 2 additions & 1 deletion python/ray/serve/_private/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@
DEFAULT_HEALTH_CHECK_TIMEOUT_S = 30
DEFAULT_MAX_CONCURRENT_QUERIES = 100

# HTTP Proxy health check period
# HTTP Proxy health check configs
PROXY_HEALTH_CHECK_TIMEOUT_S = 10
PROXY_HEALTH_CHECK_PERIOD_S = (
float(os.environ.get("RAY_SERVE_PROXY_HEALTH_CHECK_PERIOD_S", "10")) or 10
)
Expand Down
6 changes: 3 additions & 3 deletions python/ray/serve/_private/http_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from ray.serve.config import HTTPOptions, DeploymentMode
from ray.serve._private.constants import (
ASYNC_CONCURRENCY,
DEFAULT_HEALTH_CHECK_TIMEOUT_S,
PROXY_HEALTH_CHECK_TIMEOUT_S,
SERVE_LOGGER_NAME,
SERVE_PROXY_NAME,
SERVE_NAMESPACE,
Expand Down Expand Up @@ -181,14 +181,14 @@ def update(self):
self.try_update_status(HTTPProxyStatus.UNHEALTHY)
elif (
time.time() - self._last_health_check_time
> DEFAULT_HEALTH_CHECK_TIMEOUT_S
> PROXY_HEALTH_CHECK_TIMEOUT_S
):
# Health check hasn't returned and the timeout is up, consider it
# failed.
self._health_check_obj_ref = None
logger.warning(
"Didn't receive health check response for HTTP proxy "
f"{self._node_id} after {DEFAULT_HEALTH_CHECK_TIMEOUT_S}s"
f"{self._node_id} after {PROXY_HEALTH_CHECK_TIMEOUT_S}s"
)
self.try_update_status(HTTPProxyStatus.UNHEALTHY)
else:
Expand Down
4 changes: 2 additions & 2 deletions python/ray/serve/tests/test_http_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ async def check_health(self):
assert proxy_state._consecutive_health_check_failures == 3


@patch("ray.serve._private.http_state.DEFAULT_HEALTH_CHECK_TIMEOUT_S", 0.1)
@patch("ray.serve._private.http_state.PROXY_HEALTH_CHECK_TIMEOUT_S", 0.1)
@patch("ray.serve._private.http_state.PROXY_HEALTH_CHECK_PERIOD_S", 0.1)
def test_http_proxy_state_check_health_always_timeout_timeout_eq_period():
"""Test calling update method on HTTPProxyState when the proxy state is HEALTHY and
Expand Down Expand Up @@ -461,7 +461,7 @@ async def check_health(self):
assert proxy_state._consecutive_health_check_failures == 3


@patch("ray.serve._private.http_state.DEFAULT_HEALTH_CHECK_TIMEOUT_S", 1)
@patch("ray.serve._private.http_state.PROXY_HEALTH_CHECK_TIMEOUT_S", 1)
@patch("ray.serve._private.http_state.PROXY_HEALTH_CHECK_PERIOD_S", 0.1)
def test_http_proxy_state_check_health_always_timeout_timeout_greater_than_period():
"""Test calling update method on HTTPProxyState when the proxy state is HEALTHY and
Expand Down

0 comments on commit 5059adb

Please sign in to comment.