diff --git a/doc/source/serve/production-guide/monitoring.md b/doc/source/serve/production-guide/monitoring.md
index 48c8aa4433c4..410f14ad9c9d 100644
--- a/doc/source/serve/production-guide/monitoring.md
+++ b/doc/source/serve/production-guide/monitoring.md
@@ -232,6 +232,8 @@ The following metrics are exposed by Ray Serve:
      - The number of exceptions that have occurred in the deployment.
    * - ``serve_deployment_replica_starts`` [**]
      - The number of times this replica has been restarted due to failure.
+   * - ``serve_deployment_replica_healthy``
+     - Whether this deployment replica is healthy. 1 means healthy, 0 unhealthy.
    * - ``serve_deployment_processing_latency_ms`` [**]
      - The latency for queries to be processed.
    * - ``serve_replica_processing_queries`` [**]
diff --git a/python/ray/serve/_private/deployment_state.py b/python/ray/serve/_private/deployment_state.py
index df91af5f3170..b432d2eb2992 100644
--- a/python/ray/serve/_private/deployment_state.py
+++ b/python/ray/serve/_private/deployment_state.py
@@ -51,9 +51,9 @@
 )
 from ray.serve._private.version import DeploymentVersion, VersionedReplica
 
-
-from ray.util.scheduling_strategies import NodeAffinitySchedulingStrategy
+from ray.util import metrics
 from ray._private.gcs_utils import GcsClient
+from ray.util.scheduling_strategies import NodeAffinitySchedulingStrategy
 
 logger = logging.getLogger(SERVE_LOGGER_NAME)
 
@@ -983,6 +983,15 @@ def __init__(
             self._name, DeploymentStatus.UPDATING
         )
 
+        self.health_check_gauge = metrics.Gauge(
+            "serve_deployment_replica_healthy",
+            description=(
+                "Tracks whether this deployment replica is healthy. 1 means "
+                "healthy, 0 means unhealthy."
+            ),
+            tag_keys=("deployment", "replica"),
+        )
+
     def should_autoscale(self) -> bool:
         """
         Check if the deployment is under autoscaling
@@ -1489,12 +1498,18 @@ def _check_and_update_replicas(self) -> bool:
         for replica in self._replicas.pop(states=[ReplicaState.RUNNING]):
             if replica.check_health():
                 self._replicas.add(ReplicaState.RUNNING, replica)
+                self.health_check_gauge.set(
+                    1, tags={"deployment": self._name, "replica": replica.replica_tag}
+                )
             else:
                 running_replicas_changed = True
                 logger.warning(
                     f"Replica {replica.replica_tag} of deployment "
                     f"{self._name} failed health check, stopping it."
                 )
+                self.health_check_gauge.set(
+                    0, tags={"deployment": self._name, "replica": replica.replica_tag}
+                )
                 replica.stop(graceful=False)
                 self._replicas.add(ReplicaState.STOPPING, replica)
                 # If this is a replica of the target version, the deployment
diff --git a/python/ray/serve/tests/test_metrics.py b/python/ray/serve/tests/test_metrics.py
index c72e840198bb..680d6ba134b8 100644
--- a/python/ray/serve/tests/test_metrics.py
+++ b/python/ray/serve/tests/test_metrics.py
@@ -46,6 +46,7 @@ def verify_metrics(do_assert=False):
         "serve_deployment_processing_latency_ms",
         # gauge
         "serve_replica_processing_queries",
+        "serve_deployment_replica_healthy",
         # handle
         "serve_handle_request_counter",
     ]
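
For context (not part of the patch itself), the new metric follows the standard `ray.util.metrics.Gauge` pattern: construct the gauge once with `tag_keys`, then call `set()` with per-replica tags on every health check. The sketch below is a minimal, hypothetical example of that pattern; the deployment and replica names are made up, and it assumes a local `ray.init()` with the metrics agent running so the values are exported to Prometheus.

```python
# Minimal sketch (illustration only, not part of the patch) of the
# ray.util.metrics.Gauge usage that this change adds to deployment_state.py.
import ray
from ray.util import metrics

ray.init()

# Define the gauge once, keyed by deployment and replica tags,
# mirroring the definition added in DeploymentState.__init__.
health_gauge = metrics.Gauge(
    "serve_deployment_replica_healthy",
    description="1 means the replica is healthy, 0 means unhealthy.",
    tag_keys=("deployment", "replica"),
)

# Record one healthy and one unhealthy replica. The hypothetical
# deployment/replica names stand in for self._name and replica.replica_tag.
health_gauge.set(1, tags={"deployment": "my_deployment", "replica": "replica-1"})
health_gauge.set(0, tags={"deployment": "my_deployment", "replica": "replica-2"})
```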