Skip to content

Commit

Permalink
Support fetching metrics from remote Prometheus server (#433)
Browse files Browse the repository at this point in the history
* Update mocked app to support prometheus

1. Update histogram to make metric generation monotonically increasing
2. Support to scrape app metrics from prometheus

* Support scraping prometheus metrics

* Add different labels for same metrics

* Use a single source to track all metrics

* Update dockerfile

* Fix metric name

* fix some typos
  • Loading branch information
Jeffwan authored Nov 26, 2024
1 parent ca0cc64 commit 3691295
Show file tree
Hide file tree
Showing 17 changed files with 499 additions and 136 deletions.
1 change: 1 addition & 0 deletions build/container/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ COPY pkg/controller/ pkg/controller/
COPY pkg/utils/ pkg/utils/
COPY pkg/cache/ pkg/cache/
COPY pkg/client/ pkg/client/
COPY pkg/metrics/ pkg/metrics/

# Build
# the GOARCH has no default value, to allow the binary to be built according to the host where the command
Expand Down
1 change: 1 addition & 0 deletions build/container/Dockerfile.gateway
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ COPY pkg/plugins/ pkg/plugins/
COPY pkg/utils/ pkg/utils/
COPY pkg/cache/ pkg/cache/
COPY pkg/client/ pkg/client/
COPY pkg/metrics/ pkg/metrics/

# Build
# the GOARCH has no default value, to allow the binary to be built according to the host where the command
Expand Down
136 changes: 94 additions & 42 deletions docs/development/app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import time
from random import randint
import os

try:
from kubernetes import client, config
except Exception as e:
Expand Down Expand Up @@ -207,60 +208,78 @@ def set_metrics():
return {"status": "error", "message": "No data provided"}, 400


# Global state tracking every emitted histogram so that successive /metrics
# scrapes produce monotonically non-decreasing bucket/sum/count values, as
# Prometheus requires for histogram series.
# Keyed by (metric_name, model_name) so that the base model and the LoRA
# model ("lora-model-1") each accumulate their own independent series.
metrics_state = {}


def generate_histogram_metric(metric_name, description, model_name, buckets, new_requests):
    """Generate a Prometheus-formatted histogram metric with cumulative state.

    Each call folds `new_requests` into the persistent per-(metric, model)
    state, then renders the full histogram exposition block (HELP/TYPE,
    per-bucket `_bucket` samples, `_sum`, and `_count`).

    Args:
        metric_name (str): Name of the metric (emitted as `vllm:<metric_name>`).
        description (str): Metric description for the HELP line.
        model_name (str): Model name used as the `model_name` label value.
        buckets (list): Ordered bucket boundaries; the last entry is treated
            as the `+Inf` bucket, so its value is the total observation count.
        new_requests (dict): Mapping of bucket boundary -> number of new
            observations to add for that bucket on this scrape.

    Returns:
        str: Prometheus text-exposition block for this histogram.
    """
    global metrics_state

    # One state entry per (metric, model) pair; initialize lazily.
    state_key = (metric_name, model_name)
    if state_key not in metrics_state:
        metrics_state[state_key] = {
            "buckets": {bucket: 0 for bucket in buckets},  # cumulative per-bucket counts
            "total_sum": 0,    # running sum of all observed values
            "total_count": 0,  # running count of all observations
        }

    current_state = metrics_state[state_key]

    # Fold in the new observations, then enforce the cumulative property:
    # each bucket's count must be >= the previous (smaller) bucket's count.
    for index, bucket in enumerate(buckets):
        if bucket in new_requests:
            current_state["buckets"][bucket] += new_requests[bucket]

        if index > 0:  # the first bucket has no predecessor
            previous_bucket = buckets[index - 1]
            current_state["buckets"][bucket] = max(
                current_state["buckets"][bucket],
                current_state["buckets"][previous_bucket],
            )

    # The last (+Inf) bucket holds the total observation count.
    current_state["total_count"] = current_state["buckets"][buckets[-1]]
    # Approximate the sum using each bucket's upper boundary as the value;
    # "+Inf" cannot contribute a finite value, so it is excluded.
    current_state["total_sum"] += sum(
        float(bucket) * value
        for bucket, value in new_requests.items()
        if bucket != "+Inf"
    )

    # Render one `_bucket` sample per boundary, preserving bucket order.
    bucket_strings = "\n".join(
        f'vllm:{metric_name}_bucket{{le="{bucket}",model_name="{model_name}"}} {current_state["buckets"][bucket]}'
        for bucket in buckets
    )

    histogram_template = """
# HELP vllm:{metric_name} {description}
# TYPE vllm:{metric_name} histogram
vllm:{metric_name}_sum{{model_name="{model_name}"}} {value}
{buckets}
vllm:{metric_name}_count{{model_name="{model_name}"}} {count}
"""

    return histogram_template.format(
        metric_name=metric_name,
        description=description,
        model_name=model_name,
        value=current_state["total_sum"],
        buckets=bucket_strings,
        count=current_state["total_count"]
    )


Expand All @@ -278,6 +297,12 @@ def generate_counter_gauge_metric(metric_name, metric_type, description, model_n
Returns:
str: A formatted Prometheus metric string.
"""
counter_gauge_template = """
# HELP vllm:{metric_name} {description}
# TYPE vllm:{metric_name} {metric_type}
vllm:{metric_name}{{model_name="{model_name}"}} {value}
"""

return counter_gauge_template.format(
metric_name=metric_name,
metric_type=metric_type,
Expand Down Expand Up @@ -362,7 +387,14 @@ def metrics():

# Generate all metrics
metrics_output = "".join(
generate_counter_gauge_metric(metric["name"], metric["type"], metric["description"], model_name, metric["value"])
generate_counter_gauge_metric(metric["name"], metric["type"], metric["description"], model_name,
metric["value"])
for metric in simple_metrics
)

lora_metrics_output = "".join(
generate_counter_gauge_metric(metric["name"], metric["type"], metric["description"], "lora-model-1",
metric["value"])
for metric in simple_metrics
)

Expand Down Expand Up @@ -420,12 +452,32 @@ def metrics():
},
]

histogram_metrics_output = "".join(
generate_histogram_metric(metric["name"], metric["description"], model_name, 100, 100, metric["buckets"])
for metric in histogram_metrics
)

return Response(metrics_output+histogram_metrics_output, mimetype='text/plain')
# Generate metrics output
histogram_metrics_output = ""
for metric in histogram_metrics:
# Simulate random new requests for the metric
new_requests = {bucket: random.randint(0, 5) for bucket in metric["buckets"]}
histogram_metrics_output += generate_histogram_metric(
metric_name=metric["name"],
description=metric["description"],
model_name=model_name,
buckets=metric["buckets"],
new_requests=new_requests
)

lora_histogram_metrics_output = ""
for metric in histogram_metrics:
# Simulate random new requests for the metric
new_requests = {bucket: random.randint(0, 5) for bucket in metric["buckets"]}
lora_histogram_metrics_output += generate_histogram_metric(
metric_name=metric["name"],
description=metric["description"],
model_name="lora-model-1",
buckets=metric["buckets"],
new_requests=new_requests
)

return Response(metrics_output + lora_metrics_output + histogram_metrics_output + lora_histogram_metrics_output, mimetype='text/plain')


if __name__ == '__main__':
Expand Down
7 changes: 7 additions & 0 deletions docs/development/app/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,18 @@ kind: Service
metadata:
name: llama2-70b
namespace: default
labels:
prometheus-discovery: "true"
annotations:
prometheus.io/scrape: "true"
prometheus.io/path: "/metrics"
prometheus.io/port: "8000"
spec:
selector:
model.aibrix.ai/name: "llama2-70b"
ports:
- protocol: TCP
name: metrics
port: 8000
targetPort: 8000
nodePort: 30081
Expand Down
17 changes: 17 additions & 0 deletions docs/development/app/service-monitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# ServiceMonitor for the mocked vLLM app: directs the Prometheus Operator
# to scrape any Service in the "default" namespace that carries the label
# prometheus-discovery: "true".
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: metric-exporter-testing
  namespace: default
spec:
  endpoints:
    # Scrape the Service port named "metrics" at /metrics every 15 seconds.
    - interval: 15s
      path: /metrics
      port: metrics
  # Only discover Services in the "default" namespace.
  namespaceSelector:
    matchNames:
      - default
  # Select Services opted in via the prometheus-discovery label.
  selector:
    matchLabels:
      prometheus-discovery: "true"

2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,14 @@ require (
github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 // indirect
github.com/imdario/mergo v1.0.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/jpillora/backoff v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/leodido/go-urn v1.4.0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_golang v1.19.1 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA=
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
Expand All @@ -97,6 +99,8 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/onsi/ginkgo/v2 v2.17.2 h1:7eMhcy3GimbsA3hEnVKdw/PQM9XN9krpKVXsZdph0/g=
github.com/onsi/ginkgo/v2 v2.17.2/go.mod h1:nP2DPOQoNsQmsVyv5rDA8JkXQoCs6goXIvr/PRJ1eCc=
github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk=
Expand Down
Loading

0 comments on commit 3691295

Please sign in to comment.