Support fetching metrics from remote Prometheus server #433

Merged: 7 commits merged on Nov 26, 2024
1 change: 1 addition & 0 deletions build/container/Dockerfile
@@ -18,6 +18,7 @@ COPY pkg/controller/ pkg/controller/
COPY pkg/utils/ pkg/utils/
COPY pkg/cache/ pkg/cache/
COPY pkg/client/ pkg/client/
COPY pkg/metrics/ pkg/metrics/

# Build
# the GOARCH has no default value, so the binary is built according to the host where the command
1 change: 1 addition & 0 deletions build/container/Dockerfile.gateway
@@ -18,6 +18,7 @@ COPY pkg/plugins/ pkg/plugins/
COPY pkg/utils/ pkg/utils/
COPY pkg/cache/ pkg/cache/
COPY pkg/client/ pkg/client/
COPY pkg/metrics/ pkg/metrics/

# Build
# the GOARCH has no default value, so the binary is built according to the host where the command
136 changes: 94 additions & 42 deletions docs/development/app/app.py
@@ -5,6 +5,7 @@
import time
from random import randint
import os

try:
from kubernetes import client, config
except Exception as e:
@@ -207,60 +208,78 @@ def set_metrics():
return {"status": "error", "message": "No data provided"}, 400


# Generic histogram template
histogram_template = """
# HELP vllm:{metric_name} {description}
# TYPE vllm:{metric_name} {metric_type}
vllm:{metric_name}_sum{{model_name="{model_name}"}} {value}
{buckets}
vllm:{metric_name}_count{{model_name="{model_name}"}} {count}
"""

counter_gauge_template = """
# HELP vllm:{metric_name} {description}
# TYPE vllm:{metric_name} {metric_type}
vllm:{metric_name}{{model_name="{model_name}"}} {value}
"""
# Initialize global state to keep track of metrics data
metrics_state = {}


def generate_histogram_metric(metric_name, description, model_name, total_sum, total_count, buckets, metric_type="histogram"):
def generate_histogram_metric(metric_name, description, model_name, buckets, new_requests):
"""
Generate a histogram metric string for a specific metric type.
Generate Prometheus histogram metrics with dynamically updated bucket values.

Args:
metric_name (str): Name of the metric.
description (str): Name of the metric description
model_name (str): Name of the model.
total_sum (float): Total sum value for the metric.
total_count (int): Total count value for the metric.
description (str): Metric description.
model_name (str): Model name.
buckets (list): List of bucket boundaries.
new_requests (dict): Dictionary with new requests to update bucket values.

Returns:
str: Prometheus-formatted histogram string.
str: Prometheus-formatted histogram metric.
"""
# Histogram definitions
# Assign random values for each bucket
bucket_values = {}
cumulative_count = 0
global metrics_state

# Initialize state if not already present
if metric_name not in metrics_state:
metrics_state[metric_name] = {
"buckets": {bucket: 0 for bucket in buckets}, # Bucket values
"total_sum": 0, # Total sum of all values
"total_count": 0 # Total count of all events
}

# Retrieve current metric state
current_state = metrics_state[metric_name]

# Update buckets and ensure cumulative nature
for bucket in buckets:
value = random.randint(0, 10) # Assign random values
cumulative_count += value
bucket_values[bucket] = cumulative_count
if bucket in new_requests:
# Add new requests for this bucket
current_state["buckets"][bucket] += new_requests[bucket]

# Ensure cumulative updates for histogram buckets
if bucket != buckets[0]: # Skip the first bucket
current_state["buckets"][bucket] = max(
current_state["buckets"][bucket],
current_state["buckets"][buckets[buckets.index(bucket) - 1]]
)

# Update total_count and total_sum
current_state["total_count"] = current_state["buckets"][buckets[-1]] # `+Inf` bucket is the total count
current_state["total_sum"] += sum(
float(bucket) * value for bucket, value in new_requests.items() if bucket != "+Inf"
)

# Format bucket strings
# Generate Prometheus bucket strings
bucket_strings = "\n".join(
[f'vllm:{metric_type}_bucket{{le="{bucket}",model_name="{model_name}"}} {value}'
for bucket, value in bucket_values.items()]
[f'vllm:{metric_name}_bucket{{le="{bucket}",model_name="{model_name}"}} {current_state["buckets"][bucket]}'
for bucket in buckets]
)

# Fill in the histogram template
# Return formatted histogram metric
histogram_template = """
# HELP vllm:{metric_name} {description}
# TYPE vllm:{metric_name} histogram
vllm:{metric_name}_sum{{model_name="{model_name}"}} {value}
{buckets}
vllm:{metric_name}_count{{model_name="{model_name}"}} {count}
"""

return histogram_template.format(
metric_name=metric_name,
metric_type=metric_type,
description=description,
model_name=model_name,
value=total_sum,
value=current_state["total_sum"],
buckets=bucket_strings,
count=total_count
count=current_state["total_count"]
)
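For context, the invariant this function maintains is that Prometheus histogram buckets are cumulative: each le bucket counts every observation at or below its boundary, so the per-bucket values must never decrease as the boundaries grow. A minimal standalone sketch of that bookkeeping (illustrative bucket boundaries, not tied to the mock server) looks like this:

# Standalone sketch of cumulative histogram buckets (illustrative values only).
bucket_bounds = ["0.5", "1", "5", "+Inf"]
cumulative = {b: 0 for b in bucket_bounds}

def observe(value):
    # Every bucket whose boundary is >= the observed value is incremented,
    # which keeps the series non-decreasing from bucket to bucket.
    for b in bucket_bounds:
        if b == "+Inf" or value <= float(b):
            cumulative[b] += 1

for v in (0.3, 0.9, 4.2, 7.0):
    observe(v)

print(cumulative)  # {'0.5': 1, '1': 2, '5': 3, '+Inf': 4}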


@@ -278,6 +297,12 @@ def generate_counter_gauge_metric(metric_name, metric_type, description, model_n
Returns:
str: A formatted Prometheus metric string.
"""
counter_gauge_template = """
# HELP vllm:{metric_name} {description}
# TYPE vllm:{metric_name} {metric_type}
vllm:{metric_name}{{model_name="{model_name}"}} {value}
"""

return counter_gauge_template.format(
metric_name=metric_name,
metric_type=metric_type,
@@ -362,7 +387,14 @@ def metrics():

# Generate all metrics
metrics_output = "".join(
generate_counter_gauge_metric(metric["name"], metric["type"], metric["description"], model_name, metric["value"])
generate_counter_gauge_metric(metric["name"], metric["type"], metric["description"], model_name,
metric["value"])
for metric in simple_metrics
)

lora_metrics_output = "".join(
generate_counter_gauge_metric(metric["name"], metric["type"], metric["description"], "lora-model-1",
metric["value"])
for metric in simple_metrics
)

@@ -420,12 +452,32 @@ def metrics():
},
]

histogram_metrics_output = "".join(
generate_histogram_metric(metric["name"], metric["description"], model_name, 100, 100, metric["buckets"])
for metric in histogram_metrics
)

return Response(metrics_output+histogram_metrics_output, mimetype='text/plain')
# Generate metrics output
histogram_metrics_output = ""
for metric in histogram_metrics:
# Simulate random new requests for the metric
new_requests = {bucket: random.randint(0, 5) for bucket in metric["buckets"]}
histogram_metrics_output += generate_histogram_metric(
metric_name=metric["name"],
description=metric["description"],
model_name=model_name,
buckets=metric["buckets"],
new_requests=new_requests
)

lora_histogram_metrics_output = ""
for metric in histogram_metrics:
# Simulate random new requests for the metric
new_requests = {bucket: random.randint(0, 5) for bucket in metric["buckets"]}
lora_histogram_metrics_output += generate_histogram_metric(
metric_name=metric["name"],
description=metric["description"],
model_name="lora-model-1",
buckets=metric["buckets"],
new_requests=new_requests
)

return Response(metrics_output + lora_metrics_output + histogram_metrics_output + lora_histogram_metrics_output, mimetype='text/plain')


if __name__ == '__main__':
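With these changes the mock app accumulates histogram state across scrapes and emits a second set of series for lora-model-1 alongside the base model. A quick way to inspect the output is to run the app and fetch the endpoint directly; the sketch below assumes the development app is listening on localhost:8000:

# Sketch: fetch the mock /metrics endpoint and print the histogram series.
# Assumes the development app from docs/development/app is running on port 8000.
from urllib.request import urlopen

body = urlopen("http://localhost:8000/metrics", timeout=5).read().decode()
for line in body.splitlines():
    if "_bucket{" in line or "_sum{" in line or "_count{" in line:
        print(line)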
7 changes: 7 additions & 0 deletions docs/development/app/deployment.yaml
@@ -45,11 +45,18 @@ kind: Service
metadata:
  name: llama2-70b
  namespace: default
  labels:
    prometheus-discovery: "true"
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/path: "/metrics"
    prometheus.io/port: "8000"
spec:
  selector:
    model.aibrix.ai/name: "llama2-70b"
  ports:
    - protocol: TCP
      name: metrics
      port: 8000
      targetPort: 8000
      nodePort: 30081
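The prometheus-discovery label and the prometheus.io/* annotations are what the scrape configuration (or the ServiceMonitor below) selects on. As a rough sanity check, the labeled Service can be listed with the Kubernetes Python client already used by the mock app; this sketch assumes a working kubeconfig for the cluster the app is deployed in:

# Sketch: confirm the Service carries the discovery label and scrape annotations.
from kubernetes import client, config

config.load_kube_config()
v1 = client.CoreV1Api()
services = v1.list_namespaced_service("default", label_selector="prometheus-discovery=true")
for svc in services.items:
    annotations = svc.metadata.annotations or {}
    print(svc.metadata.name,
          annotations.get("prometheus.io/port"),
          annotations.get("prometheus.io/path"))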
17 changes: 17 additions & 0 deletions docs/development/app/service-monitor.yaml
@@ -0,0 +1,17 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: metric-exporter-testing
  namespace: default
spec:
  endpoints:
    - interval: 15s
      path: /metrics
      port: metrics
  namespaceSelector:
    matchNames:
      - default
  selector:
    matchLabels:
      prometheus-discovery: "true"
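Once the Prometheus Operator reconciles this ServiceMonitor, the endpoint should appear as a healthy scrape target. A rough way to verify end to end is to query the Prometheus HTTP API for the built-in up series; the sketch below assumes Prometheus has been port-forwarded to localhost:9090:

# Sketch: ask Prometheus whether the discovered target is being scraped.
# Assumes `kubectl port-forward` (or similar) exposes Prometheus on localhost:9090.
import json
from urllib.parse import urlencode
from urllib.request import urlopen

params = urlencode({"query": 'up{namespace="default"}'})
resp = json.loads(urlopen(f"http://localhost:9090/api/v1/query?{params}", timeout=5).read())
for sample in resp.get("data", {}).get("result", []):
    labels = sample["metric"]
    print(labels.get("service", labels.get("job", "?")), "up =", sample["value"][1])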

2 changes: 2 additions & 0 deletions go.mod
@@ -59,12 +59,14 @@ require (
github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 // indirect
github.com/imdario/mergo v1.0.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/jpillora/backoff v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/leodido/go-urn v1.4.0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_golang v1.19.1 // indirect
4 changes: 4 additions & 0 deletions go.sum
@@ -75,6 +75,8 @@ github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA=
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
@@ -97,6 +99,8 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/onsi/ginkgo/v2 v2.17.2 h1:7eMhcy3GimbsA3hEnVKdw/PQM9XN9krpKVXsZdph0/g=
github.com/onsi/ginkgo/v2 v2.17.2/go.mod h1:nP2DPOQoNsQmsVyv5rDA8JkXQoCs6goXIvr/PRJ1eCc=
github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk=