Support fetching metrics from remote Prometheus server #433

Merged: 7 commits merged on Nov 26, 2024
1 change: 1 addition & 0 deletions build/container/Dockerfile
@@ -18,6 +18,7 @@ COPY pkg/controller/ pkg/controller/
COPY pkg/utils/ pkg/utils/
COPY pkg/cache/ pkg/cache/
COPY pkg/client/ pkg/client/
COPY pkg/metrics/ pkg/metrics/

# Build
# the GOARCH has no default value, so the binary is built according to the host where the command
1 change: 1 addition & 0 deletions build/container/Dockerfile.gateway
@@ -18,6 +18,7 @@ COPY pkg/plugins/ pkg/plugins/
COPY pkg/utils/ pkg/utils/
COPY pkg/cache/ pkg/cache/
COPY pkg/client/ pkg/client/
COPY pkg/metrics/ pkg/metrics/

# Build
# the GOARCH has no default value, so the binary is built according to the host where the command
136 changes: 94 additions & 42 deletions docs/development/app/app.py
@@ -5,6 +5,7 @@
import time
from random import randint
import os

try:
from kubernetes import client, config
except Exception as e:
@@ -207,60 +208,78 @@ def set_metrics():
return {"status": "error", "message": "No data provided"}, 400


# Generic histogram template
histogram_template = """
# HELP vllm:{metric_name} {description}
# TYPE vllm:{metric_name} {metric_type}
vllm:{metric_name}_sum{{model_name="{model_name}"}} {value}
{buckets}
vllm:{metric_name}_count{{model_name="{model_name}"}} {count}
"""

counter_gauge_template = """
# HELP vllm:{metric_name} {description}
# TYPE vllm:{metric_name} {metric_type}
vllm:{metric_name}{{model_name="{model_name}"}} {value}
"""
# Initialize global state to keep track of metrics data
metrics_state = {}


def generate_histogram_metric(metric_name, description, model_name, total_sum, total_count, buckets, metric_type="histogram"):
def generate_histogram_metric(metric_name, description, model_name, buckets, new_requests):
"""
Generate a histogram metric string for a specific metric type.
Generate Prometheus histogram metrics with dynamically updated bucket values.

Args:
metric_name (str): Name of the metric.
description (str): Name of the metric description
model_name (str): Name of the model.
total_sum (float): Total sum value for the metric.
total_count (int): Total count value for the metric.
description (str): Metric description.
model_name (str): Model name.
buckets (list): List of bucket boundaries.
new_requests (dict): Dictionary with new requests to update bucket values.

Returns:
str: Prometheus-formatted histogram string.
str: Prometheus-formatted histogram metric.
"""
# Histogram definitions
# Assign random values for each bucket
bucket_values = {}
cumulative_count = 0
global metrics_state

# Initialize state if not already present
if metric_name not in metrics_state:
metrics_state[metric_name] = {
"buckets": {bucket: 0 for bucket in buckets}, # Bucket values
"total_sum": 0, # Total sum of all values
"total_count": 0 # Total count of all events
}

# Retrieve current metric state
current_state = metrics_state[metric_name]

# Update buckets and ensure cumulative nature
for bucket in buckets:
value = random.randint(0, 10) # Assign random values
cumulative_count += value
bucket_values[bucket] = cumulative_count
if bucket in new_requests:
# Add new requests for this bucket
current_state["buckets"][bucket] += new_requests[bucket]

# Ensure cumulative updates for histogram buckets
if bucket != buckets[0]: # Skip the first bucket
current_state["buckets"][bucket] = max(
current_state["buckets"][bucket],
current_state["buckets"][buckets[buckets.index(bucket) - 1]]
)

# Update total_count and total_sum
current_state["total_count"] = current_state["buckets"][buckets[-1]] # `+Inf` bucket is the total count
current_state["total_sum"] += sum(
float(bucket) * value for bucket, value in new_requests.items() if bucket != "+Inf"
)

# Format bucket strings
# Generate Prometheus bucket strings
bucket_strings = "\n".join(
[f'vllm:{metric_type}_bucket{{le="{bucket}",model_name="{model_name}"}} {value}'
for bucket, value in bucket_values.items()]
[f'vllm:{metric_name}_bucket{{le="{bucket}",model_name="{model_name}"}} {current_state["buckets"][bucket]}'
for bucket in buckets]
)

# Fill in the histogram template
# Return formatted histogram metric
histogram_template = """
# HELP vllm:{metric_name} {description}
# TYPE vllm:{metric_name} histogram
vllm:{metric_name}_sum{{model_name="{model_name}"}} {value}
{buckets}
vllm:{metric_name}_count{{model_name="{model_name}"}} {count}
"""

return histogram_template.format(
metric_name=metric_name,
metric_type=metric_type,
description=description,
model_name=model_name,
value=total_sum,
value=current_state["total_sum"],
buckets=bucket_strings,
count=total_count
count=current_state["total_count"]
)
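For context, the invariant this function maintains is that Prometheus histogram buckets are cumulative: each le bucket counts every observation at or below its boundary, so the per-bucket values must never decrease as the boundaries grow. A minimal standalone sketch of that bookkeeping (illustrative bucket boundaries, not tied to the mock server) looks like this:

# Standalone sketch of cumulative histogram buckets (illustrative values only).
bucket_bounds = ["0.5", "1", "5", "+Inf"]
cumulative = {b: 0 for b in bucket_bounds}

def observe(value):
    # Every bucket whose boundary is >= the observed value is incremented,
    # which keeps the series non-decreasing from bucket to bucket.
    for b in bucket_bounds:
        if b == "+Inf" or value <= float(b):
            cumulative[b] += 1

for v in (0.3, 0.9, 4.2, 7.0):
    observe(v)

print(cumulative)  # {'0.5': 1, '1': 2, '5': 3, '+Inf': 4}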


@@ -278,6 +297,12 @@ def generate_counter_gauge_metric(metric_name, metric_type, description, model_n
Returns:
str: A formatted Prometheus metric string.
"""
counter_gauge_template = """
# HELP vllm:{metric_name} {description}
# TYPE vllm:{metric_name} {metric_type}
vllm:{metric_name}{{model_name="{model_name}"}} {value}
"""

return counter_gauge_template.format(
metric_name=metric_name,
metric_type=metric_type,
@@ -362,7 +387,14 @@ def metrics():

# Generate all metrics
metrics_output = "".join(
generate_counter_gauge_metric(metric["name"], metric["type"], metric["description"], model_name, metric["value"])
generate_counter_gauge_metric(metric["name"], metric["type"], metric["description"], model_name,
metric["value"])
for metric in simple_metrics
)

lora_metrics_output = "".join(
generate_counter_gauge_metric(metric["name"], metric["type"], metric["description"], "lora-model-1",
metric["value"])
for metric in simple_metrics
)

@@ -420,12 +452,32 @@ def metrics():
},
]

histogram_metrics_output = "".join(
generate_histogram_metric(metric["name"], metric["description"], model_name, 100, 100, metric["buckets"])
for metric in histogram_metrics
)

return Response(metrics_output+histogram_metrics_output, mimetype='text/plain')
# Generate metrics output
histogram_metrics_output = ""
for metric in histogram_metrics:
# Simulate random new requests for the metric
new_requests = {bucket: random.randint(0, 5) for bucket in metric["buckets"]}
histogram_metrics_output += generate_histogram_metric(
metric_name=metric["name"],
description=metric["description"],
model_name=model_name,
buckets=metric["buckets"],
new_requests=new_requests
)

lora_histogram_metrics_output = ""
for metric in histogram_metrics:
# Simulate random new requests for the metric
new_requests = {bucket: random.randint(0, 5) for bucket in metric["buckets"]}
lora_histogram_metrics_output += generate_histogram_metric(
metric_name=metric["name"],
description=metric["description"],
model_name="lora-model-1",
buckets=metric["buckets"],
new_requests=new_requests
)

return Response(metrics_output + lora_metrics_output + histogram_metrics_output + lora_histogram_metrics_output, mimetype='text/plain')


if __name__ == '__main__':
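With these changes the mock app accumulates histogram state across scrapes and emits a second set of series for lora-model-1 alongside the base model. A quick way to inspect the output is to run the app and fetch the endpoint directly; the sketch below assumes the development app is listening on localhost:8000:

# Sketch: fetch the mock /metrics endpoint and print the histogram series.
# Assumes the development app from docs/development/app is running on port 8000.
from urllib.request import urlopen

body = urlopen("http://localhost:8000/metrics", timeout=5).read().decode()
for line in body.splitlines():
    if "_bucket{" in line or "_sum{" in line or "_count{" in line:
        print(line)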
7 changes: 7 additions & 0 deletions docs/development/app/deployment.yaml
@@ -45,11 +45,18 @@ kind: Service
metadata:
  name: llama2-70b
  namespace: default
  labels:
    prometheus-discovery: "true"
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/path: "/metrics"
    prometheus.io/port: "8000"
spec:
  selector:
    model.aibrix.ai/name: "llama2-70b"
  ports:
    - protocol: TCP
      name: metrics
      port: 8000
      targetPort: 8000
      nodePort: 30081
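The prometheus-discovery label and the prometheus.io/* annotations are what the scrape configuration (or the ServiceMonitor below) selects on. As a rough sanity check, the labeled Service can be listed with the Kubernetes Python client already used by the mock app; this sketch assumes a working kubeconfig for the cluster the app is deployed in:

# Sketch: confirm the Service carries the discovery label and scrape annotations.
from kubernetes import client, config

config.load_kube_config()
v1 = client.CoreV1Api()
services = v1.list_namespaced_service("default", label_selector="prometheus-discovery=true")
for svc in services.items:
    annotations = svc.metadata.annotations or {}
    print(svc.metadata.name,
          annotations.get("prometheus.io/port"),
          annotations.get("prometheus.io/path"))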
17 changes: 17 additions & 0 deletions docs/development/app/service-monitor.yaml
@@ -0,0 +1,17 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: metric-exporter-testing
  namespace: default
spec:
  endpoints:
    - interval: 15s
      path: /metrics
      port: metrics
  namespaceSelector:
    matchNames:
      - default
  selector:
    matchLabels:
      prometheus-discovery: "true"
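Once the Prometheus Operator reconciles this ServiceMonitor, the endpoint should appear as a healthy scrape target. A rough way to verify end to end is to query the Prometheus HTTP API for the built-in up series; the sketch below assumes Prometheus has been port-forwarded to localhost:9090:

# Sketch: ask Prometheus whether the discovered target is being scraped.
# Assumes `kubectl port-forward` (or similar) exposes Prometheus on localhost:9090.
import json
from urllib.parse import urlencode
from urllib.request import urlopen

params = urlencode({"query": 'up{namespace="default"}'})
resp = json.loads(urlopen(f"http://localhost:9090/api/v1/query?{params}", timeout=5).read())
for sample in resp.get("data", {}).get("result", []):
    labels = sample["metric"]
    print(labels.get("service", labels.get("job", "?")), "up =", sample["value"][1])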

2 changes: 2 additions & 0 deletions go.mod
@@ -59,12 +59,14 @@ require (
github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 // indirect
github.com/imdario/mergo v1.0.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/jpillora/backoff v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/leodido/go-urn v1.4.0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_golang v1.19.1 // indirect
4 changes: 4 additions & 0 deletions go.sum
@@ -75,6 +75,8 @@ github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA=
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
@@ -97,6 +99,8 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/onsi/ginkgo/v2 v2.17.2 h1:7eMhcy3GimbsA3hEnVKdw/PQM9XN9krpKVXsZdph0/g=
github.com/onsi/ginkgo/v2 v2.17.2/go.mod h1:nP2DPOQoNsQmsVyv5rDA8JkXQoCs6goXIvr/PRJ1eCc=
github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk=