Skip to content

Commit

Permalink
[Misc ]Add runtime api metrics (#251)
Browse files Browse the repository at this point in the history
* feat: add info metrics

* feat: add metrics about aibrix runtime api
  • Loading branch information
brosoul authored Sep 27, 2024
1 parent 8817158 commit 795d3e7
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 6 deletions.
2 changes: 2 additions & 0 deletions python/aibrix/aibrix/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,5 @@
DEFAULT_METRIC_COLLECTOR_TIMEOUT = 1

DOWNLOAD_CACHE_DIR = ".cache"

# Request paths that the HTTP metrics middleware in app.py skips when recording
# per-request count and latency metrics.
EXCLUDE_METRICS_HTTP_ENDPOINTS = ["/metrics/"]
39 changes: 39 additions & 0 deletions python/aibrix/aibrix/metrics/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Copyright 2024 The Aibrix Team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from prometheus_client import CollectorRegistry, Counter, Histogram, Info


# Dedicated collector registry that every metric declared in this module is
# registered on.
REGISTRY = CollectorRegistry()

# Info metric for the runtime itself. No label names are declared here; the
# key/value pairs are supplied by whoever calls ``INFO_METRICS.info(...)``.
INFO_METRICS = Info(
    "aibrix:info",
    "AIBrix Info",
    registry=REGISTRY,
)

# Request counter, partitioned by HTTP method, endpoint and response status.
HTTP_COUNTER_METRICS = Counter(
    "aibrix:api_request_total",
    "Count of AIBrix API Requests by method, endpoint and status",
    labelnames=("method", "endpoint", "status"),
    registry=REGISTRY,
)

# Request latency histogram, using the same label set as the counter above.
HTTP_LATENCY_METRICS = Histogram(
    "aibrix:api_request_latency",
    "Latency of AIBrix API Requests by method, endpoint and status",
    labelnames=("method", "endpoint", "status"),
    buckets=(0.1, 0.2, 0.5, 1, 2, 5),
    registry=REGISTRY,
)
50 changes: 44 additions & 6 deletions python/aibrix/app.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,24 @@
import os
import shutil
from pathlib import Path
import time
from urllib.parse import urljoin

from aibrix.config import EXCLUDE_METRICS_HTTP_ENDPOINTS
from aibrix.metrics.metrics import (
HTTP_COUNTER_METRICS,
HTTP_LATENCY_METRICS,
INFO_METRICS,
REGISTRY,
)
import uvicorn
from fastapi import APIRouter, FastAPI, Request, Response
from fastapi.datastructures import State
from fastapi.responses import JSONResponse
from prometheus_client import CollectorRegistry, make_asgi_app, multiprocess
from prometheus_client import make_asgi_app, multiprocess
from starlette.routing import Mount

from aibrix import envs
from aibrix import __version__, envs
from aibrix.logger import init_logger
from aibrix.metrics.engine_rules import get_metric_standard_rules
from aibrix.metrics.http_collector import HTTPCollector
Expand Down Expand Up @@ -54,21 +62,21 @@ def mount_metrics(app: FastAPI):
logger.info(
f"AIBrix to use {prometheus_multiproc_dir_path} as PROMETHEUS_MULTIPROC_DIR"
)
registry = CollectorRegistry()
multiprocess.MultiProcessCollector(registry)
# registry = CollectorRegistry()
multiprocess.MultiProcessCollector(REGISTRY)

# construct scrape metric config
engine = envs.INFERENCE_ENGINE

scrape_endpoint = urljoin(envs.INFERENCE_ENGINE_ENDPOINT, envs.METRIC_SCRAPE_PATH)
collector = HTTPCollector(scrape_endpoint, get_metric_standard_rules(engine))
registry.register(collector)
REGISTRY.register(collector)
logger.info(
f"AIBrix to scrape metrics from {scrape_endpoint}, use {engine} standard rules"
)

# Add prometheus asgi middleware to route /metrics requests
metrics_route = Mount("/metrics", make_asgi_app(registry=registry))
metrics_route = Mount("/metrics", make_asgi_app(registry=REGISTRY))

app.routes.append(metrics_route)

Expand Down Expand Up @@ -101,11 +109,41 @@ async def unload_lora_adapter(request: UnloadLoraAdapterRequest, raw_request: Re

def build_app():
    """Create and fully wire the FastAPI application.

    Publishes the runtime/engine identification through ``INFO_METRICS``,
    mounts the metrics route, initialises the application state and attaches
    the API router.

    Returns:
        The configured ``FastAPI`` instance.
    """
    application = FastAPI(debug=False)

    # Static identification of this runtime and the engine it fronts.
    runtime_info = {
        "version": __version__.__version__,
        "engine": envs.INFERENCE_ENGINE,
        "engine_version": envs.INFERENCE_ENGINE_VERSION,
    }
    INFO_METRICS.info(runtime_info)

    mount_metrics(application)
    init_app_state(application.state)
    application.include_router(router)
    return application


# Module-level application instance; the HTTP middleware defined below is
# registered on it and it is the object handed to uvicorn.
app = build_app()


@app.middleware("http")
async def add_router_prometheus_middlerware(request: Request, call_next):
    """Record a request counter and latency histogram for each HTTP request.

    Requests whose path is listed in ``EXCLUDE_METRICS_HTTP_ENDPOINTS`` are
    passed straight through without being measured. The comparison ignores a
    trailing slash, so ``/metrics`` and ``/metrics/`` are treated alike.

    Args:
        request: The incoming request.
        call_next: Callable that forwards the request to the rest of the
            application and returns its response.

    Returns:
        The response produced by ``call_next``, unmodified.

    Raises:
        Exception: Anything raised by ``call_next`` is re-raised after the
            request has been recorded with status 500.
    """
    method = request.method
    endpoint = request.scope.get("path")

    # Exclude endpoints that do not require metrics. Trailing slashes are
    # stripped on both sides so that a scrape of "/metrics" (without the
    # slash) is not itself counted as an API request.
    normalized_path = (endpoint or "").rstrip("/")
    if any(
        normalized_path == excluded.rstrip("/")
        for excluded in EXCLUDE_METRICS_HTTP_ENDPOINTS
    ):
        return await call_next(request)

    start_time = time.perf_counter()

    def record(status):
        # NOTE(review): `endpoint` is the raw request path, so every distinct
        # URL creates a new label value; consider a bounded label if
        # parameterised routes are added.
        process_time = time.perf_counter() - start_time
        HTTP_LATENCY_METRICS.labels(
            method=method, endpoint=endpoint, status=status
        ).observe(process_time)
        HTTP_COUNTER_METRICS.labels(
            method=method, endpoint=endpoint, status=status
        ).inc()

    try:
        response = await call_next(request)
    except Exception:
        # An unhandled error would otherwise leave no trace in the metrics;
        # record it as a server error and let it propagate unchanged.
        record(500)
        raise

    record(response.status_code)
    return response


# Serve `app` with uvicorn on the port taken from envs.SERVER_PORT.
# NOTE(review): in this view the call appears to sit at module level, which
# would start the server on import as well as on direct execution - confirm.
uvicorn.run(app, port=envs.SERVER_PORT)

0 comments on commit 795d3e7

Please sign in to comment.