Skip to content

Commit

Permalink
[Misc] Disable specific endpoints logs (#418)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jeffwan authored Nov 20, 2024
1 parent 0d9f521 commit e7a1541
Showing 1 changed file with 29 additions and 7 deletions.
36 changes: 29 additions & 7 deletions docs/development/app/app.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from flask import Flask, request, Response, jsonify
from werkzeug import serving
import re
import time
from random import randint
import os
Expand All @@ -7,9 +9,6 @@
except Exception as e:
print(f"Failed to import kubernetes, skip: {e}")

app = Flask(__name__)
v1 = None

# Global storage for overridden values
overrides = {}

Expand Down Expand Up @@ -69,13 +68,33 @@
}
]


# Note: this is to suppress /metrics logs; the gateway sends requests to pods
# to scrape the metrics, which results in lots of meaningless access-log lines
# that we do not want to log.
def disable_endpoint_logs():
    """Disable werkzeug access logs for requests to specific endpoints.

    Monkey-patches ``serving.WSGIRequestHandler.log_request`` so that any
    request whose path exactly matches one of ``disabled_endpoints`` is not
    logged; every other request is delegated to the original handler.

    Must be called before the development server starts handling requests.
    """
    disabled_endpoints = ('/', '/healthz', '/metrics')
    parent_log_request = serving.WSGIRequestHandler.log_request

    def log_request(self, *args, **kwargs):
        # Exact-path comparison. The previous implementation built a regex
        # per endpoint via re.match(f"{de}$", self.path), which would break
        # if an endpoint ever contained regex metacharacters; a plain
        # membership test expresses the same intent safely.
        if self.path not in disabled_endpoints:
            parent_log_request(self, *args, **kwargs)

    serving.WSGIRequestHandler.log_request = log_request


app = Flask(__name__)
disable_endpoint_logs()


@app.route('/v1/models', methods=['GET'])
def get_models():
    """Return the available models as an OpenAI-style list object."""
    payload = {
        "object": "list",
        "data": models,
    }
    return jsonify(payload)


@app.route('/v1/load_lora_adapter', methods=['POST'])
def load_model():
lora_name = request.json.get('lora_name')
Expand Down Expand Up @@ -113,7 +132,7 @@ def completion():

prompt_tokens = randint(1, 100)
completion_tokens = randint(1, 100)

# Simulated response
response = {
"id": "cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7",
Expand All @@ -132,7 +151,7 @@ def completion():
"usage": {
"prompt_tokens": prompt_tokens,
"completion_tokens": completion_tokens,
"total_tokens": prompt_tokens+completion_tokens
"total_tokens": prompt_tokens + completion_tokens
}
}
return jsonify(response), 200
Expand All @@ -147,7 +166,7 @@ def chat_completions():

prompt_tokens = randint(1, 100)
completion_tokens = randint(1, 100)

# Simulated response
response = {
"id": "chatcmpl-abc123",
Expand All @@ -157,7 +176,7 @@ def chat_completions():
"usage": {
"prompt_tokens": prompt_tokens,
"completion_tokens": completion_tokens,
"total_tokens": prompt_tokens+completion_tokens
"total_tokens": prompt_tokens + completion_tokens
},
"choices": [
{
Expand All @@ -173,6 +192,7 @@ def chat_completions():
}
return jsonify(response), 200


@app.route('/set_metrics', methods=['POST'])
def set_metrics():
global overrides
Expand All @@ -185,6 +205,7 @@ def set_metrics():
else:
return {"status": "error", "message": "No data provided"}, 400


@app.route('/metrics')
def metrics():
# get deployment information
Expand Down Expand Up @@ -234,6 +255,7 @@ def metrics():
"""
return Response(metrics_output, mimetype='text/plain')


if __name__ == '__main__':
try:
# config.load_kube_config()
Expand Down

0 comments on commit e7a1541

Please sign in to comment.