Skip to content

Commit

Permalink
Merge pull request #238 from VedantMahabaleshwarkar/revertmetrics
Browse files Browse the repository at this point in the history
Revert #220 and #225
  • Loading branch information
openshift-merge-bot[bot] authored Jul 17, 2024
2 parents d135f79 + 190e10d commit b0b041c
Show file tree
Hide file tree
Showing 14 changed files with 237 additions and 580 deletions.
8 changes: 0 additions & 8 deletions Containerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,6 @@ RUN go mod download
COPY main.go main.go
#COPY api/ api/
COPY controllers/ controllers/
COPY controllers/constants/ovms-metrics.json metrics_dashboards/ovms-metrics.json
COPY controllers/constants/tgis-metrics.json metrics_dashboards/tgis-metrics.json
COPY controllers/constants/vllm-metrics.json metrics_dashboards/vllm-metrics.json
COPY controllers/constants/caikit-metrics.json metrics_dashboards/caikit-metrics.json

# Build
USER root
Expand All @@ -27,10 +23,6 @@ RUN CGO_ENABLED=0 GOOS=linux go build -a -o manager main.go
FROM registry.access.redhat.com/ubi8/ubi-minimal:8.6
WORKDIR /
COPY --from=builder /workspace/manager .
COPY --from=builder /workspace/metrics_dashboards/ovms-metrics.json .
COPY --from=builder /workspace/metrics_dashboards/tgis-metrics.json .
COPY --from=builder /workspace/metrics_dashboards/vllm-metrics.json .
COPY --from=builder /workspace/metrics_dashboards/caikit-metrics.json .
USER 65532:65532

ENTRYPOINT ["/manager"]
29 changes: 0 additions & 29 deletions controllers/comparators/configmap_comparator.go

This file was deleted.

103 changes: 53 additions & 50 deletions controllers/constants/caikit-metrics.json
Original file line number Diff line number Diff line change
@@ -1,52 +1,55 @@
{
"config": [
{
"title": "Number of requests",
"type": "REQUEST_COUNT",
"queries": [
{
"title": "Number of successful incoming requests",
"query": "sum(increase(predict_rpc_count_total{namespace='${NAMESPACE}',code='OK',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
},
{
"title": "Number of failed incoming requests",
"query": "sum(increase(predict_rpc_count_total{namespace='${NAMESPACE}',code!='OK',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "Average response time (ms)",
"type": "MEAN_LATENCY",
"queries": [
{
"title": "Average inference latency",
"query": "sum by (model_id) (rate(predict_caikit_library_duration_seconds_sum{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[1m])) / sum by (model_id) (rate(predict_caikit_library_duration_seconds_count{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
},
{
"title": "Average e2e latency",
"query": "sum by (model_id) (rate(caikit_core_load_model_duration_seconds_sum{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[1m]) + rate(predict_caikit_library_duration_seconds_sum{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[1m])) / sum by (model_id) (rate(caikit_core_load_model_duration_seconds_count{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]) + rate(predict_caikit_library_duration_seconds_count{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "CPU utilization %",
"type": "CPU_USAGE",
"queries": [
{
"title": "CPU usage",
"query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='${NAMESPACE}'} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='${NAMESPACE}', workload=~'${MODEL_NAME}-predictor-.*', workload_type=~'deployment'}) by (pod)"
}
]
},
{
"title": "Memory utilization %",
"type": "MEMORY_USAGE",
"queries": [
{
"title": "Memory usage",
"query": "sum(container_memory_working_set_bytes{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}) by (pod)"
}
]
}
]
"metrics": {
"supported": "true",
"config": [
{
"title": "Number of requests",
"type": "REQUEST_COUNT",
"queries": [
{
"title": "Number of successful incoming requests",
"query": "sum(increase(predict_rpc_count_total{namespace='${NAMESPACE}',code='OK',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
},
{
"title": "Number of failed incoming requests",
"query": "sum(increase(predict_rpc_count_total{namespace='${NAMESPACE}',code!='OK',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "Average response time (ms)",
"type": "MEAN_LATENCY",
"queries": [
{
"title": "Average inference latency",
"query": "sum by (model_id) (rate(predict_caikit_library_duration_seconds_sum{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[1m])) / sum by (model_id) (rate(predict_caikit_library_duration_seconds_count{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
},
{
"title": "Average e2e latency",
"query": "sum by (model_id) (rate(caikit_core_load_model_duration_seconds_sum{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[1m]) + rate(predict_caikit_library_duration_seconds_sum{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[1m])) / sum by (model_id) (rate(caikit_core_load_model_duration_seconds_count{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]) + rate(predict_caikit_library_duration_seconds_count{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "CPU utilization %",
"type": "CPU_USAGE",
"queries": [
{
"title": "CPU usage",
"query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='${NAMESPACE}'}* on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='${NAMESPACE}', workload=~'${MODEL_NAME}-predictor-.*', workload_type=~'deployment'}) by (pod)"
}
]
},
{
"title": "Memory utilization %",
"type": "MEMORY_USAGE",
"queries": [
{
"title": "Memory usage",
"query": "sum(container_memory_working_set_bytes{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}) by (pod)"
}
]
}
]
}
}
5 changes: 0 additions & 5 deletions controllers/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,6 @@ const (

const (
DefaultStorageConfig = "storage-config"
IntervalValue = "1m"
OvmsImageName = "openvino_model_server"
TgisImageName = "text-generation-inference"
VllmImageName = "vllm"
CaikitImageName = "caikit-nlp"
)

// openshift
Expand Down
103 changes: 53 additions & 50 deletions controllers/constants/ovms-metrics.json
Original file line number Diff line number Diff line change
@@ -1,52 +1,55 @@
{
"config": [
{
"title": "Number of requests",
"type": "REQUEST_COUNT",
"queries": [
{
"title": "Number of successful incoming requests",
"query": "sum(increase(ovms_requests_success{namespace='${NAMESPACE}',name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
},
{
"title": "Number of failed incoming requests",
"query": "sum(increase(ovms_requests_fail{namespace='${NAMESPACE}',name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "Average response time (ms)",
"type": "MEAN_LATENCY",
"queries": [
{
"title": "Average inference latency",
"query": "sum by (name) (rate(ovms_inference_time_us_sum{namespace='${NAMESPACE}', name='${MODEL_NAME}'}[1m])) / sum by (name) (rate(ovms_inference_time_us_count{namespace='${NAMESPACE}', name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
},
{
"title": "Average e2e latency",
"query": "sum by (name) (rate(ovms_request_time_us_sum{name='${MODEL_NAME}'}[1m])) / sum by (name) (rate(ovms_request_time_us_count{name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "CPU utilization %",
"type": "CPU_USAGE",
"queries": [
{
"title": "CPU usage",
"query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='${NAMESPACE}'} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='${NAMESPACE}', workload=~'${MODEL_NAME}-predictor-.*', workload_type=~'deployment'}) by (pod)"
}
]
},
{
"title": "Memory utilization %",
"type": "MEMORY_USAGE",
"queries": [
{
"title": "Memory usage",
"query": "sum(container_memory_working_set_bytes{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}) by (pod)"
}
]
}
]
"metrics": {
"supported": "true",
"config": [
{
"title": "Number of requests",
"type": "REQUEST_COUNT",
"queries": [
{
"title": "Number of successful incoming requests",
"query": "sum(increase(ovms_requests_success{namespace='${NAMESPACE}',name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
},
{
"title": "Number of failed incoming requests",
"query": "sum(increase(ovms_requests_fail{namespace='${NAMESPACE}',name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "Average response time (ms)",
"type": "MEAN_LATENCY",
"queries": [
{
"title": "Average inference latency",
"query": "sum by (name) (rate(ovms_inference_time_us_sum{namespace='${NAMESPACE}', name='${MODEL_NAME}'}[1m])) / sum by (name) (rate(ovms_inference_time_us_count{namespace='${NAMESPACE}', name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
},
{
"title": "Average e2e latency",
"query": "sum by (name) (rate(ovms_request_time_us_sum{name='${MODEL_NAME}'}[1m])) / sum by (name) (rate(ovms_request_time_us_count{name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "CPU utilization %",
"type": "CPU_USAGE",
"queries": [
{
"title": "CPU usage",
"query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='${NAMESPACE}'}* on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='${NAMESPACE}', workload=~'${MODEL_NAME}-predictor-.*', workload_type=~'deployment'}) by (pod)"
}
]
},
{
"title": "Memory utilization %",
"type": "MEMORY_USAGE",
"queries": [
{
"title": "Memory usage",
"query": "sum(container_memory_working_set_bytes{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}) by (pod)"
}
]
}
]
}
}
103 changes: 53 additions & 50 deletions controllers/constants/tgis-metrics.json
Original file line number Diff line number Diff line change
@@ -1,52 +1,55 @@
{
"config": [
{
"title": "Number of requests",
"type": "REQUEST_COUNT",
"queries": [
{
"title": "Number of successful incoming requests",
"query": "sum(increase(tgi_request_success{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
},
{
"title": "Number of failed incoming requests",
"query": "sum(increase(tgi_request_failure{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "Average response time (ms)",
"type": "MEAN_LATENCY",
"queries": [
{
"title": "Average inference latency",
"query": "sum by (pod) (rate(tgi_request_inference_duration_sum{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}])) / sum by (pod) (rate(tgi_request_inference_duration_count{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}])) "
},
{
"title": "Average e2e latency",
"query": "sum by (pod) (rate(tgi_request_duration_sum{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}])) / sum by (pod) (rate(tgi_request_duration_count{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "CPU utilization %",
"type": "CPU_USAGE",
"queries": [
{
"title": "CPU usage",
"query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='${NAMESPACE}'} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='${NAMESPACE}', workload=~'${MODEL_NAME}-predictor-.*', workload_type=~'deployment'}) by (pod)"
}
]
},
{
"title": "Memory utilization %",
"type": "MEMORY_USAGE",
"queries": [
{
"title": "Memory usage",
"query": "sum(container_memory_working_set_bytes{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}) by (pod)"
}
]
}
]
"metrics": {
"supported": "true",
"config": [
{
"title": "Number of requests",
"type": "REQUEST_COUNT",
"queries": [
{
"title": "Number of successful incoming requests",
"query": "sum(increase(tgi_request_success{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
},
{
"title": "Number of failed incoming requests",
"query": "sum(increase(tgi_request_failure{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "Average response time (ms)",
"type": "MEAN_LATENCY",
"queries": [
{
"title": "Average inference latency",
"query": "sum by (pod) (rate(tgi_request_inference_duration_sum{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}])) / sum by (pod) (rate(tgi_request_inference_duration_count{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}])) "
},
{
"title": "Average e2e latency",
"query": "sum by (pod) (rate(tgi_request_duration_sum{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}])) / sum by (pod) (rate(tgi_request_duration_count{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "CPU utilization %",
"type": "CPU_USAGE",
"queries": [
{
"title": "CPU usage",
"query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='${NAMESPACE}'}* on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='${NAMESPACE}', workload=~'${MODEL_NAME}-predictor-.*', workload_type=~'deployment'}) by (pod)"
}
]
},
{
"title": "Memory utilization %",
"type": "MEMORY_USAGE",
"queries": [
{
"title": "Memory usage",
"query": "sum(container_memory_working_set_bytes{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}) by (pod)"
}
]
}
]
}
}
Loading

0 comments on commit b0b041c

Please sign in to comment.