diff --git a/python/kserve/README.md b/python/kserve/README.md index d30dc87e32d..a44f964f4ca 100644 --- a/python/kserve/README.md +++ b/python/kserve/README.md @@ -59,12 +59,12 @@ It supports the following storage providers: For latency metrics, send a request to `/metrics`. Prometheus latency histograms are emitted for each of the steps (pre/postprocessing, explain, predict). Additionally, the latencies of each step are logged per request. -| Metric Name | Description | Type | -|------------------------------------|--------------------------------|-----------| -| request_preprocessing_seconds | pre-processing request latency | Histogram | -| request_explain_processing_seconds | explain request latency | Histogram | -| request_predict_processing_seconds | prediction request latency | Histogram | -| request_postprocessing_seconds | pre-processing request latency | Histogram | +| Metric Name | Description | Type | +|-----------------------------------|--------------------------------|-----------| +| request_preprocess_seconds | pre-processing request latency | Histogram | +| request_explain_seconds | explain request latency | Histogram | +| request_predict_seconds | prediction request latency | Histogram | +| request_postprocess_seconds | post-processing request latency | Histogram | ## KServe Client diff --git a/python/kserve/kserve/model.py b/python/kserve/kserve/model.py index 5cee8974383..fd0803ca66f 100644 --- a/python/kserve/kserve/model.py +++ b/python/kserve/kserve/model.py @@ -35,10 +35,10 @@ PREDICTOR_V2_URL_FORMAT = "http://{0}/v2/models/{1}/infer" EXPLAINER_V2_URL_FORMAT = "http://{0}/v2/models/{1}/explain" -PRE_HIST_TIME = Histogram('request_preprocessing_seconds', 'pre-processing request latency') -POST_HIST_TIME = Histogram('request_postprocessing_seconds', 'post-processing request latency') -PREDICT_HIST_TIME = Histogram('request_predict_processing_seconds', 'prediction request latency') -EXPLAIN_HIST_TIME = 
Histogram('request_explain_processing_seconds', 'explain request latency') +PRE_HIST_TIME = Histogram('request_preprocess_seconds', 'pre-process request latency') +POST_HIST_TIME = Histogram('request_postprocess_seconds', 'post-process request latency') +PREDICT_HIST_TIME = Histogram('request_predict_seconds', 'predict request latency') +EXPLAIN_HIST_TIME = Histogram('request_explain_seconds', 'explain request latency') class ModelType(Enum):