From dea42aabf8ca7d9737cf109d1675f2ce55f11344 Mon Sep 17 00:00:00 2001 From: Avram Tudor Date: Tue, 18 Feb 2025 09:07:32 +0200 Subject: [PATCH] metrics: track job processor in duration metric (#154) Co-authored-by: Avram Tudor --- docs/env_vars.md | 2 +- docs/monitoring.md | 2 +- skynet/modules/monitoring.py | 2 +- skynet/modules/ttt/summaries/jobs.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/env_vars.md b/docs/env_vars.md index ff051a66..4041e7ce 100644 --- a/docs/env_vars.md +++ b/docs/env_vars.md @@ -8,7 +8,7 @@ Skynet is configurable via environment variables. Some are shared by all modules |--------------------------------|-------------------------------------------------------------|-------------------------------------------|---------------------------------------------------------------------------------| | `ENABLED_MODULES` | Which modules should be enabled, separated by commas | `summaries:dispatcher,summaries:executor,assistant` | `summaries:dispatcher`, `summaries:executor`, `assistant`, `streaming_whisper` | | `BYPASS_AUTHORIZATION` | If signed JWT authorization should be enabled | `false` | `true`, `false` | -| `ENABLE_MONITORING` | If the Prometheus metrics endpoint should be enabled or not | `true` | `true`, `false` | +| `ENABLE_METRICS` | If the Prometheus metrics endpoint should be enabled or not | `true` | `true`, `false` | | `ASAP_PUB_KEYS_REPO_URL` | Public key repository URL | `NULL` | N/A | | `ASAP_PUB_KEYS_FOLDER` | Public key repository root path | `NULL` | N/A | | `ASAP_PUB_KEYS_AUDS` | Allowed JWT audiences, separated by commas | `NULL` | N/A | diff --git a/docs/monitoring.md b/docs/monitoring.md index cbd074cc..ed82d418 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -2,7 +2,7 @@ Skynet exposes a Prometheus `/metrics` endpoint on port `8001`. -The metrics endpoint can be disabled by setting the `ENABLE_MONITORING` env var to `false`. +The metrics endpoint can be disabled by setting the `ENABLE_METRICS` env var to `false`. ## Exposed metrics diff --git a/skynet/modules/monitoring.py b/skynet/modules/monitoring.py index f256a7a5..4a4ffa29 100644 --- a/skynet/modules/monitoring.py +++ b/skynet/modules/monitoring.py @@ -26,7 +26,7 @@ namespace=PROMETHEUS_NAMESPACE, subsystem=PROMETHEUS_SUMMARIES_SUBSYSTEM, buckets=[5**n for n in range(4)], - labelnames=['app_id'], + labelnames=['app_id', 'processor'], ) SUMMARY_FULL_DURATION_METRIC = Histogram( diff --git a/skynet/modules/ttt/summaries/jobs.py b/skynet/modules/ttt/summaries/jobs.py index 7dfd36e9..4a3249a9 100644 --- a/skynet/modules/ttt/summaries/jobs.py +++ b/skynet/modules/ttt/summaries/jobs.py @@ -149,7 +149,7 @@ async def update_done_job(job: Job, result: str, processor: Processors, has_fail await db.lrem(RUNNING_JOBS_KEY, 0, job.id) - SUMMARY_DURATION_METRIC.labels(updated_job.metadata.app_id).observe(updated_job.computed_duration) + SUMMARY_DURATION_METRIC.labels(updated_job.metadata.app_id, processor.value).observe(updated_job.computed_duration) SUMMARY_FULL_DURATION_METRIC.observe(updated_job.computed_full_duration) SUMMARY_INPUT_LENGTH_METRIC.observe(len(updated_job.payload.text))