From 6179cef8b902958b14e7f1ff3fd8483090ca59e1 Mon Sep 17 00:00:00 2001 From: Ketan Umare Date: Thu, 1 Apr 2021 20:44:41 -0700 Subject: [PATCH] updated Signed-off-by: Ketan Umare --- .../prometheus/flytepropeller-dashboard.json | 34 +++++++++--------- stats/flytepropeller_dashboard.py | 36 +++++++++---------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/deployment/stats/prometheus/flytepropeller-dashboard.json b/deployment/stats/prometheus/flytepropeller-dashboard.json index 872f164ff3..db3c930e00 100644 --- a/deployment/stats/prometheus/flytepropeller-dashboard.json +++ b/deployment/stats/prometheus/flytepropeller-dashboard.json @@ -189,7 +189,7 @@ "targets": [ { "datasource": "", - "expr": "sum(rate(flyte:propeller:all:round:abort_error[5m]))*300", + "expr": "sum(rate(flyte:propeller:all:round:abort_error[5m]))", "format": "time_series", "hide": false, "instant": false, @@ -295,7 +295,7 @@ "targets": [ { "datasource": "", - "expr": "sum(deriv(flyte:propeller:all:round:system_error_unlabeled[5m]))*300", + "expr": "sum(deriv(flyte:propeller:all:round:system_error_unlabeled[5m]))", "format": "time_series", "hide": false, "instant": false, @@ -2119,7 +2119,7 @@ "targets": [ { "datasource": "", - "expr": "sum(rate(flyte:propeller:all:node:perma_system_error_duration_unlabeled_ms_count[5m])) * 300", + "expr": "sum(rate(flyte:propeller:all:node:perma_system_error_duration_unlabeled_ms_count[5m]))", "format": "time_series", "hide": false, "instant": false, @@ -2133,7 +2133,7 @@ }, { "datasource": "", - "expr": "sum(rate(flyte:propeller:all:node:perma_user_error_duration_unlabeled_ms[5m])) * 300", + "expr": "sum(rate(flyte:propeller:all:node:perma_user_error_duration_unlabeled_ms[5m]))", "format": "time_series", "hide": false, "instant": false, @@ -2147,7 +2147,7 @@ }, { "datasource": "", - "expr": "sum(rate(flyte:propeller:all:node:perma_unknown_error_duration_unlabeled_ms[5m])) * 300", + "expr": "sum(rate(flyte:propeller:all:node:perma_unknown_error_duration_unlabeled_ms[5m]))", "format": "time_series", "hide": false, "instant": false, @@ -2369,7 +2369,7 @@ "targets": [ { "datasource": "", - "expr": "sum(rate(flyte:propeller:all:workflow:event_recording:success_duration_ms_count[5m])) by (wf) * 300", + "expr": "sum(rate(flyte:propeller:all:workflow:event_recording:success_duration_ms_count[5m])) by (wf)", "format": "time_series", "hide": false, "instant": false, @@ -2383,7 +2383,7 @@ }, { "datasource": "", - "expr": "sum(rate(flyte:propeller:all:workflow:event_recording:failure_duration_ms_count[5m])) by (wf) * 300", + "expr": "sum(rate(flyte:propeller:all:workflow:event_recording:failure_duration_ms_count[5m])) by (wf)", "format": "time_series", "hide": false, "instant": false, @@ -2595,7 +2595,7 @@ "targets": [ { "datasource": "", - "expr": "sum(rate(flyte:propeller:all:node:event_recording:success_duration_ms_count[5m])) by (wf) * 300", + "expr": "sum(rate(flyte:propeller:all:node:event_recording:success_duration_ms_count[5m])) by (wf)", "format": "time_series", "hide": false, "instant": false, @@ -2609,7 +2609,7 @@ }, { "datasource": "", - "expr": "sum(rate(flyte:propeller:all:node:event_recording:failure_duration_ms_count[5m])) by (wf) * 300", + "expr": "sum(rate(flyte:propeller:all:node:event_recording:failure_duration_ms_count[5m])) by (wf)", "format": "time_series", "hide": false, "instant": false, @@ -2821,7 +2821,7 @@ "targets": [ { "datasource": "", - "expr": "sum(rate(flyte:propeller:all:task:event_recording:success_duration_ms_count[5m])) by (wf) * 300", + "expr": "sum(rate(flyte:propeller:all:task:event_recording:success_duration_ms_count[5m])) by (wf)", "format": "time_series", "hide": false, "instant": false, @@ -2835,7 +2835,7 @@ }, { "datasource": "", - "expr": "sum(rate(flyte:propeller:all:task:event_recording:failure_duration_ms_count[5m])) by (wf) * 300", + "expr": "sum(rate(flyte:propeller:all:task:event_recording:failure_duration_ms_count[5m])) by (wf)", "format": "time_series", "hide": false, "instant": false, @@ -3047,7 +3047,7 @@ "targets": [ { "datasource": "", - "expr": "sum(rate(flyte:propeller:all:node:build_dynamic_workflow_us_count[5m])) by (wf) * 300", + "expr": "sum(rate(flyte:propeller:all:node:build_dynamic_workflow_us_count[5m])) by (wf)", "format": "time_series", "hide": false, "instant": false, @@ -3153,7 +3153,7 @@ "targets": [ { "datasource": "", - "expr": "sum(rate(flyte:propeller:all:admin_launcher:cache_hit[5m])) * 300", + "expr": "sum(rate(flyte:propeller:all:admin_launcher:cache_hit[5m]))", "format": "time_series", "hide": false, "instant": false, @@ -3167,7 +3167,7 @@ }, { "datasource": "", - "expr": "sum(rate(flyte:propeller:all:admin_launcher:cache_miss[5m])) * 300", + "expr": "sum(rate(flyte:propeller:all:admin_launcher:cache_miss[5m]))", "format": "time_series", "hide": false, "instant": false, @@ -3389,7 +3389,7 @@ "targets": [ { "datasource": "", - "expr": "sum(rate(flyte:propeller:all:wf_update_latency_ms_count[5m])) * 300", + "expr": "sum(rate(flyte:propeller:all:wf_update_latency_ms_count[5m]))", "format": "time_series", "hide": false, "instant": false, @@ -3495,7 +3495,7 @@ "targets": [ { "datasource": "", - "expr": "sum(rate(flyte:propeller:all:wf_update_conflict[5m])) * 300", + "expr": "sum(rate(flyte:propeller:all:wf_update_conflict[5m]))", "format": "time_series", "hide": false, "instant": false, @@ -3601,7 +3601,7 @@ "targets": [ { "datasource": "", - "expr": "sum(rate(flyte:propeller:all:wf_update_failed[5m])) * 300", + "expr": "sum(rate(flyte:propeller:all:wf_update_failed[5m]))", "format": "time_series", "hide": false, "instant": false, diff --git a/stats/flytepropeller_dashboard.py b/stats/flytepropeller_dashboard.py index 51140ad617..cf61868ae3 100644 --- a/stats/flytepropeller_dashboard.py +++ b/stats/flytepropeller_dashboard.py @@ -1,7 +1,7 @@ import typing from grafanalib.core import ( - Dashboard, Graph, + Dashboard, Graph, Gauge, Stat, OPS_FORMAT, Row, SHORT_FORMAT, single_y_axis, Target, YAxes, YAxis, MILLISECONDS_FORMAT, DataSourceInput, PERCENT_FORMAT, NO_FORMAT ) @@ -103,7 +103,7 @@ def system_errors() -> Graph: dataSource=DATASOURCE, targets=[ Target( - expr='sum(deriv(flyte:propeller:all:round:system_error_unlabeled[5m]))*300', + expr='sum(deriv(flyte:propeller:all:round:system_error_unlabeled[5m]))', refId='A', ), ], @@ -120,7 +120,7 @@ def abort_errors() -> Graph: dataSource=DATASOURCE, targets=[ Target( - expr='sum(rate(flyte:propeller:all:round:abort_error[5m]))*300', + expr='sum(rate(flyte:propeller:all:round:abort_error[5m]))', refId='A', ), ], @@ -351,13 +351,13 @@ def admin_launcher_cache() -> Graph: dataSource=DATASOURCE, targets=[ Target( - expr=f'sum(rate(flyte:propeller:all:admin_launcher:cache_hit[5m])) * 300', + expr=f'sum(rate(flyte:propeller:all:admin_launcher:cache_hit[5m]))', legendFormat="hit", refId='A', ), Target( - expr=f'sum(rate(flyte:propeller:all:admin_launcher:cache_miss[5m])) * 300', + expr=f'sum(rate(flyte:propeller:all:admin_launcher:cache_miss[5m]))', legendFormat="miss", refId='B', ), @@ -384,7 +384,7 @@ def dynamic_wf_build() -> typing.List[Graph]: dataSource=DATASOURCE, targets=[ Target( - expr=f'sum(rate(flyte:propeller:all:node:build_dynamic_workflow_us_count[5m])) by (wf) * 300', + expr=f'sum(rate(flyte:propeller:all:node:build_dynamic_workflow_us_count[5m])) by (wf)', refId='A', ), ], @@ -411,12 +411,12 @@ def task_event_recording() -> typing.List[Graph]: dataSource=DATASOURCE, targets=[ Target( - expr=f'sum(rate(flyte:propeller:all:task:event_recording:success_duration_ms_count[5m])) by (wf) * 300', + expr=f'sum(rate(flyte:propeller:all:task:event_recording:success_duration_ms_count[5m])) by (wf)', legendFormat="success wf", refId='A', ), Target( - expr=f'sum(rate(flyte:propeller:all:task:event_recording:failure_duration_ms_count[5m])) by (wf) * 300', + expr=f'sum(rate(flyte:propeller:all:task:event_recording:failure_duration_ms_count[5m])) by (wf)', legendFormat="failure", refId='A', ), @@ -444,12 +444,12 @@ def node_event_recording() -> typing.List[Graph]: dataSource=DATASOURCE, targets=[ Target( - expr=f'sum(rate(flyte:propeller:all:node:event_recording:success_duration_ms_count[5m])) by (wf) * 300', + expr=f'sum(rate(flyte:propeller:all:node:event_recording:success_duration_ms_count[5m])) by (wf)', legendFormat="success", refId='A', ), Target( - expr=f'sum(rate(flyte:propeller:all:node:event_recording:failure_duration_ms_count[5m])) by (wf) * 300', + expr=f'sum(rate(flyte:propeller:all:node:event_recording:failure_duration_ms_count[5m])) by (wf)', legendFormat="failure", refId='A', ), @@ -477,12 +477,12 @@ def wf_event_recording() -> typing.List[Graph]: dataSource=DATASOURCE, targets=[ Target( - expr=f'sum(rate(flyte:propeller:all:workflow:event_recording:success_duration_ms_count[5m])) by (wf) * 300', + expr=f'sum(rate(flyte:propeller:all:workflow:event_recording:success_duration_ms_count[5m])) by (wf)', legendFormat="success", refId='A', ), Target( - expr=f'sum(rate(flyte:propeller:all:workflow:event_recording:failure_duration_ms_count[5m])) by (wf) * 300', + expr=f'sum(rate(flyte:propeller:all:workflow:event_recording:failure_duration_ms_count[5m])) by (wf)', legendFormat="failure", refId='A', ), @@ -513,7 +513,7 @@ def wf_store_latency(collapse: bool) -> Row: dataSource=DATASOURCE, targets=[ Target( - expr=f'sum(rate(flyte:propeller:all:wf_update_latency_ms_count[5m])) * 300', + expr=f'sum(rate(flyte:propeller:all:wf_update_latency_ms_count[5m]))', refId='A', ), ], @@ -524,7 +524,7 @@ def wf_store_latency(collapse: bool) -> Row: dataSource=DATASOURCE, targets=[ Target( - expr=f'sum(rate(flyte:propeller:all:wf_update_conflict[5m])) * 300', + expr=f'sum(rate(flyte:propeller:all:wf_update_conflict[5m]))', refId='A', ), ], @@ -535,7 +535,7 @@ def wf_store_latency(collapse: bool) -> Row: dataSource=DATASOURCE, targets=[ Target( - expr=f'sum(rate(flyte:propeller:all:wf_update_failed[5m])) * 300', + expr=f'sum(rate(flyte:propeller:all:wf_update_failed[5m]))', refId='A', ), ], @@ -575,17 +575,17 @@ def node_errors() -> Graph: dataSource=DATASOURCE, targets=[ Target( - expr=f'sum(rate(flyte:propeller:all:node:perma_system_error_duration_unlabeled_ms_count[5m])) * 300', + expr=f'sum(rate(flyte:propeller:all:node:perma_system_error_duration_unlabeled_ms_count[5m]))', legendFormat="system error", refId='A', ), Target( - expr=f'sum(rate(flyte:propeller:all:node:perma_user_error_duration_unlabeled_ms[5m])) * 300', + expr=f'sum(rate(flyte:propeller:all:node:perma_user_error_duration_unlabeled_ms[5m]))', legendFormat="user error", refId='A', ), Target( - expr=f'sum(rate(flyte:propeller:all:node:perma_unknown_error_duration_unlabeled_ms[5m])) * 300', + expr=f'sum(rate(flyte:propeller:all:node:perma_unknown_error_duration_unlabeled_ms[5m]))', legendFormat="user error", refId='A', ),