From 8bde0b602b9d1b98f3ff5561bf2dab85e3822cd0 Mon Sep 17 00:00:00 2001 From: Julia Date: Thu, 18 May 2023 20:28:41 +0200 Subject: [PATCH 01/12] Add draft dashboard for oncall metrics to provisioning plugin settings --- .../dashboards/oncall_metrics_dashboard.json | 1313 +++++++++++++++++ grafana-plugin/src/plugin.json | 6 + 2 files changed, 1319 insertions(+) create mode 100644 grafana-plugin/src/dashboards/oncall_metrics_dashboard.json diff --git a/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json b/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json new file mode 100644 index 0000000000..3e7f3f11c6 --- /dev/null +++ b/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json @@ -0,0 +1,1313 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "gridPos": { + "h": 2, + "w": 1, + "x": 0, + "y": 0 + }, + "id": 16, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "
\n \n
", + "mode": "html" + }, + "pluginVersion": "9.5.2", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "gridPos": { + "h": 2, + "w": 23, + "x": 1, + "y": 0 + }, + "id": 17, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "# Grafana OnCall Insights", + "mode": "markdown" + }, + "pluginVersion": "9.5.2", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 18, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "📣 This is a read-only dashboard. To make a copy, click \"Settings\" and \"Save as\".", + "mode": "markdown" + }, + "pluginVersion": "9.5.2", + "transparent": true, + "type": "text" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 19, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 0, + "y": 6 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "excludeNullMetadata": false, + "exemplar": false, + "expr": "sum($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"})", + "format": "time_series", + "fullMetaSearch": false, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A", + "useBackend": false + } + ], + "title": "Total alert groups", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "firing" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "acknowledged" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "silenced" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 5, + "y": 6 + }, + "id": 1, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "showUnfilled": true, + "valueMode": "color" + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "excludeNullMetadata": false, + "expr": "sum by(state) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"})", + "fullMetaSearch": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Total alert groups by state", + "transformations": [ + { + "id": "joinByLabels", + "options": { + "value": "state" + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "acknowledged": 1, + "firing": 0, + "resolved": 2, + "silenced": 3 + }, + "renameByName": {} + } + } + ], + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "resolved" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "firing" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "acknowledged" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "silenced" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#b6b6ba", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 12, + "y": 6 + }, + "id": 13, + "options": { + "displayLabels": ["percent"], + "legend": { + "displayMode": "list", + "placement": "right", + "showLegend": true, + "values": [] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by(state) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Total alert groups by states %", + "transformations": [ + { + "id": "joinByLabels", + "options": { + "value": "state" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "acknowledged": false, + "firing": false, + "resolved": false + }, + "indexByName": { + "acknowledged": 1, + "firing": 0, + "resolved": 2, + "silenced": 3 + }, + "renameByName": { + "firing": "" + } + } + } + ], + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Mean time between the start and first action of all alert groups for the last 7 days", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + }, + { + "color": "text", + "value": 80.0001 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 19, + "y": 6 + }, + "id": 14, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum($alert_groups_response_time_seconds_sum{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}) / sum($alert_groups_response_time_seconds_count{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"})", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Mean time to respond (MTTR)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 80, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "displayName": "${__field.labels.integration}", + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 13 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "code", + "excludeNullMetadata": false, + "exemplar": false, + "expr": "increase(sum by (integration) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"})[1h:])", + "fullMetaSearch": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "New alert groups during time period", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 23 + }, + "id": 11, + "panels": [], + "title": "Integrations data", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "mode": "gradient", + "type": "gauge", + "valueDisplayMode": "text" + }, + "filterable": false, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Integration" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "type": "auto" + } + }, + { + "id": "custom.width", + "value": 300 + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 20, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": ["sum"], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "builder", + "exemplar": false, + "expr": "sum by(integration) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"})", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Alert groups by Integration", + "transformations": [ + { + "id": "seriesToRows", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": {}, + "renameByName": { + "Metric": "Integration", + "Value": "Alert groups", + "integration": "Integration" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "mode": "gradient", + "type": "gauge", + "valueDisplayMode": "text" + }, + "filterable": false, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1200 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Integration" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "type": "auto" + } + }, + { + "id": "custom.width", + "value": 300 + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 21, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": ["sum"], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sort_desc($alert_groups_response_time_seconds_sum{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"} / $alert_groups_response_time_seconds_count{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"})", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Mean time to respond (MTTR) by Integration", + "transformations": [ + { + "id": "seriesToRows", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "cluster": true, + "id": true, + "instance": true, + "job": true, + "org_id": true, + "slug": true, + "team": true + }, + "indexByName": {}, + "renameByName": { + "Metric": "Integration", + "Value": "Alert groups", + "integration": "Integration" + } + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 35 + }, + "id": 12, + "panels": [], + "title": "Users data", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "mode": "gradient", + "type": "gauge", + "valueDisplayMode": "text" + }, + "filterable": false, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Team" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "type": "auto" + } + }, + { + "id": "custom.width", + "value": 300 + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 36 + }, + "id": 22, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": ["sum"], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by(team) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"})", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Alert groups by Team", + "transformations": [ + { + "id": "seriesToRows", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": {}, + "renameByName": { + "Metric": "Integration", + "Value": "Alert groups", + "team": "Team" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "mode": "gradient", + "type": "gauge", + "valueDisplayMode": "text" + }, + "filterable": false, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Team" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "type": "auto" + } + }, + { + "id": "custom.width", + "value": 300 + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 36 + }, + "id": 23, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": ["sum"], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sort_desc(sum by(team) ($alert_groups_response_time_seconds_sum{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}) / sum by(team) ($alert_groups_response_time_seconds_count{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Mean time to respond by Team (MTTR)", + "transformations": [ + { + "id": "seriesToRows", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": {}, + "renameByName": { + "Metric": "Integration", + "Value": "Alert groups", + "team": "Team" + } + } + } + ], + "type": "table" + } + ], + "refresh": "", + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "hide": 0, + "includeAll": false, + "label": "Data source", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "metrics(alert_groups_total)", + "hide": 2, + "includeAll": false, + "label": "alert_groups_total", + "multi": false, + "name": "alert_groups_total", + "options": [], + "query": { + "query": "metrics(alert_groups_total)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "metrics(alert_groups_response_time_seconds_count)", + "hide": 2, + "includeAll": false, + "label": "alert_groups_response_time_seconds_count", + "multi": false, + "name": "alert_groups_response_time_seconds_count", + "options": [], + "query": { + "query": "metrics(alert_groups_response_time_seconds_count)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "metrics(alert_groups_response_time_seconds_sum)", + "hide": 2, + "includeAll": false, + "label": "alert_groups_response_time_seconds_sum", + "multi": false, + "name": "alert_groups_response_time_seconds_sum", + "options": [], + "query": { + "query": "metrics(alert_groups_response_time_seconds_sum)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "metrics(alert_groups_response_time_seconds_bucket)", + "hide": 2, + "includeAll": false, + "label": "alert_groups_response_time_seconds_bucket", + "multi": false, + "name": "alert_groups_response_time_seconds_bucket", + "options": [], + "query": { + "query": "metrics(alert_groups_response_time_seconds_bucket)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(${alert_groups_total},slug)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "instance", + "options": [], + "query": { + "query": "label_values(${alert_groups_total},slug)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": ["All"], + "value": ["$__all"] + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(${alert_groups_total}{slug=~\"$instance\"},team)", + "hide": 0, + "includeAll": true, + "label": "Team", + "multi": true, + "name": "team", + "options": [], + "query": { + "query": "label_values(${alert_groups_total}{slug=~\"$instance\"},team)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": ["All"], + "value": ["$__all"] + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(${alert_groups_total}{team=~\"$team\",slug=~\"$instance\"},integration)", + "hide": 0, + "includeAll": true, + "label": "Integration", + "multi": true, + "name": "integration", + "options": [], + "query": { + "query": "label_values(${alert_groups_total}{team=~\"$team\",slug=~\"$instance\"},integration)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-7d", + "to": "now" + }, + "timepicker": { + "refresh_intervals": ["5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d"], + "time_options": ["5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d"] + }, + "timezone": "browser", + "title": "OnCall metrics", + "version": 1, + "weekStart": "" +} diff --git a/grafana-plugin/src/plugin.json b/grafana-plugin/src/plugin.json index a3a4c28799..5c5013b340 100644 --- a/grafana-plugin/src/plugin.json +++ b/grafana-plugin/src/plugin.json @@ -109,6 +109,12 @@ "role": "Viewer", "action": "grafana-oncall-app.other-settings:read", "addToNav": true + }, + { + "type": "dashboard", + "path": "dashboards/oncall_metrics_dashboard.json", + "addToNav": true, + "name": "OnCall metrics" } ], "routes": [ From 7e998354ff483e3a7e2147d2bdbf78904e3aa3b9 Mon Sep 17 00:00:00 2001 From: Julia Date: Tue, 6 Jun 2023 11:36:25 +0200 Subject: [PATCH 02/12] Update dashboard, fix expressions --- .../dashboards/oncall_metrics_dashboard.json | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json b/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json index 3e7f3f11c6..003be717e5 100644 --- a/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json +++ b/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json @@ -166,7 +166,7 @@ "editorMode": "builder", "excludeNullMetadata": false, "exemplar": false, - "expr": "sum($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"})", + "expr": "max_over_time(sum(avg without(pod, instance) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))[$__range:])", "format": "time_series", "fullMetaSearch": false, "instant": true, @@ -480,10 +480,6 @@ { "color": "text", "value": null - }, - { - "color": "text", - "value": 80.0001 } ] }, @@ -575,7 +571,6 @@ "decimals": 0, "displayName": "${__field.labels.integration}", "mappings": [], - "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ @@ -617,7 +612,7 @@ "editorMode": "code", "excludeNullMetadata": false, "exemplar": false, - "expr": "increase(sum by (integration) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"})[1h:])", + "expr": "increase(max_over_time(sum by (integration) (avg without(pod, instance) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))[1h:])[1h:])", "fullMetaSearch": false, "instant": false, "legendFormat": "__auto", @@ -784,7 +779,7 @@ }, { "color": "red", - "value": 1200 + "value": 5400 } ] }, @@ -863,7 +858,10 @@ "job": true, "org_id": true, "slug": true, - "team": true + "team": true, + "pod": true, + "container": true, + "namespace": true }, "indexByName": {}, "renameByName": { @@ -1028,6 +1026,10 @@ { "color": "green", "value": null + }, + { + "color": "red", + "value": 5400 } ] }, From e1714267329be8b71bc86e45b7ce16fe39a87f19 Mon Sep 17 00:00:00 2001 From: Julia Date: Wed, 7 Jun 2023 11:08:46 +0200 Subject: [PATCH 03/12] Add tag for dashboard --- grafana-plugin/src/dashboards/oncall_metrics_dashboard.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json b/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json index 003be717e5..a8359cf27f 100644 --- a/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json +++ b/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json @@ -116,7 +116,6 @@ "type": "prometheus", "uid": "${datasource}" }, - "description": "", "fieldConfig": { "defaults": { "color": { @@ -1118,7 +1117,7 @@ "refresh": "", "schemaVersion": 38, "style": "dark", - "tags": [], + "tags": ["oncall"], "templating": { "list": [ { From a3e7dd25f72b321f77abbb07e113b9e6877e53b6 Mon Sep 17 00:00:00 2001 From: Julia Date: Wed, 7 Jun 2023 11:16:35 +0200 Subject: [PATCH 04/12] Update dashboard (add new panels, update old panels) --- .../dashboards/oncall_metrics_dashboard.json | 442 +++++++++++------- 1 file changed, 284 insertions(+), 158 deletions(-) diff --git a/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json b/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json index a8359cf27f..aa3988770e 100644 --- a/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json +++ b/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json @@ -82,7 +82,7 @@ "h": 2, "w": 24, "x": 0, - "y": 3 + "y": 2 }, "id": 18, "options": { @@ -104,7 +104,7 @@ "h": 1, "w": 24, "x": 0, - "y": 5 + "y": 4 }, "id": 19, "panels": [], @@ -139,9 +139,9 @@ "h": 7, "w": 5, "x": 0, - "y": 6 + "y": 5 }, - "id": 4, + "id": 25, "options": { "colorMode": "value", "graphMode": "none", @@ -162,15 +162,15 @@ "uid": "${datasource}" }, "disableTextWrap": false, - "editorMode": "builder", + "editorMode": "code", "excludeNullMetadata": false, "exemplar": false, "expr": "max_over_time(sum(avg without(pod, instance) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))[$__range:])", "format": "time_series", "fullMetaSearch": false, - "instant": true, + "instant": false, "legendFormat": "__auto", - "range": false, + "range": true, "refId": "A", "useBackend": false } @@ -224,7 +224,7 @@ { "id": "color", "value": { - "fixedColor": "yellow", + "fixedColor": "dark-yellow", "mode": "fixed" } } @@ -248,16 +248,16 @@ }, "gridPos": { "h": 7, - "w": 7, + "w": 6, "x": 5, - "y": 6 + "y": 5 }, "id": 1, "options": { "displayMode": "gradient", "minVizHeight": 10, "minVizWidth": 0, - "orientation": "horizontal", + "orientation": "vertical", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", @@ -274,9 +274,9 @@ "uid": "${datasource}" }, "disableTextWrap": false, - "editorMode": "builder", + "editorMode": "code", "excludeNullMetadata": false, - "expr": "sum by(state) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"})", + "expr": "sum by (state) (avg without(pod, instance) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))", "fullMetaSearch": false, "legendFormat": "__auto", "range": true, @@ -316,76 +316,101 @@ "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } + "mode": "thresholds" }, + "decimals": 0, "mappings": [], - "unit": "none" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "resolved" - }, - "properties": [ + "thresholds": { + "mode": "absolute", + "steps": [ { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } + "color": "text", + "value": null } ] }, - { - "matcher": { - "id": "byName", - "options": "firing" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "red", - "mode": "fixed" - } - } - ] + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 11, + "y": 5 + }, + "id": 29, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - { - "matcher": { - "id": "byName", - "options": "acknowledged" - }, - "properties": [ + "disableTextWrap": false, + "editorMode": "code", + "excludeNullMetadata": false, + "exemplar": false, + "expr": "increase(max_over_time(sum(avg without(pod, instance) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))[$__range:])[$__range:])", + "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A", + "useBackend": false + } + ], + "timeTo": "5m", + "title": "Total new alert groups for period", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Mean time between the start and first action of all alert groups for the last 7 days", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } + "color": "text", + "value": null } ] }, + "unit": "s" + }, + "overrides": [ { "matcher": { "id": "byName", - "options": "silenced" + "options": "sum(oncall_alert_groups_response_time_seconds_sum{slug=~\"ops\", team=~\"(OnCall Squad Production|Loki)\", integration=~\"(Alert Prod EU Healthcheck|Alert Prod Healthcheck|Amixr Prod AlertManager|Direct paging \\\\(OnCall Squad team\\\\)|Formatted Webhook ❤️👻🍺|Grafana ❤️🍎👻|GrafanaOnCall dev alerts|Loki Critical Webhook|Loki-Prod|Manual incidents \\\\(IRM team\\\\)|Support Escalations|Twilio Price Alerts|\\\\[DON\\\\'T DELETE\\\\] Alert Group Escalation Auditor Heartbeat)\"}) / sum(oncall_alert_groups_response_time_seconds_count{slug=~\"ops\", team=~\"(OnCall Squad Production|Loki)\", integration=~\"(Alert Prod EU Healthcheck|Alert Prod Healthcheck|Amixr Prod AlertManager|Direct paging \\\\(OnCall Squad team\\\\)|Formatted Webhook ❤️👻🍺|Grafana ❤️🍎👻|GrafanaOnCall dev alerts|Loki Critical Webhook|Loki-Prod|Manual incidents \\\\(IRM team\\\\)|Support Escalations|Twilio Price Alerts|\\\\[DON\\\\'T DELETE\\\\] Alert Group Escalation Auditor Heartbeat)\"})" }, "properties": [ { - "id": "color", - "value": { - "fixedColor": "#b6b6ba", - "mode": "fixed" - } + "id": "displayName", + "value": "MTTR" } ] } @@ -393,29 +418,23 @@ }, "gridPos": { "h": 7, - "w": 7, - "x": 12, - "y": 6 + "w": 4, + "x": 16, + "y": 5 }, - "id": 13, + "id": 14, "options": { - "displayLabels": ["percent"], - "legend": { - "displayMode": "list", - "placement": "right", - "showLegend": true, - "values": [] - }, - "pieType": "pie", + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "text": {}, + "textMode": "auto" }, "pluginVersion": "9.5.2", "targets": [ @@ -425,48 +444,23 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum by(state) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"})", + "exemplar": false, + "expr": "avg_over_time((sum($alert_groups_response_time_seconds_sum{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}) / sum($alert_groups_response_time_seconds_count{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))[$__range:])", + "instant": true, "legendFormat": "__auto", - "range": true, + "range": false, "refId": "A" } ], - "title": "Total alert groups by states %", - "transformations": [ - { - "id": "joinByLabels", - "options": { - "value": "state" - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "acknowledged": false, - "firing": false, - "resolved": false - }, - "indexByName": { - "acknowledged": 1, - "firing": 0, - "resolved": 2, - "silenced": 3 - }, - "renameByName": { - "firing": "" - } - } - } - ], - "type": "piechart" + "title": "Mean time to respond (MTTR)", + "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "description": "Mean time between the start and first action of all alert groups for the last 7 days", + "description": "", "fieldConfig": { "defaults": { "color": { @@ -477,29 +471,50 @@ "mode": "absolute", "steps": [ { - "color": "text", + "color": "blue", "value": null + }, + { + "color": "green", + "value": -10000000 + }, + { + "color": "super-light-yellow", + "value": 0 } ] }, "unit": "s" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "avg(sum(oncall_alert_groups_response_time_seconds_sum{slug=~\"ops\", team=~\"(OnCall Squad Production|Loki)\", integration=~\"(Alert Prod EU Healthcheck|Alert Prod Healthcheck|Amixr Prod AlertManager|Direct paging \\\\(OnCall Squad team\\\\)|Formatted Webhook ❤️👻🍺|Grafana ❤️🍎👻|GrafanaOnCall dev alerts|Loki Critical Webhook|Loki-Prod|Manual incidents \\\\(IRM team\\\\)|Support Escalations|Twilio Price Alerts|\\\\[DON\\\\'T DELETE\\\\] Alert Group Escalation Auditor Heartbeat)\"}) / sum(oncall_alert_groups_response_time_seconds_count{slug=~\"ops\", team=~\"(OnCall Squad Production|Loki)\", integration=~\"(Alert Prod EU Healthcheck|Alert Prod Healthcheck|Amixr Prod AlertManager|Direct paging \\\\(OnCall Squad team\\\\)|Formatted Webhook ❤️👻🍺|Grafana ❤️🍎👻|GrafanaOnCall dev alerts|Loki Critical Webhook|Loki-Prod|Manual incidents \\\\(IRM team\\\\)|Support Escalations|Twilio Price Alerts|\\\\[DON\\\\'T DELETE\\\\] Alert Group Escalation Auditor Heartbeat)\"}))" + }, + "properties": [ + { + "id": "displayName", + "value": "MTTR" + } + ] + } + ] }, "gridPos": { "h": 7, - "w": 5, - "x": 19, - "y": 6 + "w": 4, + "x": 20, + "y": 5 }, - "id": 14, + "id": 32, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "center", "orientation": "auto", "reduceOptions": { - "calcs": ["lastNotNull"], + "calcs": ["diff"], "fields": "", "values": false }, @@ -514,14 +529,14 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum($alert_groups_response_time_seconds_sum{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}) / sum($alert_groups_response_time_seconds_count{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"})", - "instant": true, + "expr": "avg(sum($alert_groups_response_time_seconds_sum{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}) / sum($alert_groups_response_time_seconds_count{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))", + "instant": false, "legendFormat": "__auto", - "range": false, + "range": true, "refId": "A" } ], - "title": "Mean time to respond (MTTR)", + "title": "MTTR changed for period", "type": "stat" }, { @@ -586,14 +601,14 @@ "h": 10, "w": 24, "x": 0, - "y": 13 + "y": 12 }, - "id": 2, + "id": 24, "options": { "legend": { "calcs": [], "displayMode": "list", - "placement": "right", + "placement": "bottom", "showLegend": true }, "tooltip": { @@ -623,13 +638,124 @@ "title": "New alert groups during time period", "type": "timeseries" }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "fixed", + "seriesBy": "min" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 54, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "displayName", + "value": "MTTR" + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 22 + }, + "id": 34, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "avg(sum($alert_groups_response_time_seconds_sum{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}) / sum($alert_groups_response_time_seconds_count{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "MTTR changed for period", + "type": "timeseries" + }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 23 + "y": 31 }, "id": 11, "panels": [], @@ -648,11 +774,11 @@ "mode": "thresholds" }, "custom": { - "align": "left", + "align": "auto", "cellOptions": { "mode": "gradient", "type": "gauge", - "valueDisplayMode": "text" + "valueDisplayMode": "color" }, "filterable": false, "inspect": false @@ -662,7 +788,7 @@ "mode": "absolute", "steps": [ { - "color": "blue", + "color": "green", "value": null } ] @@ -690,14 +816,14 @@ ] }, "gridPos": { - "h": 11, + "h": 23, "w": 12, "x": 0, - "y": 24 + "y": 32 }, "id": 20, "options": { - "cellHeight": "sm", + "cellHeight": "md", "footer": { "countRows": false, "fields": "", @@ -714,9 +840,9 @@ "type": "prometheus", "uid": "${datasource}" }, - "editorMode": "builder", + "editorMode": "code", "exemplar": false, - "expr": "sum by(integration) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"})", + "expr": "sort_desc(max_over_time(sum by(integration) (avg without(pod, instance)($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))[$__range:]))", "format": "table", "instant": true, "legendFormat": "__auto", @@ -759,7 +885,7 @@ "mode": "thresholds" }, "custom": { - "align": "left", + "align": "auto", "cellOptions": { "mode": "gradient", "type": "gauge", @@ -806,10 +932,10 @@ ] }, "gridPos": { - "h": 11, + "h": 23, "w": 12, "x": 12, - "y": 24 + "y": 32 }, "id": 21, "options": { @@ -832,7 +958,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sort_desc($alert_groups_response_time_seconds_sum{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"} / $alert_groups_response_time_seconds_count{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"})", + "expr": "sort_desc(avg_over_time((sum by (integration)($alert_groups_response_time_seconds_sum{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}) / sum by (integration)($alert_groups_response_time_seconds_count{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))[$__range:]))", "format": "table", "instant": true, "legendFormat": "__auto", @@ -852,20 +978,20 @@ "excludeByName": { "Time": true, "cluster": true, + "container": true, "id": true, "instance": true, "job": true, + "namespace": true, "org_id": true, - "slug": true, - "team": true, "pod": true, - "container": true, - "namespace": true + "slug": true, + "team": true }, "indexByName": {}, "renameByName": { "Metric": "Integration", - "Value": "Alert groups", + "Value": "MTTR", "integration": "Integration" } } @@ -879,7 +1005,7 @@ "h": 1, "w": 24, "x": 0, - "y": 35 + "y": 55 }, "id": 12, "panels": [], @@ -898,11 +1024,11 @@ "mode": "thresholds" }, "custom": { - "align": "left", + "align": "auto", "cellOptions": { - "mode": "gradient", + "mode": "basic", "type": "gauge", - "valueDisplayMode": "text" + "valueDisplayMode": "color" }, "filterable": false, "inspect": false @@ -912,7 +1038,7 @@ "mode": "absolute", "steps": [ { - "color": "blue", + "color": "green", "value": null } ] @@ -943,7 +1069,7 @@ "h": 11, "w": 12, "x": 0, - "y": 36 + "y": 56 }, "id": 22, "options": { @@ -966,7 +1092,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum by(team) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"})", + "expr": "sort_desc(sum by(team) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))", "format": "table", "instant": true, "legendFormat": "__auto", @@ -1006,7 +1132,7 @@ "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "mode": "continuous-GrYlRd" }, "custom": { "align": "left", @@ -1059,7 +1185,7 @@ "h": 11, "w": 12, "x": 12, - "y": 36 + "y": 56 }, "id": 23, "options": { @@ -1082,7 +1208,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sort_desc(sum by(team) ($alert_groups_response_time_seconds_sum{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}) / sum by(team) ($alert_groups_response_time_seconds_count{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))", + "expr": "sort_desc(avg_over_time((sum by(team) ($alert_groups_response_time_seconds_sum{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}) / sum by(team)($alert_groups_response_time_seconds_count{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))[$__range:]))", "format": "table", "instant": true, "legendFormat": "__auto", @@ -1105,7 +1231,7 @@ "indexByName": {}, "renameByName": { "Metric": "Integration", - "Value": "Alert groups", + "Value": "MTTR", "team": "Team" } } @@ -1114,7 +1240,7 @@ "type": "table" } ], - "refresh": "", + "refresh": false, "schemaVersion": 38, "style": "dark", "tags": ["oncall"], From 59f218cfc7153a8cbe2eff0d33ed937ac3a1b45e Mon Sep 17 00:00:00 2001 From: Julia Date: Wed, 7 Jun 2023 13:35:58 +0200 Subject: [PATCH 05/12] Add display name for dashboard var "instance" --- grafana-plugin/src/dashboards/oncall_metrics_dashboard.json | 1 + 1 file changed, 1 insertion(+) diff --git a/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json b/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json index aa3988770e..e9347a6044 100644 --- a/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json +++ b/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json @@ -1356,6 +1356,7 @@ "definition": "label_values(${alert_groups_total},slug)", "hide": 0, "includeAll": true, + "label": "Instance", "multi": true, "name": "instance", "options": [], From f2b1962ec4c1cc84e2c4972cd7e12f99c9b6926a Mon Sep 17 00:00:00 2001 From: Julia Date: Wed, 7 Jun 2023 13:56:15 +0200 Subject: [PATCH 06/12] Fix cell height for Alert Groups panel in dashboard --- .../src/dashboards/oncall_metrics_dashboard.json | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json b/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json index e9347a6044..fe1fbaf022 100644 --- a/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json +++ b/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json @@ -823,7 +823,7 @@ }, "id": 20, "options": { - "cellHeight": "md", + "cellHeight": "sm", "footer": { "countRows": false, "fields": "", @@ -1247,6 +1247,11 @@ "templating": { "list": [ { + "current": { + "selected": true, + "text": "grafanacloud-usage", + "value": "grafanacloud-usage" + }, "hide": 0, "includeAll": false, "label": "Data source", From a349c78d446db592b0a4456237ae094e39b5237f Mon Sep 17 00:00:00 2001 From: Julia Date: Thu, 8 Jun 2023 09:16:55 +0200 Subject: [PATCH 07/12] Update dashboard --- .../src/dashboards/oncall_metrics_dashboard.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json b/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json index fe1fbaf022..cee37a514a 100644 --- a/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json +++ b/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json @@ -375,7 +375,7 @@ } ], "timeTo": "5m", - "title": "Total new alert groups for period", + "title": "New alert groups for selected period", "type": "stat" }, { @@ -626,7 +626,7 @@ "editorMode": "code", "excludeNullMetadata": false, "exemplar": false, - "expr": "increase(max_over_time(sum by (integration) (avg without(pod, instance) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))[1h:])[1h:])", + "expr": "increase(max_over_time(sum by (integration) (avg without(pod, instance) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))[1h:])[1h:]) != 0", "fullMetaSearch": false, "instant": false, "legendFormat": "__auto", @@ -882,7 +882,7 @@ "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "mode": "continuous-GrYlRd" }, "custom": { "align": "auto", @@ -1026,7 +1026,7 @@ "custom": { "align": "auto", "cellOptions": { - "mode": "basic", + "mode": "gradient", "type": "gauge", "valueDisplayMode": "color" }, From 2694304198a1e78cec78a6a305c2defab4b259ae Mon Sep 17 00:00:00 2001 From: Julia Date: Thu, 8 Jun 2023 12:14:41 +0200 Subject: [PATCH 08/12] Change dashboard version, fix expressions and overrides --- .../dashboards/oncall_metrics_dashboard.json | 46 +++++++++++++++---- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json b/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json index cee37a514a..7a86149071 100644 --- a/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json +++ b/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json @@ -133,7 +133,20 @@ }, "unit": "none" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "displayName", + "value": "Total alert groups" + } + ] + } + ] }, "gridPos": { "h": 7, @@ -165,7 +178,7 @@ "editorMode": "code", "excludeNullMetadata": false, "exemplar": false, - "expr": "max_over_time(sum(avg without(pod, instance) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))[$__range:])", + "expr": "max_over_time(sum(avg without(pod, instance) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))[1d:])", "format": "time_series", "fullMetaSearch": false, "instant": false, @@ -331,7 +344,20 @@ }, "unit": "none" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "displayName", + "value": "New alert groups" + } + ] + } + ] }, "gridPos": { "h": 7, @@ -363,7 +389,7 @@ "editorMode": "code", "excludeNullMetadata": false, "exemplar": false, - "expr": "increase(max_over_time(sum(avg without(pod, instance) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))[$__range:])[$__range:])", + "expr": "increase(max_over_time(sum(avg without(pod, instance) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))[1d:])[$__range:])", "format": "time_series", "fullMetaSearch": false, "includeNullMetadata": true, @@ -405,7 +431,7 @@ { "matcher": { "id": "byName", - "options": "sum(oncall_alert_groups_response_time_seconds_sum{slug=~\"ops\", team=~\"(OnCall Squad Production|Loki)\", integration=~\"(Alert Prod EU Healthcheck|Alert Prod Healthcheck|Amixr Prod AlertManager|Direct paging \\\\(OnCall Squad team\\\\)|Formatted Webhook ❤️👻🍺|Grafana ❤️🍎👻|GrafanaOnCall dev alerts|Loki Critical Webhook|Loki-Prod|Manual incidents \\\\(IRM team\\\\)|Support Escalations|Twilio Price Alerts|\\\\[DON\\\\'T DELETE\\\\] Alert Group Escalation Auditor Heartbeat)\"}) / sum(oncall_alert_groups_response_time_seconds_count{slug=~\"ops\", team=~\"(OnCall Squad Production|Loki)\", integration=~\"(Alert Prod EU Healthcheck|Alert Prod Healthcheck|Amixr Prod AlertManager|Direct paging \\\\(OnCall Squad team\\\\)|Formatted Webhook ❤️👻🍺|Grafana ❤️🍎👻|GrafanaOnCall dev alerts|Loki Critical Webhook|Loki-Prod|Manual incidents \\\\(IRM team\\\\)|Support Escalations|Twilio Price Alerts|\\\\[DON\\\\'T DELETE\\\\] Alert Group Escalation Auditor Heartbeat)\"})" + "options": "Value" }, "properties": [ { @@ -490,7 +516,7 @@ { "matcher": { "id": "byName", - "options": "avg(sum(oncall_alert_groups_response_time_seconds_sum{slug=~\"ops\", team=~\"(OnCall Squad Production|Loki)\", integration=~\"(Alert Prod EU Healthcheck|Alert Prod Healthcheck|Amixr Prod AlertManager|Direct paging \\\\(OnCall Squad team\\\\)|Formatted Webhook ❤️👻🍺|Grafana ❤️🍎👻|GrafanaOnCall dev alerts|Loki Critical Webhook|Loki-Prod|Manual incidents \\\\(IRM team\\\\)|Support Escalations|Twilio Price Alerts|\\\\[DON\\\\'T DELETE\\\\] Alert Group Escalation Auditor Heartbeat)\"}) / sum(oncall_alert_groups_response_time_seconds_count{slug=~\"ops\", team=~\"(OnCall Squad Production|Loki)\", integration=~\"(Alert Prod EU Healthcheck|Alert Prod Healthcheck|Amixr Prod AlertManager|Direct paging \\\\(OnCall Squad team\\\\)|Formatted Webhook ❤️👻🍺|Grafana ❤️🍎👻|GrafanaOnCall dev alerts|Loki Critical Webhook|Loki-Prod|Manual incidents \\\\(IRM team\\\\)|Support Escalations|Twilio Price Alerts|\\\\[DON\\\\'T DELETE\\\\] Alert Group Escalation Auditor Heartbeat)\"}))" + "options": "Value" }, "properties": [ { @@ -626,7 +652,7 @@ "editorMode": "code", "excludeNullMetadata": false, "exemplar": false, - "expr": "increase(max_over_time(sum by (integration) (avg without(pod, instance) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))[1h:])[1h:]) != 0", + "expr": "increase(max_over_time(sum by (integration) (avg without(pod, instance) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))[1h:])[1h:]) > 0", "fullMetaSearch": false, "instant": false, "legendFormat": "__auto", @@ -842,7 +868,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sort_desc(max_over_time(sum by(integration) (avg without(pod, instance)($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))[$__range:]))", + "expr": "sort_desc(max_over_time(sum by(integration) (avg without(pod, instance)($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))[1d:]))", "format": "table", "instant": true, "legendFormat": "__auto", @@ -1092,7 +1118,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sort_desc(sum by(team) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))", + "expr": "sort_desc(max_over_time(sum by(team) (avg without(pod, instance)($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))[1d:]))", "format": "table", "instant": true, "legendFormat": "__auto", @@ -1441,6 +1467,6 @@ }, "timezone": "browser", "title": "OnCall metrics", - "version": 1, + "version": 0, "weekStart": "" } From e72421d60239a386a106409df2e95dec0a896753 Mon Sep 17 00:00:00 2001 From: Julia Date: Thu, 8 Jun 2023 13:07:52 +0200 Subject: [PATCH 09/12] Fix expressions and overrides --- .../dashboards/oncall_metrics_dashboard.json | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json b/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json index 7a86149071..754a63ef50 100644 --- a/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json +++ b/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json @@ -621,7 +621,28 @@ ] } }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": true + } + } + ] + } + ] }, "gridPos": { "h": 10, @@ -652,7 +673,7 @@ "editorMode": "code", "excludeNullMetadata": false, "exemplar": false, - "expr": "increase(max_over_time(sum by (integration) (avg without(pod, instance) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))[1h:])[1h:]) > 0", + "expr": "increase(max_over_time(sum by (integration) (avg without(pod, instance) ($alert_groups_total{slug=~\"$instance\", team=~\"$team\", integration=~\"$integration\"}))[1h:])[1h:])", "fullMetaSearch": false, "instant": false, "legendFormat": "__auto", From 235246c5034333cf9ef7c1a084b77c100f9e9700 Mon Sep 17 00:00:00 2001 From: Julia Date: Wed, 21 Jun 2023 10:14:27 +0200 Subject: [PATCH 10/12] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 28adf7ce47..561b7a1cd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Enable schedule related profile settings oncall [1508](https://github.com/grafana/oncall/issues/1508) - Highlight user shifts oncall [1509](https://github.com/grafana/oncall/issues/1509) - Rename or Description for Schedules Rotations [1460](https://github.com/grafana/oncall/issues/1406) +- Add dashboard for OnCall metrics ## Changed From 5040d394a58d9a3d93104fd0148a02f7fae580c3 Mon Sep 17 00:00:00 2001 From: Yulya Artyukhina Date: Wed, 21 Jun 2023 11:58:08 +0200 Subject: [PATCH 11/12] Change dashboard title Co-authored-by: Joey Orlando --- grafana-plugin/src/plugin.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grafana-plugin/src/plugin.json b/grafana-plugin/src/plugin.json index 5c5013b340..efc1196d5c 100644 --- a/grafana-plugin/src/plugin.json +++ b/grafana-plugin/src/plugin.json @@ -114,7 +114,7 @@ "type": "dashboard", "path": "dashboards/oncall_metrics_dashboard.json", "addToNav": true, - "name": "OnCall metrics" + "name": "OnCall Metrics" } ], "routes": [ From 5347adee5dd5b49d49550ac4878af14baae5d66b Mon Sep 17 00:00:00 2001 From: Yulya Artyukhina Date: Wed, 21 Jun 2023 11:58:18 +0200 Subject: [PATCH 12/12] Change dashboard title Co-authored-by: Joey Orlando --- grafana-plugin/src/dashboards/oncall_metrics_dashboard.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json b/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json index 754a63ef50..d5e7b0b009 100644 --- a/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json +++ b/grafana-plugin/src/dashboards/oncall_metrics_dashboard.json @@ -1487,7 +1487,7 @@ "time_options": ["5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d"] }, "timezone": "browser", - "title": "OnCall metrics", + "title": "OnCall Metrics", "version": 0, "weekStart": "" }