From cf5f69022f3a208389f0c1331d716f495c276b5c Mon Sep 17 00:00:00 2001 From: Owen Diehl Date: Fri, 8 Oct 2021 14:59:09 -0400 Subject: [PATCH] Revert "loki-mixin: use centralized configuration for dashboard matchers / selectors (#4279)" (#4441) This reverts commit 1a4be0923ea9d36ca37bb4b3aa896503d8944208. --- production/loki-mixin/config.libsonnet | 15 -- .../dashboards/dashboard-utils.libsonnet | 10 +- .../dashboards/loki-chunks.libsonnet | 238 ++++++++++------ .../dashboards/loki-deletion.libsonnet | 17 +- .../dashboards/loki-reads-resources.libsonnet | 12 +- .../dashboards/loki-reads.libsonnet | 253 ++++++++++++------ .../dashboards/loki-retention.libsonnet | 10 +- .../loki-writes-resources.libsonnet | 8 +- .../dashboards/loki-writes.libsonnet | 218 ++++++++++----- production/loki-mixin/jsonnetfile.json | 44 +-- production/loki-mixin/jsonnetfile.lock.json | 26 -- 11 files changed, 538 insertions(+), 313 deletions(-) delete mode 100644 production/loki-mixin/jsonnetfile.lock.json diff --git a/production/loki-mixin/config.libsonnet b/production/loki-mixin/config.libsonnet index f0add47364901..220325c803062 100644 --- a/production/loki-mixin/config.libsonnet +++ b/production/loki-mixin/config.libsonnet @@ -3,25 +3,10 @@ // Tags for dashboards. tags: ['loki'], - singleBinary: false, - // The label used to differentiate between different application instances (i.e. 'pod' in a kubernetes install). per_instance_label: 'pod', // The label used to differentiate between different nodes (i.e. servers). per_node_label: 'instance', - - // These are used by the dashboards and allow for the simultaneous display of - // microservice and single binary loki clusters. - job_names: { - gateway: '(gateway|loki-gw|loki-gw-internal)', - query_frontend: '(query-frontend.*|loki$)', // Match also custom query-frontend deployments. - querier: '(querier.*|loki$)', // Match also custom querier deployments. - ingester: '(ingester.*|loki$)', // Match also custom and per-zone ingester deployments. - distributor: '(distributor.*|loki$)', - index_gateway: '(index-gateway.*|querier.*|loki$)', - ruler: '(ruler|loki$)', - compactor: 'compactor.*', // Match also custom compactor deployments. - }, }, } diff --git a/production/loki-mixin/dashboards/dashboard-utils.libsonnet b/production/loki-mixin/dashboards/dashboard-utils.libsonnet index 5bab90592a120..00dd12cd4f2ff 100644 --- a/production/loki-mixin/dashboards/dashboard-utils.libsonnet +++ b/production/loki-mixin/dashboards/dashboard-utils.libsonnet @@ -47,20 +47,15 @@ local utils = import 'mixin-utils/utils.libsonnet'; d.addMultiTemplate('cluster', 'loki_build_info', 'cluster') .addMultiTemplate('namespace', 'loki_build_info', 'namespace') else - d.addMultiTemplate('cluster', 'loki_build_info', 'cluster'), + d.addTemplate('cluster', 'loki_build_info', 'cluster') + .addTemplate('namespace', 'loki_build_info', 'namespace'), }, - jobSelector(job):: - if $._config.singleBinary - then [utils.selector.noop('cluster'), utils.selector.re('job', '$job')] - else [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/%s' % job)], - jobMatcher(job):: 'cluster=~"$cluster", job=~"($namespace)/%s"' % job, namespaceMatcher():: 'cluster=~"$cluster", namespace=~"$namespace"', - logPanel(title, selector, datasource='$logs'):: { title: title, type: 'logs', @@ -121,7 +116,6 @@ local utils = import 'mixin-utils/utils.libsonnet'; }, datasource: '$datasource', }, - containerCPUUsagePanel(title, containerName):: $.panel(title) + $.queryPanel([ diff --git a/production/loki-mixin/dashboards/loki-chunks.libsonnet b/production/loki-mixin/dashboards/loki-chunks.libsonnet index 38ead1c14c2aa..d0c9e512febbd 100644 --- a/production/loki-mixin/dashboards/loki-chunks.libsonnet +++ b/production/loki-mixin/dashboards/loki-chunks.libsonnet @@ -1,88 +1,172 @@ local utils = import 'mixin-utils/utils.libsonnet'; -{ +(import 'dashboard-utils.libsonnet') { grafanaDashboards+: { - 'loki-chunks.json': - ($.dashboard('Loki / Chunks')) - .addClusterSelectorTemplates(false) - .addRow( - $.row('Active Series / Chunks') - .addPanel( - $.panel('Series') + - $.queryPanel('sum(loki_ingester_memory_chunks{%s})' % $.jobMatcher($._config.job_names.ingester), 'series'), - ) - .addPanel( - $.panel('Chunks per series') + - $.queryPanel( - 'sum(loki_ingester_memory_chunks{%s}) / sum(loki_ingester_memory_streams{job=~"%s"})' % [ - $.jobMatcher($._config.job_names.ingester), - $.jobMatcher($._config.job_names.ingester), - ], - 'chunks' - ), - ) + local dashboards = self, + + 'loki-chunks.json':{ + local cfg = self, + + showMultiCluster:: true, + clusterLabel:: 'cluster', + clusterMatchers:: + if cfg.showMultiCluster then + [utils.selector.re(cfg.clusterLabel, '$cluster')] + else + [], + + namespaceType:: 'query', + namespaceQuery:: + if cfg.showMultiCluster then + 'kube_pod_container_info{cluster="$cluster", image=~".*loki.*"}' + else + 'kube_pod_container_info{image=~".*loki.*"}', + + assert (cfg.namespaceType == 'custom' || cfg.namespaceType == 'query') : "Only types 'query' and 'custom' are allowed for dashboard variable 'namespace'", + + matchers:: { + ingester: [utils.selector.re('job', '($namespace)/ingester')], + }, + + local selector(matcherId) = + std.join(',', ['%(label)s%(op)s"%(value)s"' % matcher for matcher in (cfg.clusterMatchers + cfg.matchers[matcherId])]), + + ingesterSelector:: selector('ingester'), + ingesterSelectorOnly:: + std.join(',', ['%(label)s%(op)s"%(value)s"' % matcher for matcher in cfg.matchers.ingester]), + + templateLabels:: ( + if cfg.showMultiCluster then [ + { + variable:: 'cluster', + label:: cfg.clusterLabel, + query:: 'kube_pod_container_info{image=~".*loki.*"}', + type:: 'query' + }, + ] else [] + ) + [ + { + variable:: 'namespace', + label:: 'namespace', + query:: cfg.namespaceQuery, + type:: cfg.namespaceType + }, + ], + } + + $.dashboard('Loki / Chunks') + .addClusterSelectorTemplates(false) + .addRow( + $.row('Active Series / Chunks') + .addPanel( + $.panel('Series') + + $.queryPanel('sum(loki_ingester_memory_chunks{%s})' % dashboards['loki-chunks.json'].ingesterSelector, 'series'), ) - .addRow( - $.row('Flush Stats') - .addPanel( - $.panel('Utilization') + - $.latencyPanel('loki_ingester_chunk_utilization', '{%s}' % $.jobMatcher($._config.job_names.ingester), multiplier='1') + - { yaxes: $.yaxes('percentunit') }, - ) - .addPanel( - $.panel('Age') + - $.latencyPanel('loki_ingester_chunk_age_seconds', '{%s}' % $.jobMatcher($._config.job_names.ingester)), + .addPanel( + $.panel('Chunks per series') + + $.queryPanel( + 'sum(loki_ingester_memory_chunks{%s}) / sum(loki_ingester_memory_streams{%s})' % [ + dashboards['loki-chunks.json'].ingesterSelector, + dashboards['loki-chunks.json'].ingesterSelectorOnly, + ], + 'chunks' ), ) - .addRow( - $.row('Flush Stats') - .addPanel( - $.panel('Size') + - $.latencyPanel('loki_ingester_chunk_entries', '{%s}' % $.jobMatcher($._config.job_names.ingester), multiplier='1') + - { yaxes: $.yaxes('short') }, - ) - .addPanel( - $.panel('Entries') + - $.queryPanel( - 'sum(rate(loki_chunk_store_index_entries_per_chunk_sum{%s}[5m])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{%s}[5m]))' % [ - $.jobMatcher($._config.job_names.ingester), - $.jobMatcher($._config.job_names.ingester), - ], - 'entries' - ), - ), + ) + .addRow( + $.row('Flush Stats') + .addPanel( + $.panel('Utilization') + + $.latencyPanel('loki_ingester_chunk_utilization', '{%s}' % dashboards['loki-chunks.json'].ingesterSelector, multiplier='1') + + { yaxes: $.yaxes('percentunit') }, + ) + .addPanel( + $.panel('Age') + + $.latencyPanel('loki_ingester_chunk_age_seconds', '{%s}' % dashboards['loki-chunks.json'].ingesterSelector), + ), + ) + .addRow( + $.row('Flush Stats') + .addPanel( + $.panel('Size') + + $.latencyPanel('loki_ingester_chunk_entries', '{%s}' % dashboards['loki-chunks.json'].ingesterSelector, multiplier='1') + + { yaxes: $.yaxes('short') }, ) - .addRow( - $.row('Flush Stats') - .addPanel( - $.panel('Queue Length') + - $.queryPanel('cortex_ingester_flush_queue_length{%s}' % $.jobMatcher($._config.job_names.ingester), '{{pod}}'), - ) - .addPanel( - $.panel('Flush Rate') + - $.qpsPanel('loki_ingester_chunk_age_seconds_count{%s}' % $.jobMatcher($._config.job_names.ingester),), + .addPanel( + $.panel('Entries') + + $.queryPanel( + 'sum(rate(loki_chunk_store_index_entries_per_chunk_sum{%s}[5m])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{%s}[5m]))' % [ + dashboards['loki-chunks.json'].ingesterSelector, + dashboards['loki-chunks.json'].ingesterSelector, + ], + 'entries' ), + ), + ) + .addRow( + $.row('Flush Stats') + .addPanel( + $.panel('Queue Length') + + $.queryPanel('cortex_ingester_flush_queue_length{%s}' % dashboards['loki-chunks.json'].ingesterSelector, '{{pod}}'), ) - .addRow( - $.row('Duration') - .addPanel( - $.panel('Chunk Duration hours (end-start)') + - $.queryPanel( - [ - 'histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{%s}[5m])) by (le))' % $.jobMatcher($._config.job_names.ingester), - 'histogram_quantile(0.99, sum(rate(loki_ingester_chunk_bounds_hours_bucket{%s}[5m])) by (le))' % $.jobMatcher($._config.job_names.ingester), - 'sum(rate(loki_ingester_chunk_bounds_hours_sum{%s}[5m])) / sum(rate(loki_ingester_chunk_bounds_hours_count{%s}[5m]))' % [ - $.jobMatcher($._config.job_names.ingester), - $.jobMatcher($._config.job_names.ingester), - ], - ], - [ - 'p50', - 'p99', - 'avg', - ], - ), - ) + .addPanel( + $.panel('Flush Rate') + + $.qpsPanel('loki_ingester_chunk_age_seconds_count{%s}' % dashboards['loki-chunks.json'].ingesterSelector,), ), - }, + ) + .addRow( + $.row('Duration') + .addPanel( + $.panel('Chunk Duration hours (end-start)') + + $.queryPanel( + [ + 'histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{%s}[5m])) by (le))' % dashboards['loki-chunks.json'].ingesterSelector, + 'histogram_quantile(0.99, sum(rate(loki_ingester_chunk_bounds_hours_bucket{%s}[5m])) by (le))' % dashboards['loki-chunks.json'].ingesterSelector, + 'sum(rate(loki_ingester_chunk_bounds_hours_sum{%s}[5m])) / sum(rate(loki_ingester_chunk_bounds_hours_count{%s}[5m]))' % [ + dashboards['loki-chunks.json'].ingesterSelector, + dashboards['loki-chunks.json'].ingesterSelector, + ], + ], + [ + 'p50', + 'p99', + 'avg', + ], + ), + ) + ){ + templating+: { + list+: [ + { + allValue: null, + current: + if l.type == 'custom' then { + text: l.query, + value: l.query, + } else {}, + datasource: '$datasource', + hide: 0, + includeAll: false, + label: l.variable, + multi: false, + name: l.variable, + options: [], + query: + if l.type == 'query' then + 'label_values(%s, %s)' % [l.query, l.label] + else + l.query, + refresh: 1, + regex: '', + sort: 2, + tagValuesQuery: '', + tags: [], + tagsQuery: '', + type: l.type, + useTags: false, + } + for l in dashboards['loki-chunks.json'].templateLabels + ], + }, + }, + } } diff --git a/production/loki-mixin/dashboards/loki-deletion.libsonnet b/production/loki-mixin/dashboards/loki-deletion.libsonnet index 5da136b8511f9..4f9ebb39f9c42 100644 --- a/production/loki-mixin/dashboards/loki-deletion.libsonnet +++ b/production/loki-mixin/dashboards/loki-deletion.libsonnet @@ -1,3 +1,4 @@ +local g = import 'grafana-builder/grafana.libsonnet'; local utils = import 'mixin-utils/utils.libsonnet'; (import 'dashboard-utils.libsonnet') { @@ -22,20 +23,20 @@ local utils = import 'mixin-utils/utils.libsonnet'; ) ) .addRow( - $.row('Churn') + g.row('Churn') .addPanel( - $.panel('Delete Requests Received / Day') + - $.queryPanel('sum(increase(loki_compactor_delete_requests_received_total{%s}[1d]))' % $.namespaceMatcher(), 'received'), + g.panel('Delete Requests Received / Day') + + g.queryPanel('sum(increase(loki_compactor_delete_requests_received_total{%s}[1d]))' % $.namespaceMatcher(), 'received'), ) .addPanel( - $.panel('Delete Requests Processed / Day') + - $.queryPanel('sum(increase(loki_compactor_delete_requests_processed_total{%s}[1d]))' % $.namespaceMatcher(), 'processed'), + g.panel('Delete Requests Processed / Day') + + g.queryPanel('sum(increase(loki_compactor_delete_requests_processed_total{%s}[1d]))' % $.namespaceMatcher(), 'processed'), ) ).addRow( - $.row('Failures') + g.row('Failures') .addPanel( - $.panel('Failures in Loading Delete Requests / Hour') + - $.queryPanel('sum(increase(loki_compactor_load_pending_requests_attempts_total{status="fail", %s}[1h]))' % $.namespaceMatcher(), 'failures'), + g.panel('Failures in Loading Delete Requests / Hour') + + g.queryPanel('sum(increase(loki_compactor_load_pending_requests_attempts_total{status="fail", %s}[1h]))' % $.namespaceMatcher(), 'failures'), ) ), }, diff --git a/production/loki-mixin/dashboards/loki-reads-resources.libsonnet b/production/loki-mixin/dashboards/loki-reads-resources.libsonnet index 65891a82bb625..0541a02d9adef 100644 --- a/production/loki-mixin/dashboards/loki-reads-resources.libsonnet +++ b/production/loki-mixin/dashboards/loki-reads-resources.libsonnet @@ -15,7 +15,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.containerMemoryWorkingSetPanel('Memory (workingset)', 'cortex-gw'), ) .addPanel( - $.goHeapInUsePanel('Memory (go heap inuse)', $._config.job_names.gateway), + $.goHeapInUsePanel('Memory (go heap inuse)', 'cortex-gw'), ) ) .addRow( @@ -27,7 +27,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.containerMemoryWorkingSetPanel('Memory (workingset)', 'query-frontend'), ) .addPanel( - $.goHeapInUsePanel('Memory (go heap inuse)', $._config.job_names.query_frontend), + $.goHeapInUsePanel('Memory (go heap inuse)', 'query-frontend'), ) ) .addRow( @@ -39,7 +39,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.containerMemoryWorkingSetPanel('Memory (workingset)', 'querier'), ) .addPanel( - $.goHeapInUsePanel('Memory (go heap inuse)', $._config.job_names.querier), + $.goHeapInUsePanel('Memory (go heap inuse)', 'querier'), ) ) .addRow( @@ -77,7 +77,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.containerMemoryWorkingSetPanel('Memory (workingset)', 'index-gateway'), ) .addPanel( - $.goHeapInUsePanel('Memory (go heap inuse)', $._config.job_names.index_gateway), + $.goHeapInUsePanel('Memory (go heap inuse)', 'index-gateway'), ) ) .addRow( @@ -115,7 +115,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.containerMemoryWorkingSetPanel('Memory (workingset)', 'ingester'), ) .addPanel( - $.goHeapInUsePanel('Memory (go heap inuse)', $._config.job_names.ingester), + $.goHeapInUsePanel('Memory (go heap inuse)', 'ingester'), ) ) .addRow( @@ -137,7 +137,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.containerMemoryWorkingSetPanel('Memory (workingset)', 'ruler'), ) .addPanel( - $.goHeapInUsePanel('Memory (go heap inuse)', $._config.job_names.ruler), + $.goHeapInUsePanel('Memory (go heap inuse)', 'ruler'), ) ), }, diff --git a/production/loki-mixin/dashboards/loki-reads.libsonnet b/production/loki-mixin/dashboards/loki-reads.libsonnet index d3639819d9d93..68c8d2abbc774 100644 --- a/production/loki-mixin/dashboards/loki-reads.libsonnet +++ b/production/loki-mixin/dashboards/loki-reads.libsonnet @@ -1,99 +1,194 @@ local utils = import 'mixin-utils/utils.libsonnet'; -{ +(import 'dashboard-utils.libsonnet') { grafanaDashboards+: { local dashboards = self, local http_routes = 'loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values', local grpc_routes = '/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs', - 'loki-reads.json': - ($.dashboard('Loki / Reads')) - .addClusterSelectorTemplates() - .addRow( - $.row('Frontend (cortex_gw)') - .addPanel( - $.panel('QPS') + - $.qpsPanel('loki_request_duration_seconds_count{%s, route=~"%s"}' % [$.jobMatcher($._config.job_names.gateway), http_routes]) - ) - .addPanel( - $.panel('Latency') + - utils.latencyRecordingRulePanel( - 'loki_request_duration_seconds', - $.jobSelector($._config.job_names.gateway) + [utils.selector.re('route', http_routes)], - sum_by=['route'] - ) - ) + 'loki-reads.json': { + local cfg = self, + + showMultiCluster:: true, + clusterLabel:: 'cluster', + clusterMatchers:: + if cfg.showMultiCluster then + [utils.selector.re(cfg.clusterLabel, '$cluster')] + else + [], + + namespaceType:: 'query', + namespaceQuery:: + if cfg.showMultiCluster then + 'kube_pod_container_info{cluster="$cluster", image=~".*loki.*"}' + else + 'kube_pod_container_info{image=~".*loki.*"}', + + assert (cfg.namespaceType == 'custom' || cfg.namespaceType == 'query') : "Only types 'query' and 'custom' are allowed for dashboard variable 'namespace'", + + matchers:: { + cortexgateway: [utils.selector.re('job', '($namespace)/cortex-gw')], + queryFrontend: [utils.selector.re('job', '($namespace)/query-frontend')], + querier: [utils.selector.re('job', '($namespace)/querier')], + ingester: [utils.selector.re('job', '($namespace)/ingester')], + querierOrIndexGateway: [utils.selector.re('job', '($namespace)/(querier|index-gateway)')], + }, + + local selector(matcherId) = + local ms = (cfg.clusterMatchers + cfg.matchers[matcherId]); + if std.length(ms) > 0 then + std.join(',', ['%(label)s%(op)s"%(value)s"' % matcher for matcher in ms]) + ',' + else '', + + cortexGwSelector:: selector('cortexgateway'), + queryFrontendSelector:: selector('queryFrontend'), + querierSelector:: selector('querier'), + ingesterSelector:: selector('ingester'), + querierOrIndexGatewaySelector:: selector('querierOrIndexGateway'), + + templateLabels:: ( + if cfg.showMultiCluster then [ + { + variable:: 'cluster', + label:: cfg.clusterLabel, + query:: 'kube_pod_container_info{image=~".*loki.*"}', + type:: 'query', + }, + ] else [] + ) + [ + { + variable:: 'namespace', + label:: 'namespace', + query:: cfg.namespaceQuery, + type:: cfg.namespaceType, + }, + ], + } + + $.dashboard('Loki / Reads') + .addClusterSelectorTemplates(false) + .addRow( + $.row('Frontend (cortex_gw)') + .addPanel( + $.panel('QPS') + + $.qpsPanel('loki_request_duration_seconds_count{%s route=~"%s"}' % [dashboards['loki-reads.json'].cortexGwSelector, http_routes]) ) - .addRow( - $.row('Frontend (query-frontend)') - .addPanel( - $.panel('QPS') + - $.qpsPanel('loki_request_duration_seconds_count{%s, route=~"%s"}' % [$.jobMatcher($._config.job_names.query_frontend), http_routes]) - ) - .addPanel( - $.panel('Latency') + - utils.latencyRecordingRulePanel( - 'loki_request_duration_seconds', - $.jobSelector($._config.job_names.query_frontend) + [utils.selector.re('route', http_routes)], - sum_by=['route'] - ) + .addPanel( + $.panel('Latency') + + utils.latencyRecordingRulePanel( + 'loki_request_duration_seconds', + dashboards['loki-reads.json'].matchers.cortexgateway + [utils.selector.re('route', http_routes)], + extra_selectors=dashboards['loki-reads.json'].clusterMatchers, + sum_by=['route'] ) ) - .addRow( - $.row('Querier') - .addPanel( - $.panel('QPS') + - $.qpsPanel('loki_request_duration_seconds_count{%s, route=~"%s"}' % [$.jobMatcher($._config.job_names.querier), http_routes]) - ) - .addPanel( - $.panel('Latency') + - utils.latencyRecordingRulePanel( - 'loki_request_duration_seconds', - $.jobSelector($._config.job_names.querier) + [utils.selector.re('route', http_routes)], - sum_by=['route'] - ) - ) + ) + .addRow( + $.row('Frontend (query-frontend)') + .addPanel( + $.panel('QPS') + + $.qpsPanel('loki_request_duration_seconds_count{%s route=~"%s"}' % [dashboards['loki-reads.json'].queryFrontendSelector, http_routes]) ) - .addRow( - $.row('Ingester') - .addPanel( - $.panel('QPS') + - $.qpsPanel('loki_request_duration_seconds_count{%s, route=~"%s"}' % [$.jobMatcher($._config.job_names.ingester), grpc_routes]) - ) - .addPanel( - $.panel('Latency') + - utils.latencyRecordingRulePanel( - 'loki_request_duration_seconds', - $.jobSelector($._config.job_names.ingester) + [utils.selector.re('route', grpc_routes)], - sum_by=['route'] - ) + .addPanel( + $.panel('Latency') + + utils.latencyRecordingRulePanel( + 'loki_request_duration_seconds', + dashboards['loki-reads.json'].matchers.queryFrontend + [utils.selector.re('route', http_routes)], + extra_selectors=dashboards['loki-reads.json'].clusterMatchers, + sum_by=['route'] ) ) - .addRow( - $.row('BigTable') - .addPanel( - $.panel('QPS') + - $.qpsPanel('cortex_bigtable_request_duration_seconds_count{%s, operation="/google.bigtable.v2.Bigtable/ReadRows"}' % $.jobMatcher($._config.job_names.querier)) - ) - .addPanel( - $.panel('Latency') + - utils.latencyRecordingRulePanel( - 'cortex_bigtable_request_duration_seconds', - $.jobSelector($._config.job_names.querier) + [utils.selector.eq('operation', '/google.bigtable.v2.Bigtable/ReadRows')] - ) + ) + .addRow( + $.row('Querier') + .addPanel( + $.panel('QPS') + + $.qpsPanel('loki_request_duration_seconds_count{%s route=~"%s"}' % [dashboards['loki-reads.json'].querierSelector, http_routes]) + ) + .addPanel( + $.panel('Latency') + + utils.latencyRecordingRulePanel( + 'loki_request_duration_seconds', + dashboards['loki-reads.json'].matchers.querier + [utils.selector.re('route', http_routes)], + extra_selectors=dashboards['loki-reads.json'].clusterMatchers, + sum_by=['route'] ) ) - .addRow( - $.row('BoltDB Shipper') - .addPanel( - $.panel('QPS') + - $.qpsPanel('loki_boltdb_shipper_request_duration_seconds_count{%s, operation="QUERY"}' % $.jobMatcher($._config.job_names.index_gateway)) + ) + .addRow( + $.row('Ingester') + .addPanel( + $.panel('QPS') + + $.qpsPanel('loki_request_duration_seconds_count{%s route=~"%s"}' % [dashboards['loki-reads.json'].ingesterSelector, grpc_routes]) + ) + .addPanel( + $.panel('Latency') + + utils.latencyRecordingRulePanel( + 'loki_request_duration_seconds', + dashboards['loki-reads.json'].matchers.ingester + [utils.selector.re('route', grpc_routes)], + extra_selectors=dashboards['loki-reads.json'].clusterMatchers, + sum_by=['route'] ) - .addPanel( - $.panel('Latency') + - $.latencyPanel('loki_boltdb_shipper_request_duration_seconds', '{%s, operation="QUERY"}' % $.jobMatcher($._config.job_names.index_gateway)) + ) + ) + .addRow( + $.row('BigTable') + .addPanel( + $.panel('QPS') + + $.qpsPanel('cortex_bigtable_request_duration_seconds_count{%s operation="/google.bigtable.v2.Bigtable/ReadRows"}' % dashboards['loki-reads.json'].querierSelector) + ) + .addPanel( + $.panel('Latency') + + utils.latencyRecordingRulePanel( + 'cortex_bigtable_request_duration_seconds', + dashboards['loki-reads.json'].matchers.querier + [utils.selector.eq('operation', '/google.bigtable.v2.Bigtable/ReadRows')] ) - ), + ) + ) + .addRow( + $.row('BoltDB Shipper') + .addPanel( + $.panel('QPS') + + $.qpsPanel('loki_boltdb_shipper_request_duration_seconds_count{%s operation="QUERY"}' % dashboards['loki-reads.json'].querierOrIndexGatewaySelector) + ) + .addPanel( + $.panel('Latency') + + $.latencyPanel('loki_boltdb_shipper_request_duration_seconds', '{%s operation="QUERY"}' % dashboards['loki-reads.json'].querierOrIndexGatewaySelector) + ) + ){ + templating+: { + list+: [ + { + allValue: null, + current: + if l.type == 'custom' then { + text: l.query, + value: l.query, + } else {}, + datasource: '$datasource', + hide: 0, + includeAll: false, + label: l.variable, + multi: false, + name: l.variable, + options: [], + query: + if l.type == 'query' then + 'label_values(%s, %s)' % [l.query, l.label] + else + l.query, + refresh: 1, + regex: '', + sort: 2, + tagValuesQuery: '', + tags: [], + tagsQuery: '', + type: l.type, + useTags: false, + } + for l in dashboards['loki-reads.json'].templateLabels + ], + }, + }, }, } diff --git a/production/loki-mixin/dashboards/loki-retention.libsonnet b/production/loki-mixin/dashboards/loki-retention.libsonnet index 731c144f19dea..31dd0eff9183f 100644 --- a/production/loki-mixin/dashboards/loki-retention.libsonnet +++ b/production/loki-mixin/dashboards/loki-retention.libsonnet @@ -8,15 +8,15 @@ local utils = import 'mixin-utils/utils.libsonnet'; .addClusterSelectorTemplates(false) .addLog() .addRow( - $.row('Resource Usage') + $.row('Ressource Usage') .addPanel( - $.containerCPUUsagePanel('CPU', $._config.job_names.compactor), + $.containerCPUUsagePanel('CPU', 'compactor'), ) .addPanel( - $.containerMemoryWorkingSetPanel('Memory (workingset)', $._config.job_names.compactor), + $.containerMemoryWorkingSetPanel('Memory (workingset)', 'compactor'), ) .addPanel( - $.goHeapInUsePanel('Memory (go heap inuse)', $._config.job_names.compactor), + $.goHeapInUsePanel('Memory (go heap inuse)', 'compactor'), ) ) @@ -92,7 +92,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; .addRow( $.row('Logs') .addPanel( - $.logPanel('Compactor Logs', '{%s}' % $.jobMatcher($._config.job_names.compactor)), + $.logPanel('Compactor Logs', '{container="compactor", %s}' % $.namespaceMatcher()), ) ), }, diff --git a/production/loki-mixin/dashboards/loki-writes-resources.libsonnet b/production/loki-mixin/dashboards/loki-writes-resources.libsonnet index 53dfdd68ee705..19c888f6e9e45 100644 --- a/production/loki-mixin/dashboards/loki-writes-resources.libsonnet +++ b/production/loki-mixin/dashboards/loki-writes-resources.libsonnet @@ -15,7 +15,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.containerMemoryWorkingSetPanel('Memory (workingset)', 'cortex-gw'), ) .addPanel( - $.goHeapInUsePanel('Memory (go heap inuse)', $._config.job_names.gateway), + $.goHeapInUsePanel('Memory (go heap inuse)', 'cortex-gw'), ) ) .addRow( @@ -27,7 +27,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.containerMemoryWorkingSetPanel('Memory (workingset)', 'distributor'), ) .addPanel( - $.goHeapInUsePanel('Memory (go heap inuse)', $._config.job_names.distributor), + $.goHeapInUsePanel('Memory (go heap inuse)', 'distributor'), ) ) .addRow( @@ -35,7 +35,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; .addPanel( $.panel('In-memory streams') + $.queryPanel( - 'sum by(%s) (loki_ingester_memory_streams{%s})' % [$._config.per_instance_label, $.jobMatcher($._config.job_names.ingester)], + 'sum by(%s) (loki_ingester_memory_streams{%s})' % [$._config.per_instance_label, $.jobMatcher('ingester')], '{{%s}}' % $._config.per_instance_label ) + { @@ -52,7 +52,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.containerMemoryWorkingSetPanel('Memory (workingset)', 'ingester'), ) .addPanel( - $.goHeapInUsePanel('Memory (go heap inuse)', $._config.job_names.ingester), + $.goHeapInUsePanel('Memory (go heap inuse)', 'ingester'), ) ) .addRow( diff --git a/production/loki-mixin/dashboards/loki-writes.libsonnet b/production/loki-mixin/dashboards/loki-writes.libsonnet index cecbf16567bf8..235c679d06383 100644 --- a/production/loki-mixin/dashboards/loki-writes.libsonnet +++ b/production/loki-mixin/dashboards/loki-writes.libsonnet @@ -1,76 +1,168 @@ local utils = import 'mixin-utils/utils.libsonnet'; -{ +(import 'dashboard-utils.libsonnet') { grafanaDashboards+: { - 'loki-writes.json': - ($.dashboard('Loki / Writes')) - .addClusterSelectorTemplates() - .addRow( - $.row('Frontend (cortex_gw)') - .addPanel( - $.panel('QPS') + - $.qpsPanel('loki_request_duration_seconds_count{%s, route=~"api_prom_push|loki_api_v1_push"}' % $.jobMatcher($._config.job_names.gateway)) - ) - .addPanel( - $.panel('Latency') + - utils.latencyRecordingRulePanel( - 'loki_request_duration_seconds', - $.jobSelector($._config.job_names.gateway) + [utils.selector.re('route', 'api_prom_push|loki_api_v1_push')], - ) - ) + local dashboards = self, + + 'loki-writes.json': { + local cfg = self, + + showMultiCluster:: true, + clusterLabel:: 'cluster', + clusterMatchers:: + if cfg.showMultiCluster then + [utils.selector.re(cfg.clusterLabel, '$cluster')] + else + [], + + namespaceType:: 'query', + namespaceQuery:: + if cfg.showMultiCluster then + 'kube_pod_container_info{cluster="$cluster", image=~".*loki.*"}' + else + 'kube_pod_container_info{image=~".*loki.*"}', + + assert (cfg.namespaceType == 'custom' || cfg.namespaceType == 'query') : "Only types 'query' and 'custom' are allowed for dashboard variable 'namespace'", + + matchers:: { + cortexgateway: [utils.selector.re('job', '($namespace)/cortex-gw')], + distributor: [utils.selector.re('job', '($namespace)/distributor')], + ingester: [utils.selector.re('job', '($namespace)/ingester')], + }, + + local selector(matcherId) = + local ms = cfg.clusterMatchers + cfg.matchers[matcherId]; + if std.length(ms) > 0 then + std.join(',', ['%(label)s%(op)s"%(value)s"' % matcher for matcher in ms]) + ',' + else '', + + cortexGwSelector:: selector('cortexgateway'), + distributorSelector:: selector('distributor'), + ingesterSelector:: selector('ingester'), + + templateLabels:: ( + if cfg.showMultiCluster then [ + { + variable:: 'cluster', + label:: cfg.clusterLabel, + query:: 'kube_pod_container_info{image=~".*loki.*"}', + type:: 'query', + }, + ] else [] + ) + [ + { + variable:: 'namespace', + label:: 'namespace', + query:: cfg.namespaceQuery, + type:: cfg.namespaceType, + }, + ], + } + + $.dashboard('Loki / Writes') + .addClusterSelectorTemplates(false) + .addRow( + $.row('Frontend (cortex_gw)') + .addPanel( + $.panel('QPS') + + $.qpsPanel('loki_request_duration_seconds_count{%s route=~"api_prom_push|loki_api_v1_push"}' % dashboards['loki-writes.json'].cortexGwSelector) ) - .addRow( - $.row('Distributor') - .addPanel( - $.panel('QPS') + - $.qpsPanel('loki_request_duration_seconds_count{%s}' % std.rstripChars($.jobMatcher($._config.job_names.distributor), ',')) - ) - .addPanel( - $.panel('Latency') + - utils.latencyRecordingRulePanel( - 'loki_request_duration_seconds', - $.jobSelector($._config.job_names.distributor) - ) + .addPanel( + $.panel('Latency') + + utils.latencyRecordingRulePanel( + 'loki_request_duration_seconds', + dashboards['loki-writes.json'].matchers.cortexgateway + [utils.selector.re('route', 'api_prom_push|loki_api_v1_push')], + extra_selectors=dashboards['loki-writes.json'].clusterMatchers ) ) - .addRow( - $.row('Ingester') - .addPanel( - $.panel('QPS') + - $.qpsPanel('loki_request_duration_seconds_count{%s, route="/logproto.Pusher/Push"}' % $.jobMatcher($._config.job_names.ingester)) - ) - .addPanel( - $.panel('Latency') + - utils.latencyRecordingRulePanel( - 'loki_request_duration_seconds', - $.jobSelector($._config.job_names.ingester) + [utils.selector.eq('route', '/logproto.Pusher/Push')], - ) - ) + ) + .addRow( + $.row('Distributor') + .addPanel( + $.panel('QPS') + + $.qpsPanel('loki_request_duration_seconds_count{%s}' % std.rstripChars(dashboards['loki-writes.json'].distributorSelector, ',')) ) - .addRow( - $.row('BigTable') - .addPanel( - $.panel('QPS') + - $.qpsPanel('cortex_bigtable_request_duration_seconds_count{%s, operation="/google.bigtable.v2.Bigtable/MutateRows"}' % $.jobMatcher($._config.job_names.ingester)) - ) - .addPanel( - $.panel('Latency') + - utils.latencyRecordingRulePanel( - 'cortex_bigtable_request_duration_seconds', - $.jobSelector($._config.job_names.ingester) + [utils.selector.eq('operation', '/google.bigtable.v2.Bigtable/MutateRows')] - ) + .addPanel( + $.panel('Latency') + + utils.latencyRecordingRulePanel( + 'loki_request_duration_seconds', + dashboards['loki-writes.json'].matchers.distributor, + extra_selectors=dashboards['loki-writes.json'].clusterMatchers ) ) - .addRow( - $.row('BoltDB Shipper') - .addPanel( - $.panel('QPS') + - $.qpsPanel('loki_boltdb_shipper_request_duration_seconds_count{%s, operation="WRITE"}' % $.jobMatcher($._config.job_names.ingester)) + ) + .addRow( + $.row('Ingester') + .addPanel( + $.panel('QPS') + + $.qpsPanel('loki_request_duration_seconds_count{%s route="/logproto.Pusher/Push"}' % dashboards['loki-writes.json'].ingesterSelector) + ) + .addPanel( + $.panel('Latency') + + utils.latencyRecordingRulePanel( + 'loki_request_duration_seconds', + dashboards['loki-writes.json'].matchers.ingester + [utils.selector.eq('route', '/logproto.Pusher/Push')], + extra_selectors=dashboards['loki-writes.json'].clusterMatchers ) - .addPanel( - $.panel('Latency') + - $.latencyPanel('loki_boltdb_shipper_request_duration_seconds', '{%s, operation="WRITE"}' % $.jobMatcher($._config.job_names.ingester)) + ) + ) + .addRow( + $.row('BigTable') + .addPanel( + $.panel('QPS') + + $.qpsPanel('cortex_bigtable_request_duration_seconds_count{%s operation="/google.bigtable.v2.Bigtable/MutateRows"}' % dashboards['loki-writes.json'].ingesterSelector) + ) + .addPanel( + $.panel('Latency') + + utils.latencyRecordingRulePanel( + 'cortex_bigtable_request_duration_seconds', + dashboards['loki-writes.json'].clusterMatchers + dashboards['loki-writes.json'].matchers.ingester + [utils.selector.eq('operation', '/google.bigtable.v2.Bigtable/MutateRows')] ) - ), + ) + ) + .addRow( + $.row('BoltDB Shipper') + .addPanel( + $.panel('QPS') + + $.qpsPanel('loki_boltdb_shipper_request_duration_seconds_count{%s operation="WRITE"}' % dashboards['loki-writes.json'].ingesterSelector) + ) + .addPanel( + $.panel('Latency') + + $.latencyPanel('loki_boltdb_shipper_request_duration_seconds', '{%s operation="WRITE"}' % dashboards['loki-writes.json'].ingesterSelector) + ) + ){ + templating+: { + list+: [ + { + allValue: null, + current: + if l.type == 'custom' then { + text: l.query, + value: l.query, + } else {}, + datasource: '$datasource', + hide: 0, + includeAll: false, + label: l.variable, + multi: false, + name: l.variable, + options: [], + query: + if l.type == 'query' then + 'label_values(%s, %s)' % [l.query, l.label] + else + l.query, + refresh: 1, + regex: '', + sort: 2, + tagValuesQuery: '', + tags: [], + tagsQuery: '', + type: l.type, + useTags: false, + } + for l in dashboards['loki-writes.json'].templateLabels + ], + }, + }, }, } diff --git a/production/loki-mixin/jsonnetfile.json b/production/loki-mixin/jsonnetfile.json index 3f1547aaebd9f..d62efed215e9f 100644 --- a/production/loki-mixin/jsonnetfile.json +++ b/production/loki-mixin/jsonnetfile.json @@ -1,24 +1,24 @@ { - "version": 1, - "dependencies": [ - { - "source": { - "git": { - "remote": "https://github.com/grafana/jsonnet-libs.git", - "subdir": "grafana-builder" + "dependencies": [ + { + "name": "grafana-builder", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": "grafana-builder" + } + }, + "version": "master" + }, + { + "name": "mixin-utils", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": "mixin-utils" + } + }, + "version": "master" } - }, - "version": "master" - }, - { - "source": { - "git": { - "remote": "https://github.com/grafana/jsonnet-libs.git", - "subdir": "mixin-utils" - } - }, - "version": "master" - } - ], - "legacyImports": true -} + ] +} \ No newline at end of file diff --git a/production/loki-mixin/jsonnetfile.lock.json b/production/loki-mixin/jsonnetfile.lock.json deleted file mode 100644 index 3f415021b4e58..0000000000000 --- a/production/loki-mixin/jsonnetfile.lock.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "version": 1, - "dependencies": [ - { - "source": { - "git": { - "remote": "https://github.com/grafana/jsonnet-libs.git", - "subdir": "grafana-builder" - } - }, - "version": "ff22d1d6698573e7cb76228198edfa2b2f632dcc", - "sum": "GRf2GvwEU4jhXV+JOonXSZ4wdDv8mnHBPCQ6TUVd+g8=" - }, - { - "source": { - "git": { - "remote": "https://github.com/grafana/jsonnet-libs.git", - "subdir": "mixin-utils" - } - }, - "version": "ff22d1d6698573e7cb76228198edfa2b2f632dcc", - "sum": "v6fuqqQp9rHZbsxN9o79QzOpUlwYZEJ84DxTCZMCYeU=" - } - ], - "legacyImports": false -}