diff --git a/README.md b/README.md index 0b924f4..769e875 100644 --- a/README.md +++ b/README.md @@ -334,7 +334,7 @@ Alerts when a node storage usage goes over 85% for five minutes. ``` ALERT node_disk_usage - IF ((node_filesystem_size{mountpoint="/"} - node_filesystem_free{mountpoint="/"}) * 100 / node_filesystem_size{mountpoint="/"}) * on(instance) group_left(node_name) node_meta > 85 + IF ((node_filesystem_size{mountpoint="/rootfs"} - node_filesystem_free{mountpoint="/rootfs"}) * 100 / node_filesystem_size{mountpoint="/rootfs"}) * on(instance) group_left(node_name) node_meta > 85 FOR 5m LABELS { severity="warning" } ANNOTATIONS { @@ -349,7 +349,7 @@ Alerts when a node storage is going to remain out of free space in six hours. ``` ALERT node_disk_fill_rate_6h - IF predict_linear(node_filesystem_free{mountpoint="/"}[1h], 6*3600) * on(instance) group_left(node_name) node_meta < 0 + IF predict_linear(node_filesystem_free{mountpoint="/rootfs"}[1h], 6*3600) * on(instance) group_left(node_name) node_meta < 0 FOR 1h LABELS { severity="critical" } ANNOTATIONS { diff --git a/grafana/dashboards/swarmprom-nodes-dash.json b/grafana/dashboards/swarmprom-nodes-dash.json index 2c8e368..1b142ec 100644 --- a/grafana/dashboards/swarmprom-nodes-dash.json +++ b/grafana/dashboards/swarmprom-nodes-dash.json @@ -764,7 +764,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(node_filesystem_size_bytes{mountpoint=\"/\"} * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "expr": "sum(node_filesystem_size_bytes{mountpoint=\"/rootfs\"} * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -848,7 +848,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum((node_filesystem_free_bytes{mountpoint=\"/\"} / node_filesystem_size_bytes{mountpoint=\"/\"}) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "expr": "sum((node_filesystem_free_bytes{mountpoint=\"/rootfs\"} / node_filesystem_size_bytes{mountpoint=\"/rootfs\"}) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "", diff --git a/grafana/dashboards/swarmprom-services-dash.json b/grafana/dashboards/swarmprom-services-dash.json index 97c0136..e6fc8e5 100644 --- a/grafana/dashboards/swarmprom-services-dash.json +++ b/grafana/dashboards/swarmprom-services-dash.json @@ -1479,7 +1479,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum((node_filesystem_free_bytes{mountpoint=\"/\"} / node_filesystem_size_bytes{mountpoint=\"/\"}) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", + "expr": "sum((node_filesystem_free_bytes{mountpoint=\"/rootfs\"} / node_filesystem_size_bytes{mountpoint=\"/rootfs\"}) * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"} * 100) / count(node_meta * on(instance) group_left(node_name) node_meta{node_id=~\"$node_id\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "", diff --git a/prometheus/rules/swarm_node.rules.yml b/prometheus/rules/swarm_node.rules.yml index cfd08ff..5b0eaaf 100644 --- a/prometheus/rules/swarm_node.rules.yml +++ b/prometheus/rules/swarm_node.rules.yml @@ -22,8 +22,8 @@ groups: $value}}%. summary: Memory alert for Swarm node '{{ $labels.node_name }}' - alert: node_disk_usage - expr: ((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"}) - * 100 / node_filesystem_size_bytes{mountpoint="/"}) * ON(instance) GROUP_LEFT(node_name) + expr: ((node_filesystem_size_bytes{mountpoint="/rootfs"} - node_filesystem_free_bytes{mountpoint="/rootfs"}) + * 100 / node_filesystem_size_bytes{mountpoint="/rootfs"}) * ON(instance) GROUP_LEFT(node_name) node_meta > 85 for: 1m labels: @@ -33,7 +33,7 @@ groups: $value}}%. summary: Disk alert for Swarm node '{{ $labels.node_name }}' - alert: node_disk_fill_rate_6h - expr: predict_linear(node_filesystem_free_bytes{mountpoint="/"}[1h], 6 * 3600) * ON(instance) + expr: predict_linear(node_filesystem_free_bytes{mountpoint="/rootfs"}[1h], 6 * 3600) * ON(instance) GROUP_LEFT(node_name) node_meta < 0 for: 1h labels: