From 81d48f819898d3d5658e69580129e7abc057f389 Mon Sep 17 00:00:00 2001
From: itisallgood <25401000+itisallgood@users.noreply.github.com>
Date: Thu, 21 Nov 2024 12:46:13 +0100
Subject: [PATCH] Fix oomkill graphs for slack (#1636)

* Updated memory utilization queries for slack (for pods and for containers)

* Moved limit chart 6 pixels up in create_chart_from_prometheus_query

* Added limit to prevent moving limit chart Y-axis range too much

* Added a case when there is no data in memory graph for slack

* Added explanation for delta distance in create_chart_from_prometheus_query
---
 .../playbooks/prometheus_enrichment_utils.py  | 43 +++++++++++++++++--
 1 file changed, 39 insertions(+), 4 deletions(-)

diff --git a/src/robusta/core/playbooks/prometheus_enrichment_utils.py b/src/robusta/core/playbooks/prometheus_enrichment_utils.py
index afd77174e..6c9fe49c2 100644
--- a/src/robusta/core/playbooks/prometheus_enrichment_utils.py
+++ b/src/robusta/core/playbooks/prometheus_enrichment_utils.py
@@ -30,6 +30,7 @@
 # for performance the series result is a dict of the format of the obj PrometheusSeries
 PrometheusSeriesDict = Dict[str, any]
 
+
 class XAxisLine(BaseModel):
     label: str
     value: float
@@ -125,7 +126,9 @@ def get_series_job(series: PrometheusSeriesDict) -> Optional[str]:
     return series["metric"]["job"] if "job" in series["metric"] else None
 
 
-def filter_prom_jobs_results(series_list_result: Optional[List[PrometheusSeriesDict]]) -> Optional[List[PrometheusSeriesDict]]:
+def filter_prom_jobs_results(
+    series_list_result: Optional[List[PrometheusSeriesDict]],
+) -> Optional[List[PrometheusSeriesDict]]:
     if not series_list_result or len(series_list_result) == 1:
         return series_list_result
 
@@ -134,7 +137,9 @@ def filter_prom_jobs_results(series_list_result: Optional[List[PrometheusSeriesD
 
     # takes kubelet job if exists, return first job alphabetically if it doesn't
     for target_name in target_names:
-        relevant_series: List[PrometheusSeriesDict] = [series for series in series_list_result if get_target_name(series) == target_name]
+        relevant_series: List[PrometheusSeriesDict] = [
+            series for series in series_list_result if get_target_name(series) == target_name
+        ]
         relevant_kubelet_metric = [series for series in relevant_series if get_series_job(series) == "kubelet"]
         if len(relevant_kubelet_metric) == 1:
             return_list.append(relevant_kubelet_metric[0])
@@ -244,6 +249,15 @@ def create_chart_from_prometheus_query(
         min_time = starts_at.timestamp()
         max_time = ends_at.timestamp()
 
+    limit_line = None
+    request_line = None
+
+    for line in lines:
+        if "Limit" in line.label:
+            limit_line = line
+        elif "Request" in line.label:
+            request_line = line
+
     vertical_lines = []
     horizontal_lines = []
     for line in lines:
@@ -298,6 +312,10 @@ def create_chart_from_prometheus_query(
         show_legend=hide_legends is not True,
     )
 
+    # Defaults for delta and the limit-line adjustment flag, used when there is no data to plot
+    limit_line_adjusted = False
+    delta = 0
+
     if len(plot_data_list):
         y_axis_division = 5
         # Calculate the maximum Y value with an added 20% padding
@@ -308,6 +326,15 @@ def create_chart_from_prometheus_query(
 
         chart.range = (0, max_y_value_with_padding)
 
+        # Fix for the case when the request and limit have the same value.
+        # 6 pixels were chosen as the minimum distance based on the current width and height of the slack graph
+        delta = (chart.range[1] - chart.range[0]) * 6 / chart.config.height
+        # Limit delta to a maximum of 2% of the Y-axis range (to prevent significant deviation)
+        delta = min(delta, (chart.range[1] - chart.range[0]) * 0.02)
+
+        if limit_line and request_line and limit_line.value == request_line.value:
+            limit_line_adjusted = True
+
         if values_format == ChartValuesFormat.Percentage:
             # Calculate the Y-axis labels, shift to percentage, and round to the nearest whole number percentage
             chart.y_labels = [round((i * interval) * 100) / 100 for i in range(y_axis_division)]
@@ -346,6 +373,14 @@ def create_chart_from_prometheus_query(
     else:
         chart.title = promql_query
 
+    # Adjust the "Limit" line's plotting data if limit and request lines are equal
+    if limit_line_adjusted:
+        for plot_data in plot_data_list:
+            if plot_data.plot[0] == limit_line.label:
+                adjusted_values = [(x, y + delta) for x, y in plot_data.plot[1]]
+                plot_data.plot = (plot_data.plot[0], adjusted_values)
+                break
+
     for p in plot_data_list:
         chart.add(
             p.plot[0],
@@ -487,7 +522,7 @@ def create_resource_enrichment(
             values_format=ChartValuesFormat.CPUUsage,
         ),
         (ResourceChartResourceType.Memory, ResourceChartItemType.Pod): ChartOptions(
-            query='sum(container_memory_working_set_bytes{namespace="$namespace", pod=~"$pod", container!="", image!=""}) by (pod, job)',
+            query='sum(max(container_memory_working_set_bytes{namespace="$namespace", pod=~"$pod", container!="", image!=""}) by (container, pod, job)) by (container, pod, job)',
             values_format=ChartValuesFormat.Bytes,
         ),
         (ResourceChartResourceType.Memory, ResourceChartItemType.Node): ChartOptions(
@@ -495,7 +530,7 @@ def create_resource_enrichment(
             values_format=ChartValuesFormat.Percentage,
         ),
         (ResourceChartResourceType.Memory, ResourceChartItemType.Container): ChartOptions(
-            query='sum(container_memory_working_set_bytes{namespace="$namespace", pod=~"$pod", container=~"$container", image!=""}) by (container, pod, job)',
+            query='sum(max(container_memory_working_set_bytes{namespace="$namespace", pod=~"$pod", container=~"$container", image!=""}) by (container, pod, job)) by (container, pod, job)',
             values_format=ChartValuesFormat.Bytes,
         ),
         (ResourceChartResourceType.Disk, ResourceChartItemType.Pod): None,