Skip to content

Commit

Permalink
Fix oomkill graphs for slack (#1636)
Browse files Browse the repository at this point in the history
* Updated memory utilization queries for slack (for pods and for containers)

* Moved limit chart 6 pixels up in create_chart_from_prometheus_query

* Added limit to prevent moving limit chart Y-axis range too much

* Added a case when there is no data in memory graph for slack

* Added explanation for delta distance in create_chart_from_prometheus_query
  • Loading branch information
itisallgood authored Nov 21, 2024
1 parent 8ff3e28 commit 81d48f8
Showing 1 changed file with 39 additions and 4 deletions.
43 changes: 39 additions & 4 deletions src/robusta/core/playbooks/prometheus_enrichment_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
# for performance the series result is a dict of the format of the obj PrometheusSeries
PrometheusSeriesDict = Dict[str, any]


class XAxisLine(BaseModel):
label: str
value: float
Expand Down Expand Up @@ -125,7 +126,9 @@ def get_series_job(series: PrometheusSeriesDict) -> Optional[str]:
return series["metric"]["job"] if "job" in series["metric"] else None


def filter_prom_jobs_results(series_list_result: Optional[List[PrometheusSeriesDict]]) -> Optional[List[PrometheusSeriesDict]]:
def filter_prom_jobs_results(
series_list_result: Optional[List[PrometheusSeriesDict]],
) -> Optional[List[PrometheusSeriesDict]]:
if not series_list_result or len(series_list_result) == 1:
return series_list_result

Expand All @@ -134,7 +137,9 @@ def filter_prom_jobs_results(series_list_result: Optional[List[PrometheusSeriesD

# takes the kubelet job if it exists, returns the first job alphabetically if it doesn't
for target_name in target_names:
relevant_series: List[PrometheusSeriesDict] = [series for series in series_list_result if get_target_name(series) == target_name]
relevant_series: List[PrometheusSeriesDict] = [
series for series in series_list_result if get_target_name(series) == target_name
]
relevant_kubelet_metric = [series for series in relevant_series if get_series_job(series) == "kubelet"]
if len(relevant_kubelet_metric) == 1:
return_list.append(relevant_kubelet_metric[0])
Expand Down Expand Up @@ -244,6 +249,15 @@ def create_chart_from_prometheus_query(
min_time = starts_at.timestamp()
max_time = ends_at.timestamp()

limit_line = None
request_line = None

for line in lines:
if "Limit" in line.label:
limit_line = line
elif "Request" in line.label:
request_line = line

vertical_lines = []
horizontal_lines = []
for line in lines:
Expand Down Expand Up @@ -298,6 +312,10 @@ def create_chart_from_prometheus_query(
show_legend=hide_legends is not True,
)

# delta and limit line adjustment for case when there is no data
limit_line_adjusted = False
delta = 0

if len(plot_data_list):
y_axis_division = 5
# Calculate the maximum Y value with an added 20% padding
Expand All @@ -308,6 +326,15 @@ def create_chart_from_prometheus_query(

chart.range = (0, max_y_value_with_padding)

# Fix for the case when the request and limit have the same value.
# 6 pixels were chosen as the minimum distance based on the current width and height of the slack graph
delta = (chart.range[1] - chart.range[0]) * 6 / chart.config.height
# Limit delta to a maximum of 2% of the Y-axis range (to prevent significant deviation)
delta = min(delta, (chart.range[1] - chart.range[0]) * 0.02)

if limit_line and request_line and limit_line.value == request_line.value:
limit_line_adjusted = True

if values_format == ChartValuesFormat.Percentage:
# Calculate the Y-axis labels, shift to percentage, and round to the nearest whole number percentage
chart.y_labels = [round((i * interval) * 100) / 100 for i in range(y_axis_division)]
Expand Down Expand Up @@ -346,6 +373,14 @@ def create_chart_from_prometheus_query(
else:
chart.title = promql_query

# Adjust the "Limit" line's plotting data if limit and request lines are equal
if limit_line_adjusted:
for plot_data in plot_data_list:
if plot_data.plot[0] == limit_line.label:
adjusted_values = [(x, y + delta) for x, y in plot_data.plot[1]]
plot_data.plot = (plot_data.plot[0], adjusted_values)
break

for p in plot_data_list:
chart.add(
p.plot[0],
Expand Down Expand Up @@ -487,15 +522,15 @@ def create_resource_enrichment(
values_format=ChartValuesFormat.CPUUsage,
),
(ResourceChartResourceType.Memory, ResourceChartItemType.Pod): ChartOptions(
query='sum(container_memory_working_set_bytes{namespace="$namespace", pod=~"$pod", container!="", image!=""}) by (pod, job)',
query='sum(max(container_memory_working_set_bytes{namespace="$namespace", pod=~"$pod", container!="", image!=""}) by (container, pod, job)) by (container, pod, job)',
values_format=ChartValuesFormat.Bytes,
),
(ResourceChartResourceType.Memory, ResourceChartItemType.Node): ChartOptions(
query='1 - ((node_memory_MemAvailable_bytes{job="node-exporter", instance=~"$node_internal_ip:[0-9]+"} or (node_memory_Buffers_bytes{job="node-exporter", instance=~"$node_internal_ip:[0-9]+"} + node_memory_Cached_bytes{job="node-exporter", instance=~"$node_internal_ip:[0-9]+"} + node_memory_MemFree_bytes{job="node-exporter", instance=~"$node_internal_ip:[0-9]+"} + node_memory_Slab_bytes{job="node-exporter", instance=~"$node_internal_ip:[0-9]+"} ) ) / node_memory_MemTotal_bytes{job="node-exporter", instance=~"$node_internal_ip:[0-9]+"}) != 0',
values_format=ChartValuesFormat.Percentage,
),
(ResourceChartResourceType.Memory, ResourceChartItemType.Container): ChartOptions(
query='sum(container_memory_working_set_bytes{namespace="$namespace", pod=~"$pod", container=~"$container", image!=""}) by (container, pod, job)',
query='sum(max(container_memory_working_set_bytes{namespace="$namespace", pod=~"$pod", container=~"$container", image!=""}) by (container, pod, job)) by (container, pod, job)',
values_format=ChartValuesFormat.Bytes,
),
(ResourceChartResourceType.Disk, ResourceChartItemType.Pod): None,
Expand Down

0 comments on commit 81d48f8

Please sign in to comment.