Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix oomkill graphs for slack #1636

Merged
merged 8 commits into from
Nov 21, 2024
43 changes: 39 additions & 4 deletions src/robusta/core/playbooks/prometheus_enrichment_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
# for performance the series result is a dict of the format of the obj PrometheusSeries
PrometheusSeriesDict = Dict[str, any]


class XAxisLine(BaseModel):
label: str
value: float
Expand Down Expand Up @@ -125,7 +126,9 @@ def get_series_job(series: PrometheusSeriesDict) -> Optional[str]:
return series["metric"]["job"] if "job" in series["metric"] else None


def filter_prom_jobs_results(series_list_result: Optional[List[PrometheusSeriesDict]]) -> Optional[List[PrometheusSeriesDict]]:
def filter_prom_jobs_results(
series_list_result: Optional[List[PrometheusSeriesDict]],
) -> Optional[List[PrometheusSeriesDict]]:
if not series_list_result or len(series_list_result) == 1:
return series_list_result

Expand All @@ -134,7 +137,9 @@ def filter_prom_jobs_results(series_list_result: Optional[List[PrometheusSeriesD

# takes kubelet job if exists, return first job alphabetically if it doesn't
for target_name in target_names:
relevant_series: List[PrometheusSeriesDict] = [series for series in series_list_result if get_target_name(series) == target_name]
relevant_series: List[PrometheusSeriesDict] = [
series for series in series_list_result if get_target_name(series) == target_name
]
relevant_kubelet_metric = [series for series in relevant_series if get_series_job(series) == "kubelet"]
if len(relevant_kubelet_metric) == 1:
return_list.append(relevant_kubelet_metric[0])
Expand Down Expand Up @@ -244,6 +249,15 @@ def create_chart_from_prometheus_query(
min_time = starts_at.timestamp()
max_time = ends_at.timestamp()

limit_line = None
request_line = None

for line in lines:
if "Limit" in line.label:
limit_line = line
elif "Request" in line.label:
request_line = line

vertical_lines = []
horizontal_lines = []
for line in lines:
Expand Down Expand Up @@ -298,6 +312,10 @@ def create_chart_from_prometheus_query(
show_legend=hide_legends is not True,
)

# Defaults for delta and the limit-line adjustment flag, used when there is no data to plot
limit_line_adjusted = False
delta = 0

if len(plot_data_list):
y_axis_division = 5
# Calculate the maximum Y value with an added 20% padding
Expand All @@ -308,6 +326,15 @@ def create_chart_from_prometheus_query(

chart.range = (0, max_y_value_with_padding)

# Fix for the case when the request and limit have the same value.
# 6 pixels were chosen as the minimum distance based on the current width and height of the Slack graph
delta = (chart.range[1] - chart.range[0]) * 6 / chart.config.height
nherment marked this conversation as resolved.
Show resolved Hide resolved
# Limit delta to a maximum of 2% of the Y-axis range (to prevent significant deviation)
delta = min(delta, (chart.range[1] - chart.range[0]) * 0.02)

if limit_line and request_line and limit_line.value == request_line.value:
limit_line_adjusted = True

if values_format == ChartValuesFormat.Percentage:
# Calculate the Y-axis labels, shift to percentage, and round to the nearest whole number percentage
chart.y_labels = [round((i * interval) * 100) / 100 for i in range(y_axis_division)]
Expand Down Expand Up @@ -346,6 +373,14 @@ def create_chart_from_prometheus_query(
else:
chart.title = promql_query

# Adjust the "Limit" line's plotting data if limit and request lines are equal
if limit_line_adjusted:
for plot_data in plot_data_list:
if plot_data.plot[0] == limit_line.label:
adjusted_values = [(x, y + delta) for x, y in plot_data.plot[1]]
plot_data.plot = (plot_data.plot[0], adjusted_values)
break

for p in plot_data_list:
chart.add(
p.plot[0],
Expand Down Expand Up @@ -487,15 +522,15 @@ def create_resource_enrichment(
values_format=ChartValuesFormat.CPUUsage,
),
(ResourceChartResourceType.Memory, ResourceChartItemType.Pod): ChartOptions(
query='sum(container_memory_working_set_bytes{namespace="$namespace", pod=~"$pod", container!="", image!=""}) by (pod, job)',
query='sum(max(container_memory_working_set_bytes{namespace="$namespace", pod=~"$pod", container!="", image!=""}) by (container, pod, job)) by (container, pod, job)',
values_format=ChartValuesFormat.Bytes,
),
(ResourceChartResourceType.Memory, ResourceChartItemType.Node): ChartOptions(
query='1 - ((node_memory_MemAvailable_bytes{job="node-exporter", instance=~"$node_internal_ip:[0-9]+"} or (node_memory_Buffers_bytes{job="node-exporter", instance=~"$node_internal_ip:[0-9]+"} + node_memory_Cached_bytes{job="node-exporter", instance=~"$node_internal_ip:[0-9]+"} + node_memory_MemFree_bytes{job="node-exporter", instance=~"$node_internal_ip:[0-9]+"} + node_memory_Slab_bytes{job="node-exporter", instance=~"$node_internal_ip:[0-9]+"} ) ) / node_memory_MemTotal_bytes{job="node-exporter", instance=~"$node_internal_ip:[0-9]+"}) != 0',
values_format=ChartValuesFormat.Percentage,
),
(ResourceChartResourceType.Memory, ResourceChartItemType.Container): ChartOptions(
query='sum(container_memory_working_set_bytes{namespace="$namespace", pod=~"$pod", container=~"$container", image!=""}) by (container, pod, job)',
query='sum(max(container_memory_working_set_bytes{namespace="$namespace", pod=~"$pod", container=~"$container", image!=""}) by (container, pod, job)) by (container, pod, job)',
values_format=ChartValuesFormat.Bytes,
),
(ResourceChartResourceType.Disk, ResourceChartItemType.Pod): None,
Expand Down
Loading