Skip to content

Commit

Permalink
Fix markdown block quote truncation (#1457)
Browse files Browse the repository at this point in the history
Fix markdown block quote truncation
  • Loading branch information
Robert Szefler authored Jul 9, 2024
1 parent fcc8658 commit d6d7230
Show file tree
Hide file tree
Showing 6 changed files with 394 additions and 231 deletions.
484 changes: 255 additions & 229 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ boto3 = "1.28.72"
prometheus-api-client = "0.5.4"
requests = "^2.32.3"
certifi = "^2023.7.22"
regex = "2024.5.15"
more_itertools = { version = "^10.3", python = "<3.12" }

[tool.poetry.dev-dependencies]
pre-commit = "^2.13.0"
Expand Down
9 changes: 8 additions & 1 deletion src/robusta/core/sinks/transformer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import logging
import re
import urllib.parse
from collections import defaultdict
from typing import List, Optional, Union

import markdown2
Expand All @@ -28,6 +27,7 @@ def tabulate(*args, **kwargs):
ScanReportBlock,
TableBlock,
)
from robusta.utils.trim_markdown import trim_markdown


class Transformer:
Expand Down Expand Up @@ -58,6 +58,13 @@ def apply_length_limit(msg: str, max_length: int, truncator: Optional[str] = Non
truncator = truncator or "..."
return msg[: max_length - len(truncator)] + truncator

@staticmethod
def apply_length_limit_to_markdown(msg: str, max_length: int, truncator: str = "...") -> str:
try:
return trim_markdown(msg, max_length, truncator)
except:
return Transformer.apply_length_limit(msg, max_length, truncator)

@staticmethod
def to_markdown_diff(block: KubernetesDiffBlock, use_emoji_sign: bool = False) -> List[ListBlock]:
# this can happen when a block.old=None or block.new=None - e.g. the resource was added or deleted
Expand Down
2 changes: 1 addition & 1 deletion src/robusta/integrations/slack/sender.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def __to_slack_markdown(self, block: MarkdownBlock) -> List[SlackBlock]:
"type": "section",
"text": {
"type": "mrkdwn",
"text": Transformer.apply_length_limit(block.text, MAX_BLOCK_CHARS),
"text": Transformer.apply_length_limit_to_markdown(block.text, MAX_BLOCK_CHARS),
},
}
]
Expand Down
57 changes: 57 additions & 0 deletions src/robusta/utils/trim_markdown.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
try:
from itertools import batched
except ImportError: # Python < 3.12
from more_itertools import batched

import regex


def trim_markdown(text: str, max_length: int, suffix: str = "...") -> str:
    """
    Trim ``text`` so that the result, including ``suffix``, is at most ``max_length`` long,
    without leaving a fenced code block (delimited by triple backticks) unterminated.

    This method of trimming markdown is not universal. It only takes care of correctly
    trimming code block sections. Implementing a general truncation method for markdown
    that would handle all the possible tags in a correct way would be rather complex.

    Fences are located left to right and paired in that order (1st with 2nd, 3rd with
    4th, ...). A trailing fence with no partner is treated as opening a block that runs
    to the end of the text, so an odd number of fences no longer raises.

    Behaviour worth knowing before calling:

    * If the cut lands inside a code block, the block is shortened and re-closed with a
      fence when at least three characters of room remain after the opening fence;
      otherwise the whole block is dropped.
    * If the cut lands inside an opening fence, the partial fence is removed.
    * If the text contains a code block and the cut lands at or after the end of the
      last block, ``suffix`` is appended even when nothing was removed. Callers that
      want short text left untouched should compare ``len(text)`` with ``max_length``
      before calling.
    * Text without code blocks is returned unchanged only when it fits together with
      ``suffix`` (``len(text) <= max_length - len(suffix)``).

    :param text: markdown text to trim
    :param max_length: maximum length of the returned string
    :param suffix: marker appended to trimmed text
    :return: the trimmed text, never longer than ``max_length``
    """
    fence = "```"
    fence_len = len(fence)
    trim_idx = max_length - len(suffix)

    if trim_idx <= 0:
        # The pathological cases: no room for any text, emit whatever part of the
        # suffix fits. Clamp at zero so a negative limit yields "" rather than a
        # slice counted from the end of the suffix.
        return suffix[: max(max_length, 0)]

    # Collect the start offset of every non-overlapping fence, scanning left to right.
    fence_starts = []
    position = text.find(fence)
    while position != -1:
        fence_starts.append(position)
        position = text.find(fence, position + fence_len)

    # Pair fences into (block_start, block_end) spans; block_end is exclusive and
    # includes the closing fence. An unmatched last fence spans to the end of the text.
    blocks = []
    for index in range(0, len(fence_starts), 2):
        if index + 1 < len(fence_starts):
            block_end = fence_starts[index + 1] + fence_len
        else:
            block_end = len(text)
        blocks.append((fence_starts[index], block_end))

    # Walk the blocks from the last one backwards until the one holding the cut is found.
    for block_start, block_end in reversed(blocks):
        body_start = block_start + fence_len
        if trim_idx >= block_end:
            # Trimming point at or after the end of this block: a plain cut is safe.
            return text[:trim_idx] + suffix
        if trim_idx < block_start:
            # Trimming point before this block - continue to the preceding block.
            continue
        if trim_idx < body_start:
            # Trimming point inside the opening fence: drop the partial fence.
            return text[:trim_idx].rstrip("`") + suffix
        # Trimming point inside the block body or inside its closing fence.
        if trim_idx - body_start >= fence_len:
            # Enough space to give up three characters for a closing fence.
            return text[: trim_idx - fence_len] + fence + suffix
        # Not enough space, strip the whole block.
        return text[:block_start] + suffix

    # No code blocks at all, or the trimming point lies before the first one.
    if len(text) <= trim_idx:
        return text
    return text[:trim_idx] + suffix
71 changes: 71 additions & 0 deletions tests/test_trim_markdown.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import pytest

from robusta.utils.trim_markdown import trim_markdown


@pytest.mark.parametrize(
    "max_length,expected_output",
    [
        # Limit not larger than the 2-character suffix: only (part of) the suffix fits.
        (0, ""),
        (1, "#"),
        (2, "##"),
        # Too little room to keep even an empty first block: it is dropped entirely.
        (3, "##"),
        (4, "##"),
        (5, "##"),
        (6, "##"),
        (7, "##"),
        # Cut inside the first block: the block is shortened and re-closed.
        (8, "``````##"),
        (9, "```o```##"),
        (10, "```oh```##"),
        # Cut in the plain text between the first and the second block.
        (13, "```oh``` he##"),
        (16, "```oh``` hello##"),
        # Cut inside the second block's opening fence or too close to it: block dropped.
        (17, "```oh``` hello ##"),
        (18, "```oh``` hello ##"),
        (19, "```oh``` hello ##"),
        (20, "```oh``` hello ##"),
        (21, "```oh``` hello ##"),
        (22, "```oh``` hello ##"),
        # Cut inside the second block: shortened and re-closed.
        (23, "```oh``` hello ``````##"),
        (24, "```oh``` hello ```w```##"),
        (25, "```oh``` hello ```wo```##"),
        (27, "```oh``` hello ```worl```##"),
        (28, "```oh``` hello ```world```##"),
        # Cut in the plain text between the second and the third block.
        (29, "```oh``` hello ```world``` ##"),
        (31, "```oh``` hello ```world``` an##"),
        # Third block dropped, then shortened and re-closed as the limit grows.
        (39, "```oh``` hello ```world``` and then ##"),
        (42, "```oh``` hello ```world``` and then ##"),
        (44, "```oh``` hello ```world``` and then ``````##"),
        (48, "```oh``` hello ```world``` and then ```some```##"),
        (52, "```oh``` hello ```world``` and then ```somethin```##"),
        # The whole text fits; the suffix is appended regardless.
        (53, "```oh``` hello ```world``` and then ```something```##"),
        (54, "```oh``` hello ```world``` and then ```something```##"),
        (111, "```oh``` hello ```world``` and then ```something```##"),
    ],
)
def test_trim_markdown(max_length: int, expected_output: str):
    """Trim a text holding three code blocks at various limits, using "##" as the suffix."""
    markdown_input = "```oh``` hello ```world``` and then ```something```"
    actual_output = trim_markdown(text=markdown_input, max_length=max_length, suffix="##")
    assert actual_output == expected_output
    # Whatever the limit, the result must never exceed it.
    assert len(actual_output) <= max_length


@pytest.mark.parametrize(
    "max_length,expected_output",
    [
        # Limit not larger than the 3-character suffix: only (part of) the suffix fits.
        (0, ""),
        (1, "$"),
        (2, "$$"),
        (3, "$$$"),
        # Ordinary truncation: leading text followed by the suffix.
        (4, "N$$$"),
        (5, "No$$$"),
        (10, "No code$$$"),
        (38, "No code blocks whatsoever in this t$$$"),
        # The 38-character text alone would fit here, but not together with the suffix.
        (39, "No code blocks whatsoever in this te$$$"),
        (40, "No code blocks whatsoever in this tex$$$"),
        # Text plus suffix fits: the text comes back untouched.
        (41, "No code blocks whatsoever in this text"),
        (42, "No code blocks whatsoever in this text"),
        (111, "No code blocks whatsoever in this text"),
    ],
)
def test_trim_markdown_no_code_blocks(max_length: int, expected_output: str):
    """Trim a text without any code block at various limits, using "$$$" as the suffix."""
    plain_input = "No code blocks whatsoever in this text"
    actual_output = trim_markdown(text=plain_input, max_length=max_length, suffix="$$$")
    assert actual_output == expected_output
    # Whatever the limit, the result must never exceed it.
    assert len(actual_output) <= max_length

0 comments on commit d6d7230

Please sign in to comment.