Fix markdown block quote truncation

robusta-dev · Jun 14, 2024 · 74d5c93 · 74d5c93
1 parent 7001ddc
commit 74d5c93
Show file tree

Hide file tree

Showing 6 changed files with 191 additions and 93 deletions.
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -71,6 +71,8 @@ botocore = "1.31.72"
 boto3 = "1.28.72"
 requests = "^2.32.3"
 certifi = "^2023.7.22"
+regex = "2024.5.15"
+more_itertools = { version = "^10.3", python = "<3.12" }
 
 [tool.poetry.dev-dependencies]
 pre-commit = "^2.13.0"

diff --git a/src/robusta/core/sinks/transformer.py b/src/robusta/core/sinks/transformer.py
@@ -1,7 +1,6 @@
 import logging
 import re
 import urllib.parse
-from collections import defaultdict
 from typing import List, Optional, Union
 
 import markdown2
@@ -28,6 +27,7 @@ def tabulate(*args, **kwargs):
     ScanReportBlock,
     TableBlock,
 )
+from robusta.utils.trim_markdown import trim_markdown
 
 
 class Transformer:
@@ -58,6 +58,10 @@ def apply_length_limit(msg: str, max_length: int, truncator: Optional[str] = Non
         truncator = truncator or "..."
         return msg[: max_length - len(truncator)] + truncator
 
+    @staticmethod
+    def apply_length_limit_to_markdown(msg: str, max_length: int, truncator: str = "...") -> str:
+        return trim_markdown(msg, max_length, truncator)
+
     @staticmethod
     def to_markdown_diff(block: KubernetesDiffBlock, use_emoji_sign: bool = False) -> List[ListBlock]:
         # this can happen when a block.old=None or block.new=None - e.g. the resource was added or deleted

diff --git a/src/robusta/integrations/slack/sender.py b/src/robusta/integrations/slack/sender.py
@@ -143,7 +143,7 @@ def __to_slack_markdown(self, block: MarkdownBlock) -> List[SlackBlock]:
                 "type": "section",
                 "text": {
                     "type": "mrkdwn",
-                    "text": Transformer.apply_length_limit(block.text, MAX_BLOCK_CHARS),
+                    "text": Transformer.apply_length_limit_to_markdown(block.text, MAX_BLOCK_CHARS),
                 },
             }
         ]

diff --git a/src/robusta/utils/trim_markdown.py b/src/robusta/utils/trim_markdown.py
@@ -0,0 +1,43 @@
+try:
+    from itertools import batched
+except ImportError:  # Python < 3.12
+    from more_itertools import batched
+
+import regex
+
+
+def trim_markdown(text: str, max_length: int, suffix: str = "...") -> str:
+    # This method of trimming markdown is not universal. It only takes care of correctly
+    # trimming block sections. Implementing a general truncation method for markdown that
+    # would handle all the possible tags in a correct way would be rather complex.
+
+    trim_idx = max_length - len(suffix)
+
+    if trim_idx <= 0:  # The pathological cases.
+        return suffix[:max_length]
+
+    # Process block quotes backwards in the input
+    for match_open, match_close in batched(regex.finditer("```", text, regex.REVERSE), 2):
+        open_start, open_end = match_close.span()
+        close_start, close_end = match_open.span()
+        if trim_idx >= close_end:
+            # Trimming point after this block quote
+            return text[:trim_idx] + suffix
+        if trim_idx < open_start:
+            # Trimming point before this block quote - continue to the preceding block
+            continue
+        if trim_idx >= open_start and trim_idx < open_start + 3:
+            # Trimming point inside the opening block quote tag
+            return text[:trim_idx].rstrip("`") + suffix
+        if trim_idx >= close_start and trim_idx < close_end:
+            # Trimming point inside the closing block quote tag
+            if trim_idx - open_end >= 3:  # Enough space to insert the closing tag
+                return text[:trim_idx - 3] + "```" + suffix
+            else:  # Not enough space, strip the whole block
+                return text[:open_start] + suffix
+        if trim_idx >= open_end and trim_idx < close_start:
+            # Trimming point inside the block quote
+            if trim_idx - open_end >= 3:  # Enough space to insert the closing tag
+                return text[:trim_idx - 3] + "```" + suffix
+            else:  # Not enough space, strip the whole block
+                return text[:open_start] + suffix
diff --git a/tests/test_trim_markdown.py b/tests/test_trim_markdown.py
@@ -0,0 +1,47 @@
+import pytest
+
+from robusta.utils.trim_markdown import trim_markdown
+
+
+@pytest.mark.parametrize(
+    "max_length,expected_output", [
+        (0, ""),
+        (1, "#"),
+        (2, "#"),
+        (3, "#"),
+        (4, "#"),
+        (5, "#"),
+        (6, "#"),
+        (7, "``````#"),
+        (8, "```o```#"),
+        (9, "```oh```#"),
+        (10, "```oh``` #"),
+        (12, "```oh``` he#"),
+        (15, "```oh``` hello#"),
+        (16, "```oh``` hello #"),
+        (17, "```oh``` hello #"),
+        (18, "```oh``` hello #"),
+        (19, "```oh``` hello #"),
+        (20, "```oh``` hello #"),
+        (21, "```oh``` hello #"),
+        (22, "```oh``` hello ``````#"),
+        (23, "```oh``` hello ```w```#"),
+        (24, "```oh``` hello ```wo```#"),
+        (26, "```oh``` hello ```worl```#"),
+        (27, "```oh``` hello ```world```#"),
+        (28, "```oh``` hello ```world``` #"),
+        (30, "```oh``` hello ```world``` an#"),
+        (38, "```oh``` hello ```world``` and then #"),
+        (41, "```oh``` hello ```world``` and then #"),
+        (43, "```oh``` hello ```world``` and then ``````#"),
+        (47, "```oh``` hello ```world``` and then ```some```#"),
+        (51, "```oh``` hello ```world``` and then ```somethin```#"),
+        (52, "```oh``` hello ```world``` and then ```something```#"),
+        (53, "```oh``` hello ```world``` and then ```something```#"),
+        (111, "```oh``` hello ```world``` and then ```something```#"),
+    ])
+def test_trim_markdown(max_length: int, expected_output: str):
+    text = "```oh``` hello ```world``` and then ```something```"
+    trimmed = trim_markdown(text, max_length, '#')
+    assert trimmed == expected_output
+    assert len(trimmed) <= max_length