Skip to content

Commit

Permalink
Fix markdown block quote truncation
Browse files Browse the repository at this point in the history
  • Loading branch information
Robert Szefler committed Jun 14, 2024
1 parent 7001ddc commit 74d5c93
Show file tree
Hide file tree
Showing 6 changed files with 191 additions and 93 deletions.
184 changes: 93 additions & 91 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ botocore = "1.31.72"
boto3 = "1.28.72"
requests = "^2.32.3"
certifi = "^2023.7.22"
regex = "2024.5.15"
more_itertools = { version = "^10.3", python = "<3.12" }

[tool.poetry.dev-dependencies]
pre-commit = "^2.13.0"
Expand Down
6 changes: 5 additions & 1 deletion src/robusta/core/sinks/transformer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import logging
import re
import urllib.parse
from collections import defaultdict
from typing import List, Optional, Union

import markdown2
Expand All @@ -28,6 +27,7 @@ def tabulate(*args, **kwargs):
ScanReportBlock,
TableBlock,
)
from robusta.utils.trim_markdown import trim_markdown


class Transformer:
Expand Down Expand Up @@ -58,6 +58,10 @@ def apply_length_limit(msg: str, max_length: int, truncator: Optional[str] = Non
truncator = truncator or "..."
return msg[: max_length - len(truncator)] + truncator

@staticmethod
def apply_length_limit_to_markdown(msg: str, max_length: int, truncator: str = "...") -> str:
return trim_markdown(msg, max_length, truncator)

@staticmethod
def to_markdown_diff(block: KubernetesDiffBlock, use_emoji_sign: bool = False) -> List[ListBlock]:
# this can happen when a block.old=None or block.new=None - e.g. the resource was added or deleted
Expand Down
2 changes: 1 addition & 1 deletion src/robusta/integrations/slack/sender.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def __to_slack_markdown(self, block: MarkdownBlock) -> List[SlackBlock]:
"type": "section",
"text": {
"type": "mrkdwn",
"text": Transformer.apply_length_limit(block.text, MAX_BLOCK_CHARS),
"text": Transformer.apply_length_limit_to_markdown(block.text, MAX_BLOCK_CHARS),
},
}
]
Expand Down
43 changes: 43 additions & 0 deletions src/robusta/utils/trim_markdown.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
try:
from itertools import batched
except ImportError: # Python < 3.12
from more_itertools import batched

import regex


def trim_markdown(text: str, max_length: int, suffix: str = "...") -> str:
    """
    Cut ``text`` so that the result, including ``suffix``, is at most ``max_length`` long.

    This method of trimming markdown is not universal. It only takes care of correctly
    trimming fenced blocks (sections delimited by triple backticks). Implementing a general
    truncation method for markdown that would handle all the possible tags in a correct
    way would be rather complex.

    Rules applied at the cut position (``max_length - len(suffix)``):

    * in plain text (outside any fenced block): the text is cut there;
    * inside a fenced block: the block is cut three characters earlier and a closing
      fence is inserted, provided the opening fence can be kept intact; otherwise the
      whole block is dropped;
    * a trailing fence without a partner is treated as a block running to the end of
      the text.

    NOTE: ``suffix`` is appended unconditionally, even when ``text`` is shorter than
    ``max_length``. Callers that want short text returned untouched must check the
    length themselves.

    :param text: markdown text to trim.
    :param max_length: maximum length of the returned string.
    :param suffix: marker appended after the kept part of the text.
    :return: the trimmed text followed by ``suffix``; only a prefix of ``suffix`` (possibly
        empty) when ``max_length`` leaves no room for any text.
    """
    fence = "```"
    fence_len = len(fence)
    trim_idx = max_length - len(suffix)

    if trim_idx <= 0:  # The pathological cases: no room for any of the text.
        # Clamp so that a negative max_length yields "" instead of slicing from the end.
        return suffix[: max(max_length, 0)]

    # Collect the start offsets of all non-overlapping fences, scanning right-to-left,
    # then put them in document order so that they pair up as (opening, closing).
    fence_starts = []
    search_end = len(text)
    while True:
        pos = text.rfind(fence, 0, search_end)
        if pos == -1:
            break
        fence_starts.append(pos)
        search_end = pos
    fence_starts.reverse()

    for i in range(0, len(fence_starts), 2):
        open_start = fence_starts[i]
        if trim_idx <= open_start:
            # Trimming point in plain text before this block (and all following ones).
            break

        if i + 1 < len(fence_starts):
            close_end = fence_starts[i + 1] + fence_len
        else:
            # Unpaired fence: the block is unterminated and extends to the end of the text.
            close_end = len(text)

        if trim_idx >= close_end:
            # Trimming point after this block - check the next one.
            continue

        # Trimming point inside this block (opening fence, content or closing fence).
        open_end = open_start + fence_len
        if trim_idx - open_end >= fence_len:
            # Enough space to keep the opening fence and insert a closing one.
            return text[: trim_idx - fence_len] + fence + suffix
        # Not enough space, strip the whole block. Slicing at open_start (rather than
        # stripping trailing backticks) keeps an adjacent preceding closing fence intact.
        return text[:open_start] + suffix

    # No fenced block contains the trimming point (or there are no blocks at all).
    return text[:trim_idx] + suffix
47 changes: 47 additions & 0 deletions tests/test_trim_markdown.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import pytest

from robusta.utils.trim_markdown import trim_markdown


@pytest.mark.parametrize(
    "max_length,expected_output",
    [
        # No room for anything but (part of) the suffix.
        (0, ""),
        (1, "#"),
        # Too short to keep the first fenced block with both fences: it is dropped.
        (2, "#"),
        (3, "#"),
        (4, "#"),
        (5, "#"),
        (6, "#"),
        # The first fenced block is cut and re-closed.
        (7, "``````#"),
        (8, "```o```#"),
        # Cut in the plain text after the first block.
        (9, "```oh```#"),
        (10, "```oh``` #"),
        (12, "```oh``` he#"),
        (15, "```oh``` hello#"),
        (16, "```oh``` hello #"),
        # Cut inside the second block without room to re-close it: block dropped.
        (17, "```oh``` hello #"),
        (18, "```oh``` hello #"),
        (19, "```oh``` hello #"),
        (20, "```oh``` hello #"),
        (21, "```oh``` hello #"),
        # The second block is cut and re-closed.
        (22, "```oh``` hello ``````#"),
        (23, "```oh``` hello ```w```#"),
        (24, "```oh``` hello ```wo```#"),
        (26, "```oh``` hello ```worl```#"),
        # Cut in the plain text after the second block.
        (27, "```oh``` hello ```world```#"),
        (28, "```oh``` hello ```world``` #"),
        (30, "```oh``` hello ```world``` an#"),
        # Third block: dropped, then cut and re-closed, then kept whole.
        (38, "```oh``` hello ```world``` and then #"),
        (41, "```oh``` hello ```world``` and then #"),
        (43, "```oh``` hello ```world``` and then ``````#"),
        (47, "```oh``` hello ```world``` and then ```some```#"),
        (51, "```oh``` hello ```world``` and then ```somethin```#"),
        (52, "```oh``` hello ```world``` and then ```something```#"),
        # Limit beyond the input length: whole text, suffix still appended.
        (53, "```oh``` hello ```world``` and then ```something```#"),
        (111, "```oh``` hello ```world``` and then ```something```#"),
    ],
)
def test_trim_markdown(max_length: int, expected_output: str):
    """Check the trimmed text and its length for cut points in and around fenced blocks."""
    source = "```oh``` hello ```world``` and then ```something```"
    result = trim_markdown(source, max_length, '#')
    assert result == expected_output
    assert len(result) <= max_length

0 comments on commit 74d5c93

Please sign in to comment.