-
Notifications
You must be signed in to change notification settings - Fork 264
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Robert Szefler
committed
Jun 14, 2024
1 parent
7001ddc
commit 74d5c93
Showing
6 changed files
with
191 additions
and
93 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
try: | ||
from itertools import batched | ||
except ImportError: # Python < 3.12 | ||
from more_itertools import batched | ||
|
||
import regex | ||
|
||
|
||
def _fence_starts_from_end(text: str, fence: str = "```"):
    """Yield start indexes of non-overlapping ``fence`` occurrences, right to left."""
    end = len(text)
    while True:
        start = text.rfind(fence, 0, end)
        if start == -1:
            return
        yield start
        # The next match must end at or before the start of this one.
        end = start


def trim_markdown(text: str, max_length: int, suffix: str = "...") -> str:
    """Cut ``text`` down to at most ``max_length`` characters, keeping code fences balanced.

    This method of trimming markdown is not universal. It only takes care of
    correctly trimming triple-backtick block sections. Implementing a general
    truncation method for markdown that would handle all the possible tags in
    a correct way would be rather complex.

    Args:
        text: Markdown input.
        max_length: Upper bound for the length of the returned string.
        suffix: Marker appended to the result. It is appended even when
            ``text`` already fits; existing callers and tests rely on that.

    Returns:
        The trimmed text followed by ``suffix``. When ``max_length`` leaves no
        room for any text, a prefix of ``suffix`` (possibly empty) is returned.
    """
    fence = "```"
    fence_len = len(fence)
    trim_idx = max_length - len(suffix)

    if trim_idx <= 0:  # The pathological cases.
        # Clamp so that a negative max_length yields "" instead of a
        # negative-index slice that would be longer than the limit.
        return suffix[:max(max_length, 0)]

    # Process code blocks backwards in the input. Fences are paired from the
    # end of the text: the first one found closes a block, the next opens it.
    fence_starts = _fence_starts_from_end(text, fence)
    unpaired_start = None
    for close_start in fence_starts:
        open_start = next(fence_starts, None)
        if open_start is None:
            # Odd number of fences: the leftmost one has no partner.
            unpaired_start = close_start
            break

        open_end = open_start + fence_len
        close_end = close_start + fence_len

        if trim_idx >= close_end:
            # Trimming point after this block - nothing gets broken.
            return text[:trim_idx] + suffix
        if trim_idx < open_start:
            # Trimming point before this block - continue to the preceding block.
            continue
        if trim_idx < open_end:
            # Trimming point inside the opening fence - drop the partial fence.
            return text[:trim_idx].rstrip("`") + suffix
        # Trimming point inside the block body or inside the closing fence.
        if trim_idx - open_end >= fence_len:
            # Enough space to insert a closing fence in place of the tail.
            return text[:trim_idx - fence_len] + fence + suffix
        # Not enough space, strip the whole block.
        return text[:open_start] + suffix

    # The trimming point lies before every paired block, or there are none.
    if unpaired_start is not None and unpaired_start < trim_idx < unpaired_start + fence_len:
        # Best effort for malformed input: do not cut through a lone fence.
        return text[:unpaired_start] + suffix
    return text[:trim_idx] + suffix
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
import pytest | ||
|
||
from robusta.utils.trim_markdown import trim_markdown | ||
|
||
|
||
@pytest.mark.parametrize(
    ("max_length", "expected_output"),
    [
        # No room for anything but (a prefix of) the suffix, or only for a
        # block too short to keep.
        (0, ""),
        (1, "#"),
        (2, "#"),
        (3, "#"),
        (4, "#"),
        (5, "#"),
        (6, "#"),
        # Cuts landing in or right after the first block.
        (7, "``````#"),
        (8, "```o```#"),
        (9, "```oh```#"),
        (10, "```oh``` #"),
        (12, "```oh``` he#"),
        (15, "```oh``` hello#"),
        # Cuts landing in the second block.
        (16, "```oh``` hello #"),
        (17, "```oh``` hello #"),
        (18, "```oh``` hello #"),
        (19, "```oh``` hello #"),
        (20, "```oh``` hello #"),
        (21, "```oh``` hello #"),
        (22, "```oh``` hello ``````#"),
        (23, "```oh``` hello ```w```#"),
        (24, "```oh``` hello ```wo```#"),
        (26, "```oh``` hello ```worl```#"),
        (27, "```oh``` hello ```world```#"),
        (28, "```oh``` hello ```world``` #"),
        (30, "```oh``` hello ```world``` an#"),
        # Cuts landing in the third block, and limits beyond the input length.
        (38, "```oh``` hello ```world``` and then #"),
        (41, "```oh``` hello ```world``` and then #"),
        (43, "```oh``` hello ```world``` and then ``````#"),
        (47, "```oh``` hello ```world``` and then ```some```#"),
        (51, "```oh``` hello ```world``` and then ```somethin```#"),
        (52, "```oh``` hello ```world``` and then ```something```#"),
        (53, "```oh``` hello ```world``` and then ```something```#"),
        (111, "```oh``` hello ```world``` and then ```something```#"),
    ],
)
def test_trim_markdown(max_length: int, expected_output: str):
    """Check trim_markdown output for a three-block sample at many length limits."""
    sample = "```oh``` hello ```world``` and then ```something```"
    result = trim_markdown(sample, max_length, "#")
    # Exact output first, then the length contract.
    assert result == expected_output
    assert len(result) <= max_length