From a63e6f3416f2e4fc7c2d5edb2768ceed9218df20 Mon Sep 17 00:00:00 2001
From: Oleh Prypin
Date: Fri, 3 Nov 2023 15:20:00 +0100
Subject: [PATCH] Fix edge-case crash in InlineProcessor
If an `InlineProcessor` returns an `AtomicString` (even though that is pointless, since a plain string is already treated as atomic in that context), an exception can be raised in two separate places. The added test case crashed before this change.
---
docs/changelog.md | 1 +
markdown/treeprocessors.py | 4 ++--
tests/test_apis.py | 25 +++++++++++++++++++++++--
3 files changed, 26 insertions(+), 4 deletions(-)
diff --git a/docs/changelog.md b/docs/changelog.md
index c55c9dda5..84f0bfaaf 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* Remove legacy import needed only in Python 2 (#1403)
* Fix typo that left the attribute `AdmonitionProcessor.content_indent` unset
(#1404)
+* Fix edge-case crash in `InlineProcessor` with `AtomicString` (#1406).
* Fix edge-case crash in `codehilite` with an empty `code` tag (#1405).
* Improve and expand type annotations in the code base (#1401).
diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py
index dc857204b..83630999e 100644
--- a/markdown/treeprocessors.py
+++ b/markdown/treeprocessors.py
@@ -218,7 +218,7 @@ def linkText(text: str | None) -> None:
text = data[strartIndex:index]
linkText(text)
- if not isString(node): # it's Element
+ if not isinstance(node, str): # it's Element
for child in [node] + list(node):
if child.tail:
if child.tail.strip():
@@ -304,7 +304,7 @@ def __applyPattern(
if node is None:
return data, True, end
- if not isString(node):
+ if not isinstance(node, str):
if not isinstance(node.text, util.AtomicString):
# We need to process current node too
for child in [node] + list(node):
diff --git a/tests/test_apis.py b/tests/test_apis.py
index d613a822f..55e2cdb66 100644
--- a/tests/test_apis.py
+++ b/tests/test_apis.py
@@ -30,6 +30,7 @@
import markdown
import warnings
from markdown.__main__ import parse_options
+from markdown import inlinepatterns
from logging import DEBUG, WARNING, CRITICAL
import yaml
import tempfile
@@ -664,8 +665,8 @@ class testAtomicString(unittest.TestCase):
""" Test that `AtomicStrings` are honored (not parsed). """
def setUp(self):
- md = markdown.Markdown()
- self.inlineprocessor = md.treeprocessors['inline']
+ self.md = markdown.Markdown()
+ self.inlineprocessor = self.md.treeprocessors['inline']
def testString(self):
""" Test that a regular string is parsed. """
@@ -710,6 +711,26 @@ def testNestedAtomicString(self):
'*to*</span> *test*</span> *with*</p></div>'
)
+ def testInlineProcessorDoesntCrashWithWrongAtomicString(self):
+ """ Test that an `AtomicString` returned from a Pattern doesn't cause a crash. """
+ tree = etree.Element('div')
+ p = etree.SubElement(tree, 'p')
+ p.text = 'a marker c'
+ self.md.inlinePatterns.register(
+ _InlineProcessorThatReturnsAtomicString(r'marker', self.md), 'test', 100
+ )
+ new = self.inlineprocessor.run(tree)
+ self.assertEqual(
+ markdown.serializers.to_html_string(new),
+ '<div><p>a atomic c</p></div>'
+ )
+
+
+class _InlineProcessorThatReturnsAtomicString(inlinepatterns.InlineProcessor):
+ """ Replace every match of the Pattern with a fixed `AtomicString`. """
+ def handleMatch(self, m, data):
+ return markdown.util.AtomicString('atomic'), m.start(0), m.end(0)
+
class TestConfigParsing(unittest.TestCase):
def assertParses(self, value, result):