-
-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathreferences.py
214 lines (164 loc) · 7.6 KB
/
references.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
"""Cross-references module."""
from __future__ import annotations
import re
from html import escape, unescape
from typing import TYPE_CHECKING, Any, Callable, Match, Tuple
from urllib.parse import urlsplit
from xml.etree.ElementTree import Element
from markdown.extensions import Extension
from markdown.inlinepatterns import REFERENCE_RE, ReferenceInlineProcessor
from markdown.util import INLINE_PLACEHOLDER_RE
if TYPE_CHECKING:
from markdown import Markdown
# Pattern for the `<span data-autorefs-...=...>...</span>` markers, such as the
# spans built by `AutoRefInlineProcessor.makeTag`. Named groups:
# - `kind`: the data attribute name without its `data-` prefix
#   (`autorefs-identifier`, `autorefs-optional` or `autorefs-optional-hover`);
# - `identifier`: the attribute value; it may or may not be wrapped in double
#   quotes (the unnamed group 2 captures the opening quote and `\2` requires
#   the same closing one), and cannot contain `"`, `<` or `>`;
# - `title`: the span's content, matched lazily up to the first `</span>`.
AUTO_REF_RE = re.compile(
    r"<span data-(?P<kind>autorefs-identifier|autorefs-optional|autorefs-optional-hover)="
    r'("?)(?P<identifier>[^"<>]*)\2>(?P<title>.*?)</span>',
)
"""A regular expression to match mkdocs-autorefs' special reference markers
in the [`on_post_page` hook][mkdocs_autorefs.plugin.AutorefsPlugin.on_post_page].
"""
# Loosely-typed 3-tuple used as the return type of both
# `AutoRefInlineProcessor.handleMatch` (element or None, start index, end index)
# and `AutoRefInlineProcessor.evalId` (identifier or None, end index, matched flag).
EvalIDType = Tuple[Any, Any, Any]
class AutoRefInlineProcessor(ReferenceInlineProcessor):
    """Inline processor turning `[title][identifier]` references into marker spans.

    The spans carry the identifier in a `data-autorefs-identifier` attribute
    so that they can later be found with `AUTO_REF_RE`.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:  # noqa: D107
        super().__init__(REFERENCE_RE, *args, **kwargs)

    # Code based on
    # https://github.com/Python-Markdown/markdown/blob/8e7528fa5c98bf4652deb13206d6e6241d61630b/markdown/inlinepatterns.py#L780

    def handleMatch(self, m: Match[str], data: Any) -> Element | EvalIDType:  # type: ignore[override]  # noqa: N802
        """Handle an element that matched.

        Arguments:
            m: The match object.
            data: The matched data.

        Returns:
            A 3-tuple. `(None, None, None)` when either the text or the identifier
            part could not be parsed, `(None, start, end)` when the identifier is
            rejected, and `(element, start, end)` otherwise.
        """
        no_match: EvalIDType = (None, None, None)

        text, index, text_found = self.getText(data, m.end(0))
        if not text_found:
            return no_match

        identifier, end, id_found = self.evalId(data, index, text)
        if not id_found:
            return no_match

        start = m.start(0)
        if re.search(r"[/ \x00-\x1f]", identifier):
            # Do nothing if the matched reference contains:
            # - a space, slash or control character (considered unintended);
            # - specifically \x01 is used by Python-Markdown HTML stash when there's inline formatting,
            #   but references with Markdown formatting are not possible anyway.
            return None, start, end
        return self.makeTag(identifier, text), start, end

    def evalId(self, data: str, index: int, text: str) -> EvalIDType:  # noqa: N802 (parent's casing)
        """Evaluate the id portion of `[ref][id]`.

        If `[ref][]` use `[ref]`.

        Arguments:
            data: The data to evaluate.
            index: The starting position.
            text: The text to use when no identifier.

        Returns:
            A tuple containing the identifier, its end position, and whether it matched.
        """
        link = self.RE_LINK.match(data, pos=index)
        if link is None:
            return None, index, False

        end = link.end(0)
        explicit = link.group(1)
        if explicit:
            return explicit, end, True

        # Empty id part: fall back to the reference text.
        # Allow the entire content to be one placeholder, with the intent of catching things like [`Foo`][].
        # It doesn't catch [*Foo*][] though, just due to the priority order.
        # https://github.com/Python-Markdown/markdown/blob/1858c1b601ead62ed49646ae0d99298f41b1a271/markdown/inlinepatterns.py#L78
        identifier = text
        if INLINE_PLACEHOLDER_RE.fullmatch(identifier):
            identifier = self.unescape(identifier)
        return identifier, end, True

    def makeTag(self, identifier: str, text: str) -> Element:  # type: ignore[override]  # noqa: N802
        """Create a tag that can be matched by `AUTO_REF_RE`.

        Arguments:
            identifier: The identifier to use in the HTML property.
            text: The text to use in the HTML tag.

        Returns:
            A new `span` element holding `text`, with the identifier stored in
            its `data-autorefs-identifier` attribute.
        """
        span = Element("span", {"data-autorefs-identifier": identifier})
        span.text = text
        return span
def relative_url(url_a: str, url_b: str) -> str:
    """Compute the relative path from URL A to URL B.

    Both URLs are split on `/` and the leading segments they share are dropped.
    One `..` is then emitted for every remaining segment of A except its last
    one, followed by the remaining segments of B.

    Arguments:
        url_a: URL A (the starting point).
        url_b: URL B (the destination). If it has a `#fragment` suffix, the
            suffix is carried over to the result unchanged; a URL without any
            fragment is accepted too.

    Returns:
        The relative URL to go from A to B.
    """
    parts_a = url_a.split("/")
    # `partition` never fails: without a "#", both `hash_sign` and `anchor` are
    # empty, whereas unpacking `split("#", 1)` would raise `ValueError`.
    path_b, hash_sign, anchor = url_b.partition("#")
    parts_b = path_b.split("/")

    # Count the common left parts instead of popping them one by one.
    common = 0
    for part_a, part_b in zip(parts_a, parts_b):
        if part_a != part_b:
            break
        common += 1

    # Go up as many times as the remaining depth of A. The count can be -1 when
    # A is entirely a prefix of B; multiplying a list by it yields an empty list.
    levels = len(parts_a) - common - 1
    relative = "/".join([".."] * levels + parts_b[common:])
    return f"{relative}{hash_sign}{anchor}"
def fix_ref(url_mapper: Callable[[str], str], unmapped: list[str]) -> Callable:
    """Return a `repl` function for [`re.sub`](https://docs.python.org/3/library/re.html#re.sub).

    In our context, we match Markdown references and replace them with HTML links.

    When `url_mapper` raises `KeyError` for the matched identifier, the replacement
    depends on the kind of marker: an `autorefs-optional` marker is replaced by its
    bare title, an `autorefs-optional-hover` marker by a `span` with the identifier
    as its `title` attribute, and any other marker by the Markdown reference syntax
    (`[title][identifier]`), after appending the identifier to the outer `unmapped` list.

    Arguments:
        url_mapper: A callable that gets an object's site URL by its identifier,
            such as [mkdocs_autorefs.plugin.AutorefsPlugin.get_item_url][].
        unmapped: A list to store unmapped identifiers.

    Returns:
        The actual function accepting a [`Match` object](https://docs.python.org/3/library/re.html#match-objects)
        and returning the replacement strings.
    """

    def inner(match: Match) -> str:
        kind, identifier, title = match.group("kind", "identifier", "title")
        hover = kind == "autorefs-optional-hover"

        try:
            # The identifier comes from an HTML attribute: unescape it for the lookup only.
            url = url_mapper(unescape(identifier))
        except KeyError:
            if hover:
                return f'<span title="{identifier}">{title}</span>'
            if kind == "autorefs-optional":
                return title
            unmapped.append(identifier)
            return f"[{identifier}][]" if title == identifier else f"[{title}][{identifier}]"

        split_url = urlsplit(url)
        # A URL with a scheme or a network location is flagged as external.
        scope = "autorefs-external" if (split_url.scheme or split_url.netloc) else "autorefs-internal"
        class_attr = " ".join(("autorefs", scope))
        if hover:
            return f'<a class="{class_attr}" title="{identifier}" href="{escape(url)}">{title}</a>'
        return f'<a class="{class_attr}" href="{escape(url)}">{title}</a>'

    return inner
def fix_refs(html: str, url_mapper: Callable[[str], str]) -> tuple[str, list[str]]:
    """Fix all references in the given HTML text.

    Every match of `AUTO_REF_RE` in `html` is substituted using the replacement
    function returned by `fix_ref`.

    Arguments:
        html: The text to fix.
        url_mapper: A callable that gets an object's site URL by its identifier,
            such as [mkdocs_autorefs.plugin.AutorefsPlugin.get_item_url][].

    Returns:
        A tuple containing the fixed HTML and the list of identifiers
        that could not be mapped to a URL.
    """
    missing: list[str] = []
    replace_marker = fix_ref(url_mapper, missing)
    return AUTO_REF_RE.sub(replace_marker, html), missing
class AutorefsExtension(Extension):
    """Markdown extension that installs the auto-references inline processor."""

    def extendMarkdown(self, md: Markdown) -> None:  # noqa: N802 (casing: parent method's name)
        """Register the extension.

        Add an instance of our [`AutoRefInlineProcessor`][mkdocs_autorefs.references.AutoRefInlineProcessor]
        to the Markdown parser's inline patterns, under the name `mkdocs-autorefs`.

        Arguments:
            md: A `markdown.Markdown` instance.
        """
        processor = AutoRefInlineProcessor(md)
        # Priority 168: right after markdown.inlinepatterns.ReferenceInlineProcessor
        md.inlinePatterns.register(processor, "mkdocs-autorefs", priority=168)