Skip to content

Commit

Permalink
feat: Separate URLs in two groups, primary and secondary
Browse files Browse the repository at this point in the history
This will be useful in mkdocstrings, where we want to register URLs for all aliases of a rendered object's identifier early in the process, so that we can drop the fallback mechanism in autorefs.

Primary URLs will take precedence when resolving cross-references, to avoid logging warnings about multiple URLs found.

For example:

- Object `a.b.c.d` has aliases `a.b.d` and `a.d`
- Object `a.b.c.d` is rendered.
- We register `a.b.c.d` -> page#a.b.c.d as primary
- We register `a.b.d` -> page#a.b.c.d as secondary
- We register `a.d` -> page#a.b.c.d as secondary
- Later, if `a.b.d` or `a.d` is rendered, we will register its primary and secondary URLs the same way
- This way, each of `a.b.c.d`, `a.b.d` and `a.d` is guaranteed to link to its primary URL if one exists, and to its secondary URL otherwise

Related-to-issue-61: #61
  • Loading branch information
pawamoy committed Jan 10, 2025
1 parent 88f1dc9 commit 559c723
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 23 deletions.
70 changes: 55 additions & 15 deletions src/mkdocs_autorefs/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,23 +81,54 @@ class AutorefsPlugin(BasePlugin[AutorefsConfig]):
def __init__(self) -> None:
"""Initialize the object."""
super().__init__()
self._url_map: dict[str, list[str]] = {}

# The plugin uses three URL maps, one for "primary" URLs, one for "secondary" URLs,
# and one for "absolute" URLs.
#
# - A primary URL is an identifier that links to a specific anchor on a page.
# - A secondary URL is an alias of an identifier that links to the same anchor as the identifier's primary URL.
# Primary URLs with these aliases as identifiers may or may not be rendered later.
# - An absolute URL is an identifier that links to an external resource.
# These URLs are typically registered by mkdocstrings when loading object inventories.
#
# For example, mkdocstrings registers a primary URL for each heading rendered in a page.
# Then, for each alias of this heading's identifier, it registers a secondary URL.
#
# We need to keep track of whether a URL registered for an identifier is primary or secondary,
# so that primary URLs take precedence when resolving cross-references.
# We wouldn't want to log a warning if there is a single primary URL and one or more secondary URLs,
# instead we want to use the primary URL without any warning.
#
# - A single primary URL mapped to an identifier? Use it.
# - Multiple primary URLs mapped to an identifier? Use the first one, or closest one if configured as such.
# - No primary URL mapped to an identifier, but a secondary URL mapped? Use it.
# - Multiple secondary URLs mapped to an identifier? Use the first one, or closest one if configured as such.
# - No secondary URL mapped to an identifier? Try using absolute URLs
# (typically registered by loading inventories in mkdocstrings).
#
# This logic unfolds in `_get_item_url`.
self._primary_url_map: dict[str, list[str]] = {}
self._secondary_url_map: dict[str, list[str]] = {}
self._abs_url_map: dict[str, str] = {}

self.get_fallback_anchor: Callable[[str], tuple[str, ...]] | None = None

def register_anchor(self, page: str, identifier: str, anchor: str | None = None) -> None:
def register_anchor(self, page: str, identifier: str, anchor: str | None = None, *, primary: bool = True) -> None:
"""Register that an anchor corresponding to an identifier was encountered when rendering the page.
Arguments:
page: The relative URL of the current page. Examples: `'foo/bar/'`, `'foo/index.html'`
identifier: The HTML anchor (without '#') as a string.
identifier: The identifier to register.
anchor: The anchor on the page, without `#`. If not provided, defaults to the identifier.
primary: Whether this anchor is the primary one for the identifier.
"""
page_anchor = f"{page}#{anchor or identifier}"
if identifier in self._url_map:
if page_anchor not in self._url_map[identifier]:
self._url_map[identifier].append(page_anchor)
url_map = self._primary_url_map if primary else self._secondary_url_map
if identifier in url_map:
if page_anchor not in url_map[identifier]:
url_map[identifier].append(page_anchor)
else:
self._url_map[identifier] = [page_anchor]
url_map[identifier] = [page_anchor]

def register_url(self, identifier: str, url: str) -> None:
"""Register that the identifier should be turned into a link to this URL.
Expand All @@ -109,12 +140,13 @@ def register_url(self, identifier: str, url: str) -> None:
self._abs_url_map[identifier] = url

@staticmethod
def _get_closest_url(from_url: str, urls: list[str]) -> str:
def _get_closest_url(from_url: str, urls: list[str], qualifier: str) -> str:
"""Return the closest URL to the current page.
Arguments:
from_url: The URL of the base page, from which we link towards the targeted pages.
urls: A list of URLs to choose from.
qualifier: The type of URLs we are choosing from.
Returns:
The closest URL to the current page.
Expand All @@ -130,8 +162,9 @@ def _get_closest_url(from_url: str, urls: list[str]) -> str:

if not candidates:
log.warning(
"Could not find closest URL (from %s, candidates: %s). "
"Could not find closest %s URL (from %s, candidates: %s). "
"Make sure to use unique headings, identifiers, or Markdown anchors (see our docs).",
qualifier,
from_url,
urls,
)
Expand All @@ -141,14 +174,20 @@ def _get_closest_url(from_url: str, urls: list[str]) -> str:
log.debug("Closest URL found: %s (from %s, candidates: %s)", winner, from_url, urls)
return winner

def _get_urls(self, identifier: str) -> tuple[list[str], str]:
    """Look up the URLs registered for an identifier, preferring primary ones.

    Arguments:
        identifier: The identifier to look up.

    Returns:
        The list of URLs found, and a qualifier (`"primary"` or `"secondary"`)
        naming the map they were found in.

    Raises:
        KeyError: When the identifier is in neither the primary nor the secondary URL map.
    """
    if identifier in self._primary_url_map:
        return self._primary_url_map[identifier], "primary"
    # No primary entry: fall back to the secondary map, letting KeyError propagate if absent.
    return self._secondary_url_map[identifier], "secondary"

def _get_item_url(
self,
identifier: str,
fallback: Callable[[str], Sequence[str]] | None = None,
from_url: str | None = None,
) -> str:
try:
urls = self._url_map[identifier]
urls, qualifier = self._get_urls(identifier)
except KeyError:
if identifier in self._abs_url_map:
return self._abs_url_map[identifier]
Expand All @@ -157,16 +196,17 @@ def _get_item_url(
for new_identifier in new_identifiers:
with contextlib.suppress(KeyError):
url = self._get_item_url(new_identifier)
self._url_map[identifier] = [url]
self._secondary_url_map[identifier] = [url]
return url
raise

if len(urls) > 1:
if self.config.resolve_closest and from_url is not None:
return self._get_closest_url(from_url, urls)
return self._get_closest_url(from_url, urls, qualifier)
log.warning(
"Multiple URLs found for '%s': %s. "
"Multiple %s URLs found for '%s': %s. "
"Make sure to use unique headings, identifiers, or Markdown anchors (see our docs).",
qualifier,
identifier,
urls,
)
Expand Down Expand Up @@ -252,13 +292,13 @@ def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str: # noqa:
def map_urls(self, base_url: str, anchor: AnchorLink) -> None:
"""Recurse on every anchor to map its ID to its absolute URL.
This method populates `self.url_map` by side-effect.
This method populates `self._primary_url_map` by side-effect.
Arguments:
base_url: The base URL to use as a prefix for each anchor's relative URL.
anchor: The anchor to process and to recurse on.
"""
self.register_anchor(base_url, anchor.id)
self.register_anchor(base_url, anchor.id, primary=True)
for child in anchor.children:
self.map_urls(base_url, child)

Expand Down
2 changes: 1 addition & 1 deletion src/mkdocs_autorefs/references.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ def append(self, anchor: str) -> None:

def flush(self, alias_to: str | None = None) -> None:
for anchor in self.anchors:
self.plugin.register_anchor(self.current_page, anchor, alias_to)
self.plugin.register_anchor(self.current_page, anchor, alias_to, primary=True)
self.anchors.clear()


Expand Down
17 changes: 12 additions & 5 deletions tests/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
def test_url_registration() -> None:
"""Check that URLs can be registered, then obtained."""
plugin = AutorefsPlugin()
plugin.register_anchor(identifier="foo", page="foo1.html")
plugin.register_anchor(identifier="foo", page="foo1.html", primary=True)
plugin.register_url(identifier="bar", url="https://example.org/bar.html")

assert plugin.get_item_url("foo") == "foo1.html#foo"
Expand All @@ -22,7 +22,7 @@ def test_url_registration() -> None:
def test_url_registration_with_from_url() -> None:
"""Check that URLs can be registered, then obtained, relative to a page."""
plugin = AutorefsPlugin()
plugin.register_anchor(identifier="foo", page="foo1.html")
plugin.register_anchor(identifier="foo", page="foo1.html", primary=True)
plugin.register_url(identifier="bar", url="https://example.org/bar.html")

assert plugin.get_item_url("foo", from_url="a/b.html") == "../foo1.html#foo"
Expand All @@ -34,7 +34,7 @@ def test_url_registration_with_from_url() -> None:
def test_url_registration_with_fallback() -> None:
"""Check that URLs can be registered, then obtained through a fallback."""
plugin = AutorefsPlugin()
plugin.register_anchor(identifier="foo", page="foo1.html")
plugin.register_anchor(identifier="foo", page="foo1.html", primary=True)
plugin.register_url(identifier="bar", url="https://example.org/bar.html")

# URL map will be updated with baz -> foo1.html#foo
Expand All @@ -53,7 +53,7 @@ def test_url_registration_with_fallback() -> None:
def test_dont_make_relative_urls_relative_again() -> None:
"""Check that URLs are not made relative more than once."""
plugin = AutorefsPlugin()
plugin.register_anchor(identifier="foo.bar.baz", page="foo/bar/baz.html")
plugin.register_anchor(identifier="foo.bar.baz", page="foo/bar/baz.html", primary=True)

for _ in range(2):
assert (
Expand Down Expand Up @@ -83,4 +83,11 @@ def test_dont_make_relative_urls_relative_again() -> None:
)
def test_find_closest_url(base: str, urls: list[str], expected: str) -> None:
"""Find closest URLs given a list of URLs."""
assert AutorefsPlugin._get_closest_url(base, urls) == expected
assert AutorefsPlugin._get_closest_url(base, urls, "test") == expected


def test_register_secondary_url() -> None:
    """Check that an anchor registered as non-primary lands in the secondary URL map."""
    autorefs = AutorefsPlugin()
    autorefs.register_anchor(page="foo.html", identifier="foo", primary=False)
    expected_secondary = {"foo": ["foo.html#foo"]}
    assert autorefs._secondary_url_map == expected_secondary
4 changes: 2 additions & 2 deletions tests/test_references.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ def test_register_markdown_anchors() -> None:
""",
),
)
assert plugin._url_map == {
assert plugin._primary_url_map == {
"foo": ["page#heading-foo"],
"bar": ["page#bar"],
"alias1": ["page#heading-bar"],
Expand Down Expand Up @@ -379,7 +379,7 @@ def test_register_markdown_anchors_with_admonition() -> None:
""",
),
)
assert plugin._url_map == {
assert plugin._primary_url_map == {
"alias1": ["page#alias1"],
"alias2": ["page#heading-bar"],
"alias3": ["page#alias3"],
Expand Down

0 comments on commit 559c723

Please sign in to comment.