feat: Separate URLs in two groups, primary and secondary

This will be useful in mkdocstrings, where we want to register URLs for all aliases of a rendered object's identifier early in the process, so that we can drop the fallback mechanism in autorefs. Primary URLs will take precedence when resolving cross-references, to avoid logging warnings about multiple URLs found.

For example:

- Object `a.b.c.d` has aliases `a.b.d` and `a.d`.
- Object `a.b.c.d` is rendered.
- We register `a.b.c.d` -> page#a.b.c.d as primary.
- We register `a.b.d` -> page#a.b.c.d as secondary.
- We register `a.d` -> page#a.b.c.d as secondary.
- Later, if `a.b.d` or `a.d` are rendered, we will register primary and secondary URLs the same way.
- This way we are sure that each of `a.b.c.d`, `a.b.d` and `a.d` will link to its primary URL, if any, or otherwise to its secondary URL.

Related-to-issue-61: #61
mkdocstrings · Jan 10, 2025 · 559c723 · 559c723
1 parent 88f1dc9
commit 559c723
Show file tree

Hide file tree

Showing 4 changed files with 70 additions and 23 deletions.
diff --git a/src/mkdocs_autorefs/plugin.py b/src/mkdocs_autorefs/plugin.py
@@ -81,23 +81,54 @@ class AutorefsPlugin(BasePlugin[AutorefsConfig]):
     def __init__(self) -> None:
         """Initialize the object."""
         super().__init__()
-        self._url_map: dict[str, list[str]] = {}
+
+        # The plugin uses three URL maps, one for "primary" URLs, one for "secondary" URLs,
+        # and one for "absolute" URLs.
+        #
+        # - A primary URL is an identifier that links to a specific anchor on a page.
+        # - A secondary URL is an alias of an identifier that links to the same anchor as the identifier's primary URL.
+        #   Primary URLs with these aliases as identifiers may or may not be rendered later.
+        # - An absolute URL is an identifier that links to an external resource.
+        #   These URLs are typically registered by mkdocstrings when loading object inventories.
+        #
+        # For example, mkdocstrings registers a primary URL for each heading rendered in a page.
+        # Then, for each alias of this heading's identifier, it registers a secondary URL.
+        #
+        # We need to keep track of whether an identifier is primary or secondary,
+        # to give it precedence when resolving cross-references.
+        # We wouldn't want to log a warning if there is a single primary URL and one or more secondary URLs,
+        # instead we want to use the primary URL without any warning.
+        #
+        # - A single primary URL mapped to an identifier? Use it.
+        # - Multiple primary URLs mapped to an identifier? Use the first one, or closest one if configured as such.
+        # - No primary URL mapped to an identifier, but a secondary URL mapped? Use it.
+        # - Multiple secondary URLs mapped to an identifier? Use the first one, or closest one if configured as such.
+        # - No secondary URL mapped to an identifier? Try using absolute URLs
+        #   (typically registered by loading inventories in mkdocstrings).
+        #
+        # This logic unfolds in `_get_item_url`.
+        self._primary_url_map: dict[str, list[str]] = {}
+        self._secondary_url_map: dict[str, list[str]] = {}
         self._abs_url_map: dict[str, str] = {}
+
         self.get_fallback_anchor: Callable[[str], tuple[str, ...]] | None = None
 
-    def register_anchor(self, page: str, identifier: str, anchor: str | None = None) -> None:
+    def register_anchor(self, page: str, identifier: str, anchor: str | None = None, *, primary: bool = True) -> None:
         """Register that an anchor corresponding to an identifier was encountered when rendering the page.
 
         Arguments:
             page: The relative URL of the current page. Examples: `'foo/bar/'`, `'foo/index.html'`
-            identifier: The HTML anchor (without '#') as a string.
+            identifier: The identifier to register.
+            anchor: The anchor on the page, without `#`. If not provided, defaults to the identifier.
+            primary: Whether this anchor is the primary one for the identifier.
         """
         page_anchor = f"{page}#{anchor or identifier}"
-        if identifier in self._url_map:
-            if page_anchor not in self._url_map[identifier]:
-                self._url_map[identifier].append(page_anchor)
+        url_map = self._primary_url_map if primary else self._secondary_url_map
+        if identifier in url_map:
+            if page_anchor not in url_map[identifier]:
+                url_map[identifier].append(page_anchor)
         else:
-            self._url_map[identifier] = [page_anchor]
+            url_map[identifier] = [page_anchor]
 
     def register_url(self, identifier: str, url: str) -> None:
         """Register that the identifier should be turned into a link to this URL.
@@ -109,12 +140,13 @@ def register_url(self, identifier: str, url: str) -> None:
         self._abs_url_map[identifier] = url
 
     @staticmethod
-    def _get_closest_url(from_url: str, urls: list[str]) -> str:
+    def _get_closest_url(from_url: str, urls: list[str], qualifier: str) -> str:
         """Return the closest URL to the current page.
 
         Arguments:
             from_url: The URL of the base page, from which we link towards the targeted pages.
             urls: A list of URLs to choose from.
+            qualifier: The type of URLs we are choosing from.
 
         Returns:
             The closest URL to the current page.
@@ -130,8 +162,9 @@ def _get_closest_url(from_url: str, urls: list[str]) -> str:
 
         if not candidates:
             log.warning(
-                "Could not find closest URL (from %s, candidates: %s). "
+                "Could not find closest %s URL (from %s, candidates: %s). "
                 "Make sure to use unique headings, identifiers, or Markdown anchors (see our docs).",
+                qualifier,
                 from_url,
                 urls,
             )
@@ -141,14 +174,20 @@ def _get_closest_url(from_url: str, urls: list[str]) -> str:
         log.debug("Closest URL found: %s (from %s, candidates: %s)", winner, from_url, urls)
         return winner
 
+    def _get_urls(self, identifier: str) -> tuple[list[str], str]:
+        try:
+            return self._primary_url_map[identifier], "primary"
+        except KeyError:
+            return self._secondary_url_map[identifier], "secondary"
+
     def _get_item_url(
         self,
         identifier: str,
         fallback: Callable[[str], Sequence[str]] | None = None,
         from_url: str | None = None,
     ) -> str:
         try:
-            urls = self._url_map[identifier]
+            urls, qualifier = self._get_urls(identifier)
         except KeyError:
             if identifier in self._abs_url_map:
                 return self._abs_url_map[identifier]
@@ -157,16 +196,17 @@ def _get_item_url(
                 for new_identifier in new_identifiers:
                     with contextlib.suppress(KeyError):
                         url = self._get_item_url(new_identifier)
-                        self._url_map[identifier] = [url]
+                        self._secondary_url_map[identifier] = [url]
                         return url
             raise
 
         if len(urls) > 1:
             if self.config.resolve_closest and from_url is not None:
-                return self._get_closest_url(from_url, urls)
+                return self._get_closest_url(from_url, urls, qualifier)
             log.warning(
-                "Multiple URLs found for '%s': %s. "
+                "Multiple %s URLs found for '%s': %s. "
                 "Make sure to use unique headings, identifiers, or Markdown anchors (see our docs).",
+                qualifier,
                 identifier,
                 urls,
             )
@@ -252,13 +292,13 @@ def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str:  # noqa:
     def map_urls(self, base_url: str, anchor: AnchorLink) -> None:
         """Recurse on every anchor to map its ID to its absolute URL.
 
-        This method populates `self.url_map` by side-effect.
+        This method populates `self._primary_url_map` by side-effect.
 
         Arguments:
             base_url: The base URL to use as a prefix for each anchor's relative URL.
             anchor: The anchor to process and to recurse on.
         """
-        self.register_anchor(base_url, anchor.id)
+        self.register_anchor(base_url, anchor.id, primary=True)
         for child in anchor.children:
             self.map_urls(base_url, child)
 

diff --git a/src/mkdocs_autorefs/references.py b/src/mkdocs_autorefs/references.py
@@ -506,7 +506,7 @@ def append(self, anchor: str) -> None:
 
     def flush(self, alias_to: str | None = None) -> None:
         for anchor in self.anchors:
-            self.plugin.register_anchor(self.current_page, anchor, alias_to)
+            self.plugin.register_anchor(self.current_page, anchor, alias_to, primary=True)
         self.anchors.clear()
 
 

diff --git a/tests/test_plugin.py b/tests/test_plugin.py
@@ -10,7 +10,7 @@
 def test_url_registration() -> None:
     """Check that URLs can be registered, then obtained."""
     plugin = AutorefsPlugin()
-    plugin.register_anchor(identifier="foo", page="foo1.html")
+    plugin.register_anchor(identifier="foo", page="foo1.html", primary=True)
     plugin.register_url(identifier="bar", url="https://example.org/bar.html")
 
     assert plugin.get_item_url("foo") == "foo1.html#foo"
@@ -22,7 +22,7 @@ def test_url_registration() -> None:
 def test_url_registration_with_from_url() -> None:
     """Check that URLs can be registered, then obtained, relative to a page."""
     plugin = AutorefsPlugin()
-    plugin.register_anchor(identifier="foo", page="foo1.html")
+    plugin.register_anchor(identifier="foo", page="foo1.html", primary=True)
     plugin.register_url(identifier="bar", url="https://example.org/bar.html")
 
     assert plugin.get_item_url("foo", from_url="a/b.html") == "../foo1.html#foo"
@@ -34,7 +34,7 @@ def test_url_registration_with_from_url() -> None:
 def test_url_registration_with_fallback() -> None:
     """Check that URLs can be registered, then obtained through a fallback."""
     plugin = AutorefsPlugin()
-    plugin.register_anchor(identifier="foo", page="foo1.html")
+    plugin.register_anchor(identifier="foo", page="foo1.html", primary=True)
     plugin.register_url(identifier="bar", url="https://example.org/bar.html")
 
     # URL map will be updated with baz -> foo1.html#foo
@@ -53,7 +53,7 @@ def test_url_registration_with_fallback() -> None:
 def test_dont_make_relative_urls_relative_again() -> None:
     """Check that URLs are not made relative more than once."""
     plugin = AutorefsPlugin()
-    plugin.register_anchor(identifier="foo.bar.baz", page="foo/bar/baz.html")
+    plugin.register_anchor(identifier="foo.bar.baz", page="foo/bar/baz.html", primary=True)
 
     for _ in range(2):
         assert (
@@ -83,4 +83,11 @@ def test_dont_make_relative_urls_relative_again() -> None:
 )
 def test_find_closest_url(base: str, urls: list[str], expected: str) -> None:
     """Find closest URLs given a list of URLs."""
-    assert AutorefsPlugin._get_closest_url(base, urls) == expected
+    assert AutorefsPlugin._get_closest_url(base, urls, "test") == expected
+
+
+def test_register_secondary_url() -> None:
+    """Test registering secondary URLs."""
+    plugin = AutorefsPlugin()
+    plugin.register_anchor(identifier="foo", page="foo.html", primary=False)
+    assert plugin._secondary_url_map == {"foo": ["foo.html#foo"]}
diff --git a/tests/test_references.py b/tests/test_references.py
@@ -342,7 +342,7 @@ def test_register_markdown_anchors() -> None:
             """,
         ),
     )
-    assert plugin._url_map == {
+    assert plugin._primary_url_map == {
         "foo": ["page#heading-foo"],
         "bar": ["page#bar"],
         "alias1": ["page#heading-bar"],
@@ -379,7 +379,7 @@ def test_register_markdown_anchors_with_admonition() -> None:
             """,
         ),
     )
-    assert plugin._url_map == {
+    assert plugin._primary_url_map == {
         "alias1": ["page#alias1"],
         "alias2": ["page#heading-bar"],
         "alias3": ["page#alias3"],