From 03bdec61bbba86b1fa1b98cb890c034bbfcd44c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= Date: Fri, 9 Aug 2024 16:10:48 +0200 Subject: [PATCH] refactor: Finish preparing docstring style auto-detection feature Issue-5: https://github.com/mkdocstrings/griffe/issues/5 --- docs/insiders/changelog.md | 8 ++ docs/insiders/goals.yml | 11 ++- docs/reference/api/docstrings/parsers.md | 6 ++ docs/reference/docstrings.md | 26 ++++++ mkdocs.yml | 1 + src/_griffe/docstrings/parsers.py | 100 ++++++++++++++++++++++- src/_griffe/enumerations.py | 6 ++ src/griffe/__init__.py | 6 ++ 8 files changed, 158 insertions(+), 6 deletions(-) diff --git a/docs/insiders/changelog.md b/docs/insiders/changelog.md index 2d172868..6b5d40b8 100644 --- a/docs/insiders/changelog.md +++ b/docs/insiders/changelog.md @@ -2,14 +2,22 @@ ## Griffe Insiders +[](){#insiders-1.3.0} +### 1.3.0 August 09, 2024 { id="1.3.0" } + +- [Automatic docstring style detection](../reference/docstrings.md#auto-style) + +[](){#insiders-1.2.0} ### 1.2.0 March 11, 2024 { id="1.2.0" } - [Expressions modernization](../guide/users/navigating.md#modernization) +[](){#insiders-1.1.0} ### 1.1.0 March 02, 2024 { id="1.1.0" } - Check API of Python packages by [downloading them from PyPI](../guide/users/checking.md#using-pypi) +[](){#insiders-1.0.0} ### 1.0.0 January 16, 2024 { id="1.0.0" } - Add [Markdown][markdown] and [GitHub][github] output formats to the check command diff --git a/docs/insiders/goals.yml b/docs/insiders/goals.yml index 02090d82..7b2dfc70 100644 --- a/docs/insiders/goals.yml +++ b/docs/insiders/goals.yml @@ -6,20 +6,23 @@ goals: name: GraviFridge Fluid Renewal features: - name: "Markdown output format for the `griffe check` command" - ref: /checking/#markdown + ref: /guide/users/checking/#markdown since: 2024/01/16 - name: "GitHub output format for the `griffe check` command" - ref: /checking/#github + ref: /guide/users/checking/#github since: 2024/01/16 1500: name: HyperLamp 
Navigation Tips features: - name: "Check API of Python packages from PyPI" - ref: /checking/#using-pypi + ref: /guide/users/checking/#using-pypi since: 2024/03/02 - name: "Expressions modernization" - ref: /expressions/#modernization + ref: /guide/users/navigating/#modernization since: 2024/03/11 + - name: "Automatic detection of docstring style" + ref: /reference/docstrings/#auto-style + since: 2024/08/09 2000: name: FusionDrive Ejection Configuration features: [] diff --git a/docs/reference/api/docstrings/parsers.md b/docs/reference/api/docstrings/parsers.md index 55c8b549..10166ad2 100644 --- a/docs/reference/api/docstrings/parsers.md +++ b/docs/reference/api/docstrings/parsers.md @@ -4,6 +4,8 @@ ::: griffe.parse +::: griffe.parse_auto + ::: griffe.parse_google ::: griffe.parse_numpy @@ -23,3 +25,7 @@ ::: griffe.docstring_warning ::: griffe.DocstringWarningCallable + +::: griffe.DocstringDetectionMethod + +::: griffe.infer_docstring_style \ No newline at end of file diff --git a/docs/reference/docstrings.md b/docs/reference/docstrings.md index cb1c835a..cac12a4d 100644 --- a/docs/reference/docstrings.md +++ b/docs/reference/docstrings.md @@ -8,6 +8,7 @@ The available parsers are: - `google`, to parse Google-style docstrings, see [Napoleon's documentation][napoleon] - `numpy`, to parse Numpydoc docstrings, see [Numpydoc's documentation][numpydoc] - `sphinx`, to parse Sphinx-style docstrings, see [Sphinx's documentation][sphinx] +- `auto` (sponsors only), to automatically detect the docstring style, see [Auto-style](#auto-style) Most of the time, the syntax specified in the aforementioned docs is supported. In some cases, the original syntax is not supported, or is supported but with subtle differences. @@ -1504,6 +1505,31 @@ precision : Decimal TIP: **Types in docstrings are resolved using the docstrings' function scope.** See previous tips for types in docstrings. 
+## Auto-style + +[:octicons-heart-fill-24:{ .pulse } Sponsors only](../insiders/index.md){ .insiders } — +[:octicons-tag-24: Insiders 1.3.0](../insiders/changelog.md#1.3.0). + +Automatic style detection. This parser will first try to detect the style used in the docstring, and call the corresponding parser on it. + +### Parser options {#auto-options} + +The parser accepts a few options: + +- `method`: The method to use to detect the style and infer the parser. + Method 'heuristics' will use regular expressions, while method 'max_sections' will parse the docstring + with all parsers specified in `style_order` and return the one that parsed the most sections. + Default: `"heuristics"`. +- `style_order`: If multiple parsers parsed the same number of sections, + `style_order` is used to decide which one to return. Default: `["sphinx", "google", "numpy"]`. +- `default`: If heuristics fail, the `default` parser is returned. + The `default` parser is never used with the 'max_sections' method. Default: `None`. +- Any other option is passed down to the detected parser, if any. + +For non-Insiders versions, `default` is returned if specified, else the first +parser in `style_order` is returned. If neither is specified, +`None` is returned. + ## Parsers features !!! tip "Want to contribute?" 
diff --git a/mkdocs.yml b/mkdocs.yml index 31c4707f..c747ab3d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -200,6 +200,7 @@ plugins: options: docstring_options: ignore_init_summary: true + docstring_style: google docstring_section_style: list extensions: - griffe_inherited_docstrings diff --git a/src/_griffe/docstrings/parsers.py b/src/_griffe/docstrings/parsers.py index 7df3a5a4..9114b018 100644 --- a/src/_griffe/docstrings/parsers.py +++ b/src/_griffe/docstrings/parsers.py @@ -3,7 +3,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING, Any, Callable, Literal from _griffe.docstrings.google import parse_google from _griffe.docstrings.models import DocstringSection, DocstringSectionText @@ -14,8 +14,104 @@ if TYPE_CHECKING: from _griffe.models import Docstring -DocstringStyle = Literal["google", "numpy", "sphinx"] + +# This is not our preferred order, but the safest order for proper detection +# using heuristics. Indeed, Google style sections sometimes appear in otherwise +# plain markup docstrings, which could lead to false positives. Same for Numpy +# sections, whose syntax is regular rST markup, and which can therefore appear +# in plain markup docstrings too, even more often than Google sections. 
+_default_style_order = [Parser.sphinx, Parser.google, Parser.numpy] + + +DocstringStyle = Literal["google", "numpy", "sphinx", "auto"] """The supported docstring styles (literal values of the Parser enumeration).""" +DocstringDetectionMethod = Literal["heuristics", "max_sections"] +"""The supported methods to infer docstring styles.""" + + +def infer_docstring_style( + docstring: Docstring, # noqa: ARG001 + *, + method: DocstringDetectionMethod = "heuristics", # noqa: ARG001 + style_order: list[Parser] | list[DocstringStyle] | None = None, + default: Parser | DocstringStyle | None = None, + **options: Any, # noqa: ARG001 +) -> tuple[Parser | None, list[DocstringSection] | None]: + """Infer the parser to use for the docstring. + + [:octicons-heart-fill-24:{ .pulse } Sponsors only](../../../insiders/index.md){ .insiders } — + [:octicons-tag-24: Insiders 1.3.0](../../../insiders/changelog.md#1.3.0). + + The 'heuristics' method uses regular expressions. The 'max_sections' method + parses the docstring with all parsers specified in `style_order` and returns + the one that parsed the most sections. + + If heuristics fail, the `default` parser is returned. If multiple parsers + parsed the same number of sections, `style_order` is used to decide which + one to return. The `default` parser is never used with the 'max_sections' method. + + For non-Insiders versions, `default` is returned if specified, else the first + parser in `style_order` is returned. If neither is specified, + `None` is returned. + + Additional options are passed to the detected parser, if any. + + Parameters: + docstring: The docstring to parse. + method: The method to use to infer the parser. + style_order: The order of the styles to try when inferring the parser. + default: The default parser to use if the inference fails. + **options: Additional parsing options. + + Returns: + The inferred parser, and optionally parsed sections (when method is 'max_sections'). 
+ """ + if default: + return default if isinstance(default, Parser) else Parser(default), None + if style_order: + style = style_order[0] + return style if isinstance(style, Parser) else Parser(style), None + return None, None + + +def parse_auto( + docstring: Docstring, + *, + method: DocstringDetectionMethod = "heuristics", + style_order: list[Parser] | list[DocstringStyle] | None = None, + default: Parser | DocstringStyle | None = None, + **options: Any, +) -> list[DocstringSection]: + """Parse a docstring by automatically detecting the style it uses. + + [:octicons-heart-fill-24:{ .pulse } Sponsors only](../../../insiders/index.md){ .insiders } — + [:octicons-tag-24: Insiders 1.3.0](../../../insiders/changelog.md#1.3.0). + + See [`infer_docstring_style`][griffe.infer_docstring_style] for more information + on the available parameters. + + Parameters: + docstring: The docstring to parse. + method: The method to use to infer the parser. + style_order: The order of the styles to try when inferring the parser. + default: The default parser to use if the inference fails. + **options: Additional parsing options. + + Returns: + A list of docstring sections. + """ + style, sections = infer_docstring_style( + docstring, + method=method, + style_order=style_order, + default=default, + **options, + ) + if sections is None: + return parse(docstring, style, **options) + return sections + + parsers: dict[Parser, Callable[[Docstring], list[DocstringSection]]] = { Parser.auto: parse_auto, Parser.google: parse_google, diff --git a/src/_griffe/enumerations.py b/src/_griffe/enumerations.py index 291c1f5c..1d5ce16e 100644 --- a/src/_griffe/enumerations.py +++ b/src/_griffe/enumerations.py @@ -134,6 +134,12 @@ class BreakageKind(str, Enum): class Parser(str, Enum): """Enumeration of the different docstring parsers.""" + auto = "auto" + """Infer docstring parser. 
+ + [:octicons-heart-fill-24:{ .pulse } Sponsors only](../../../insiders/index.md){ .insiders } — + [:octicons-tag-24: Insiders 1.3.0](../../../insiders/changelog.md#1.3.0). + """ google = "google" """Google-style docstrings parser.""" sphinx = "sphinx" diff --git a/src/griffe/__init__.py b/src/griffe/__init__.py index 71f314fe..f9cf2b1e 100644 --- a/src/griffe/__init__.py +++ b/src/griffe/__init__.py @@ -98,8 +98,11 @@ ) from _griffe.docstrings.numpy import parse_numpy from _griffe.docstrings.parsers import ( + DocstringDetectionMethod, DocstringStyle, + infer_docstring_style, parse, + parse_auto, parsers, ) from _griffe.docstrings.sphinx import parse_sphinx @@ -257,6 +260,7 @@ "DocstringAttribute", "DocstringClass", "DocstringDeprecated", + "DocstringDetectionMethod", "DocstringElement", "DocstringFunction", "DocstringModule", @@ -418,6 +422,7 @@ "get_repo_root", "get_value", "htree", + "infer_docstring_style", "inspect", "json_decoder", "load", @@ -429,6 +434,7 @@ "merge_stubs", "module_vtree", "parse", + "parse_auto", "parse_docstring_annotation", "parse_google", "parse_numpy",