diff --git a/Doc/library/importlib.metadata.rst b/Doc/library/importlib.metadata.rst
index d2cc769e2c8400..73645a4bb38a4f 100644
--- a/Doc/library/importlib.metadata.rst
+++ b/Doc/library/importlib.metadata.rst
@@ -206,6 +206,16 @@ all the metadata in a JSON-compatible form per :PEP:`566`::
     >>> wheel_metadata.json['requires_python']
     '>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*'
 
+The attributes ``authors`` and ``maintainers`` unify and provide minimal
+parsing for the respective core metadata fields ("Author", "Author-email")
+and ("Maintainer", "Maintainer-email")::
+
+    >>> wheel_metadata.authors
+    [Ident(name='Daniel Holth', email='dholth@fastmail.fm')]
+
+    >>> wheel_metadata.maintainers
+    [Ident(name='Alex Grönholm', email='alex.gronholm@nextday.fi')]
+
 .. note::
 
     The actual type of the object returned by ``metadata()`` is an
@@ -220,6 +230,9 @@ all the metadata in a JSON-compatible form per :PEP:`566`::
 .. versionadded:: 3.10
    The ``json`` attribute was added.
 
+.. versionadded:: 3.13
+   The ``authors`` and ``maintainers`` attributes were added.
+
 .. _version:
 
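Since ``Ident`` is iterable (its ``__iter__`` is defined in ``_adapters.py`` below), each entry also unpacks like a ``(name, email)`` pair. A possible extra illustration for this documentation section; the output shown here is inferred from the code in this patch rather than copied from it:

    >>> name, email = wheel_metadata.authors[0]
    >>> name
    'Daniel Holth'
    >>> email
    'dholth@fastmail.fm'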
diff --git a/Lib/importlib/metadata/__init__.py b/Lib/importlib/metadata/__init__.py
index 5c09666b6a40d9..a66d41abb57d57 100644
--- a/Lib/importlib/metadata/__init__.py
+++ b/Lib/importlib/metadata/__init__.py
@@ -20,6 +20,7 @@
 from ._functools import method_cache, pass_none
 from ._itertools import always_iterable, unique_everseen
 from ._meta import PackageMetadata, SimplePath
+from ._adapters import Ident
 
 from contextlib import suppress
 from importlib import import_module
@@ -31,6 +32,7 @@
 __all__ = [
     'Distribution',
     'DistributionFinder',
+    'Ident',
     'PackageMetadata',
     'PackageNotFoundError',
     'distribution',
diff --git a/Lib/importlib/metadata/_adapters.py b/Lib/importlib/metadata/_adapters.py
index 6aed69a30857e4..83bd0b15d453d3 100644
--- a/Lib/importlib/metadata/_adapters.py
+++ b/Lib/importlib/metadata/_adapters.py
@@ -3,6 +3,7 @@
 import re
 import textwrap
 import email.message
+import dataclasses
 
 from ._text import FoldedCase
 
@@ -15,6 +16,232 @@
         stacklevel=2,
     )
 
+# The formatting of the identity fields ("Author", "Maintainer", "Author-email"
+# and "Maintainer-email") in the core metadata specification and the related
+# 'pyproject.toml' specification is inspired by RFC 5322 but not precisely
+# defined. In practice conflicting definitions are used by many packages and
+# even by examples from the specification. For a permissive parser the key
+# takeaway from RFC 5322 is that special characters such as "," and "@" must
+# be quoted when used as text.
+
+
+def _entries_findall(string):
+    """
+    Return a list of entries given an RFC 5322-inspired string. Entries are
+    separated by ", " and contents of quoted strings are ignored. Each
+    entry will be a non-empty string.
+
+    >>> _entries_findall('a, b ,  c, "d, e, f"')
+    ['a', 'b ', ' c', '"d, e, f"']
+
+    >>> _entries_findall('a')
+    ['a']
+
+    >>> _entries_findall('')
+    []
+
+    >>> _entries_findall(", ")
+    []
+    """
+
+    # Split an RFC 5322-ish list:
+    # 1. Require a list separator, or beginning-of-string.
+    # 2. Alt 1: match single or double quotes and handle escape characters.
+    # 3. Alt 2: match anything except ',' followed by a space. If quote
+    #    characters are unbalanced, they will be matched here.
+    # 4. Match the alternatives at least once, in any order...
+    # 5. ... and capture them.
+    # Result:
+    #   group 1 (list entry): None or non-empty string.
+    _entries = re.compile(
+        r"""
+        (?: (?<=,\ ) | (?<=^) )                      # 1
+        ( (?: (["']) (?:(?!\2|\\).|\\.)* \2          # 2
+            | (?!,\ ).                               # 3
+          )+                                         # 4
+        )                                            # 5
+        """,
+        re.VERBOSE,
+    )
+
+    return [entry[0] for entry in _entries.findall(string)]
+
+
+def _name_email_split(string):
+    """
+    Split an RFC 5322-inspired entry into a name and email address tuple. Each
+    component will be either None or a non-empty string. Split the form "name
+    local@domain" on the first unquoted "@" such that:
+
+    * local may not be empty and may not contain any unquoted spaces
+    * domain may not be empty
+    * spaces between name and address are consumed
+    * space between name and address is optional if name ends in "@"
+    * first opening "<" of local is consumed only if local remains non-empty
+    * last closing ">" of domain is consumed only if domain remains non-empty
+
+    >>> _name_email_split("name local@domain")
+    ('name', 'local@domain')
+
+    >>> _name_email_split('@"unlocal@undomain" @ 
+
+    >>> _name_email_split('@@ local@domain')
+    ('@@', 'local@domain')
+
+    >>> _name_email_split('@nameonly@')
+    ('@nameonly@', None)
+
+    >>> _name_email_split('@domain@ ')
+    ('@', 'domain@ ')
+
+    >>> _name_email_split(' domain@only')
+    (None, 'domain@only')
+
+    >>> _name_email_split(' ')
+    (' ', None)
+
+    >>> _name_email_split('')
+    (None, None)
+    """
+
+    # Split an RFC 5322-inspired name-address entry:
+    # 01. Start at the beginning.
+    # 02. Capture at least one name component, but optionally so the result
+    #     will be 'None' rather than an empty string.
+    # 03. Stop matching against name components if the lookahead matches an
+    #     address. An address can be preceded by spaces, which are optional if
+    #     the name is missing.
+    # 04. Simulate a possessive quantifier for Python < 3.11 given the
+    #     equivalence between "(...)++" and "(?=( (...)+ ))\1". The contained
+    #     alternatives are not exclusive and the possessive quantifier prevents
+    #     the second alternative from stealing quoted components during
+    #     backtracking.
+    # 05. Alt 1.1: Match single-quoted or double-quoted components and handle
+    #     escape characters.
+    # 06. Alt 1.2: Match any character except the local component delimiters
+    #     " " or "@". If quote characters are unbalanced, they will be matched
+    #     here.
+    # 07. Match the alternatives at least once - the local part of the address
+    #     cannot be empty.
+    # 08. (See 04)
+    # 09. Match "@" followed by something - the domain cannot be empty either.
+    # 10. (See 03)
+    # 11. Alt 2.1: Match a quoted component...
+    # 12. Alt 2.2: ... or match a single character.
+    # ...
+    # 14. (See 02)
+    # 15. (See 02)
+    # 16. If the name portion is missing or ends with an "@", there may or may
+    #     not be whitespace before the address. The opening angle bracket is
+    #     always optional.
+    # ...
+    # 20. Match everything after "@" with a non-greedy quantifier to allow for
+    #     the optional closing angle bracket.
+    # 21. Allow for no address component.
+    # 22. Match the optional closing angle bracket.
+    # 23. Finish at the end.
+    # Summary:
+    #   ^ ( ( not: space* (quote | not:space-or-at)++ @ anything
+    #         quote | anything
+    #       )+
+    #   )?
+    #   space* ? $
+    # Result:
+    #   group 1 (name): None or non-empty string.
+    #   group 5 (email): None or non-empty string.
+    _name_email = re.compile(
+        r"""
+        ^                                            # 01
+        ( (?:                                        # 02
+              (?! \ *                                # 03
+                  (?=(                               # 04
+                      (?: (["']) (?:(?!\3|\\).|\\.)* \3    # 05
+                        | [^ @]                      # 06
+                      )+                             # 07
+                  ))\2                               # 08
+                  @ .                                # 09
+              )                                      # 10
+              (?: (["']) (?:(?!\4|\\).|\\.)* \4      # 11
+                | .                                  # 12
+              )                                      # 13
+          )+                                         # 14
+        )?                                           # 15
+        \ * ?                                        # 22
+        $                                            # 23
+        """,
+        re.VERBOSE,
+    )
+
+    # Equivalent, simpler version using possessive quantifiers, for
+    # Python >= 3.11.
+    # _name_email = re.compile(
+    #     r"""
+    #     ^ ( (?: (?! \ *
+    #                 (?: (["']) (?:(?!\2|\\).|\\.)* \2
+    #                   | [^ @]
+    #                 )++
+    #                 @ .
+    #             )
+    #             (?: (["']) (?:(?!\3|\\).|\\.)* \3
+    #               | .
+    #             )
+    #         )+
+    #     )?
+    #     \ * ?
+    #     $
+    #     """,
+    #     re.VERBOSE,
+    # )
+
+    return _name_email.match(string).groups()[::4]
+
+
+def _uniq(values):
+    """
+    Return a list omitting duplicate values.
+
+    >>> _uniq([1, 2, 1, 2, 3, 1, 4])
+    [1, 2, 3, 4]
+
+    >>> _uniq(())
+    []
+    """
+    unique = set()
+    result = []
+    for value in values:
+        if value in unique:
+            continue
+        unique.add(value)
+        result.append(value)
+    return result
+
+
+@dataclasses.dataclass(eq=True, frozen=True)
+class Ident:
+    """
+    A container for identity attributes, used by the author or
+    maintainer fields.
+    """
+
+    name: str | None
+    email: str | None
+
+    def __iter__(self):
+        return (getattr(self, field.name) for field in dataclasses.fields(self))
+
+
 class Message(email.message.Message):
     multiple_use_keys = set(
@@ -87,3 +314,29 @@ def transform(key):
             return tk, value
 
         return dict(map(transform, map(FoldedCase, self)))
+
+    def _parse_idents(self, string):
+        entries = (_name_email_split(entry) for entry in _entries_findall(string))
+        return _uniq(Ident(*entry) for entry in entries if entry != (None, None))
+
+    def _parse_names(self, string):
+        return _uniq(Ident(entry, None) for entry in _entries_findall(string))
+
+    def _parse_names_idents(self, names_field, idents_field):
+        names = self._parse_names(self.get(names_field, ""))
+        idents = self._parse_idents(self.get(idents_field, ""))
+        return _uniq((*names, *idents))
+
+    @property
+    def authors(self):
+        """
+        Minimal parsing for "Author" and "Author-email" fields.
+        """
+        return self._parse_names_idents("Author", "Author-email")
+
+    @property
+    def maintainers(self):
+        """
+        Minimal parsing for "Maintainer" and "Maintainer-email" fields.
+        """
+        return self._parse_names_idents("Maintainer", "Maintainer-email")
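For context, a small sketch (not part of the patch) of how the new ``Message`` properties combine both header fields. Constructing ``Message`` from an ``email.message.Message`` mirrors what ``Distribution.metadata`` does internally; the header values and the result shown in the comment are illustrative assumptions based on the parsing code above:

    import email
    from importlib.metadata._adapters import Message

    md = Message(email.message_from_string(
        "Author: Ada Lovelace, Grace Hopper\n"
        "Author-email: Ada Lovelace <ada@example.com>\n"
        "\n"
    ))

    # Name-only entries come first, then name/email entries. Deduplication is
    # by exact (name, email) pairs, so "Ada Lovelace" appears twice here:
    print(md.authors)
    # [Ident(name='Ada Lovelace', email=None),
    #  Ident(name='Grace Hopper', email=None),
    #  Ident(name='Ada Lovelace', email='ada@example.com')]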
diff --git a/Lib/importlib/metadata/_meta.py b/Lib/importlib/metadata/_meta.py
index c9a7ef906a8a8c..17baa7e43e3f1d 100644
--- a/Lib/importlib/metadata/_meta.py
+++ b/Lib/importlib/metadata/_meta.py
@@ -1,6 +1,8 @@
 from typing import Protocol
 from typing import Any, Dict, Iterator, List, Optional, TypeVar, Union, overload
+from ._adapters import Ident
+
 
 
 _T = TypeVar("_T")
 
@@ -43,6 +45,18 @@ def json(self) -> Dict[str, Union[str, List[str]]]:
         A JSON-compatible form of the metadata.
         """
 
+    @property
+    def authors(self) -> list[Ident]:
+        """
+        Minimal parsing for "Author" and "Author-email" fields.
+        """
+
+    @property
+    def maintainers(self) -> list[Ident]:
+        """
+        Minimal parsing for "Maintainer" and "Maintainer-email" fields.
+ """ + class SimplePath(Protocol[_T]): """ diff --git a/Lib/test/support/_hypothesis_stubs/__init__.py b/Lib/test/support/_hypothesis_stubs/__init__.py index 6ba5bb814b92f7..56696c5a763370 100644 --- a/Lib/test/support/_hypothesis_stubs/__init__.py +++ b/Lib/test/support/_hypothesis_stubs/__init__.py @@ -11,6 +11,7 @@ "strategies", "HealthCheck", "settings", + "note", "Verbosity", ] @@ -82,6 +83,10 @@ def settings(*args, **kwargs): return lambda f: f # pragma: nocover +def note(*args, **kwargs): + pass # pragma: no cover + + class HealthCheck(Enum): data_too_large = 1 filter_too_much = 2 diff --git a/Lib/test/test_importlib/identity.py b/Lib/test/test_importlib/identity.py new file mode 100644 index 00000000000000..d76b0355e6e80a --- /dev/null +++ b/Lib/test/test_importlib/identity.py @@ -0,0 +1,551 @@ +""" +Provides ``identities_strategy``, a Hypothesis strategy for generating an +underlying list of identity ``Entries`` with corresponding structured and +unstructured identity metadata. +""" + +import re +import types +import typing +import itertools +import dataclasses +import collections.abc + +from test.support.hypothesis_helper import hypothesis + +st = hypothesis.strategies + + +class OrderedSet(collections.abc.MutableSet): + """ + Represent an ordered set using a dictionary attribute ``data``. + """ + + def __init__(self, values=[]): + self.data = {value: None for value in values} + + def __contains__(self, value): + return value in self.data + + def __iter__(self): + return iter(self.data) + + def __len__(self): + return len(self.data) + + def add(self, value): + self.data[value] = None + + def discard(self, value): + del self.data[value] + + def __repr__(self): + data = ", ".join(repr(value) for value in self) + return f"{type(self).__name__}([{data}])" + + def __eq__(self, other): + try: + return tuple(self) == tuple(other) + except TypeError: + return NotImplemented + + +@dataclasses.dataclass +class Token: + """ + Represents a generated identity component. See ``Entry`` for more + information. + """ + + category: typing.Any + value: str + delim: bool = False + ignore: bool = False + + +class Entry(list): + """ + Represent an identity of the form ``(name, email)`` as a list of + ``Token`` instances. Delimiter (``delim``) tokens separate adjacent + entries or name-email components. Tokens with ``delim`` or ``ignore`` + have their value omitted from the structured representation. + """ + + @classmethod + def from_empty(cls, length, category, value=""): + """ + Generate an empty identity with ``length`` components - usually + two, representing name and email - and ``length - 1`` interleaved + delimiters. + """ + tokens = [] + for index in range(0, length): + if index > 0: + tokens.append(Token(category, value, True)) + tokens.append(Token(category, value, False)) + return cls(tokens) + + def __repr__(self): + return type(self).__name__ + "(" + super().__repr__() + ")" + + @property + def as_text(self): + """ + Convert to an unstructured textual format - a string. No values + are omitted. + """ + return "".join(token.value for token in self) + + @property + def as_data(self): + """ + Convert to a structured format - a tuple, split on groups of + ``delim`` tokens. Values from ``delim`` and ``ignore`` tokens + are omitted. Empty components are converted to ``None``. 
+ """ + + def keyfunc(token): + return token.delim + + data = [] + for key, group in itertools.groupby(self, key=keyfunc): + if key: + continue + text = "".join(token.value for token in group if not token.ignore) + data.append(text if text else None) + return tuple(data) + + +def replace(seq, index, item): + """Return a new sequence such that ``seq[index] == item``.""" + if not 0 <= index < len(seq): + raise IndexError + return seq[:index] + item + seq[index + 1 :] + + +def characters_exclude(exclude_chars, simple_chars=False): + """ + A characters strategy built on the baseline required by + ``identities_strategy``. Excludes ``exclude_chars``, and if + ``simple_chars``, all "Other" unicode code points. + """ + return st.characters( + # if simple_chars, smaller search space and easier visualization + exclude_categories=("C",) if simple_chars else ("Cs", "Co", "Cn"), + exclude_characters=exclude_chars + + # universal newlines: not compatible with toolchain + "\n\r\v\f\x1c\x1d\x1e\x85\u2028\u2029" + # backspace: side-effects + "\x08", + ) + + +def text_exclude(exclude_chars="", min_size=0, max_size=None, simple_chars=False): + """A text strategy that uses ``characters_exclude`` as alphabet.""" + return st.text( + alphabet=characters_exclude(exclude_chars, simple_chars=simple_chars), + min_size=min_size, + max_size=max_size, + ) + + +def generate_draw_function(draw, strategy): + return lambda _: draw(strategy) + + +@st.composite +def merge_sub(draw, state, sample, category, sub_func=None, **kwargs): + """ + Given a ``sample`` list of tokens, for adjacent tokens of the + given ``category`` merge and then apply the substitution function + ``sub_func`` with ``kwargs``. + """ + + def keyfunc(token): + return dataclasses.replace(token, value=None) + + merged = [] + for key, group in itertools.groupby(sample, key=keyfunc): + if key.category is category: + key.value = "".join(token.value for token in group) + merged.append(key) + merged.extend(group) + + if not sub_func: + return merged + + for i, token in enumerate(merged): + opener = i == 0 + closer = i == len(merged) - 1 + if token.category is not category: + continue + token.value = draw(sub_func(state, token.value, opener, closer, **kwargs)) + + return merged + + +@st.composite +def sub_entry(draw, state, text, opener, closer): + """An entry cannot contain ", ".""" + repl_chars = characters_exclude("\"', ", simple_chars=state.simple_chars) + repl_func = generate_draw_function(draw, repl_chars) + text = re.sub(r"(?<=,) ", repl_func, text) + return text + + +@st.composite +def sub_name(draw, state, text, opener, closer, name_only): + """ + A name entry cannot: + + * contain [^ ]@. + * contain ", " + * end with "," or " " + """ + repl_chars = characters_exclude("\"', @", simple_chars=state.simple_chars) + repl_func = generate_draw_function(draw, repl_chars) + # "@" is legal if: + # * nothing or space before + # -- or -- + # * nothing after + # + # Inversely (a or b -> -a and -b): + # 1. not space before + # -- and -- + # 2. anything after + # + # The inverse equivalent: + # [^ ]@. + escape_at = re.compile( + rf""" + (?: [^ ] {"| ^" if not opener else ""}) + (?= @. 
{"| @$" if not closer or not name_only else ""}) + """, + re.VERBOSE, + ) + text = escape_at.sub(" ", text) + # ", t" -> replace " " + text = re.sub(r"(?<=,) (?!@)", repl_func, text) + # ", @" -> replace "," + text = re.sub(r",(?= @)", repl_func, text) + if closer and not name_only: + text = re.sub(r"(?<=[, ]$)", repl_func, text) + return text + + +def get_name_qchar_idxs(text, succeeded): + """ + Given any legal input (ie, not matching "[^ ]@."), yield the + character indexes for which the text remains valid when the + index is substituted with a quote character. + """ + replace_at = re.compile( + rf""" + # Cannot replace " " before "@" if "@" succeeded by anything. + (?! $ | [ ]@. {"| [ ]@$" if succeeded else ""}) + """, + re.VERBOSE, + ) + return (m.start() for m in replace_at.finditer(text)) + + +@st.composite +def identities_strategy(draw, simple_chars=True, debug_entries=False): + """ + For each identity field of the core metadata specification ("Author", + "Maintainer", "Author-email" and "Maintainer-email"), generate a list + of ``Entry`` instances, each corresponding to a possibly-empty + identity. The "-email" fields have stricter requirements and generate + from ``ident_entry`` whereas the non-email fields use ``name_entry``. + Empty lists are dropped. Process and combine each non-empty list into + an unstructured string conforming to the core metadata specification + as well as into a more succinct structured format - two lists of + (name, email) tuples, one for authors and the other for maintainers. + + If ``simple_chars``, restrict the list of allowed characters to + reduce the Hypothesis search space and permit easier visualization. + If ``debug_entries``, return the underlying lists of ``Entry`` + instances in addition to the unstructured string and structured list + representations. + + This strategy will generate the most permissible identities possible: + unbalanced quote characters, unescaped special characters, omitted + identity information, empty identities, and so forth. + """ + text = "" + authors = OrderedSet() + maintainers = OrderedSet() + entry_dict = dict() + state = types.SimpleNamespace(simple_chars=simple_chars) + + combinations = zip( + ((i, j) for i in (name_entry, ident_entry) for j in (authors, maintainers)), + ("Author", "Maintainer", "Author-email", "Maintainer-email"), + ) + + for (entry_type, entry_target), entry_prefix in combinations: + entries, data_set, data_str = draw(identity_entries(state, entry_type)) + entry_dict[entry_prefix] = entries + if not entries: + continue + text += entry_prefix + ": " + data_str + "\n" + entry_target |= data_set + + result = (text, authors, maintainers) + if debug_entries: + result = (entry_dict, *result) + return result + + +@st.composite +def identity_entries(draw, state, entry_strategy): + """ + Generate a list of processed identity entries using + ``entry_strategy``. + """ + sample = draw(st.lists(entry_strategy(state), min_size=0, max_size=10)) + for entry in sample[:-1]: + entry.append(Token(identity_entries, ", ", True)) + return process(lstrip(draw(unbalance(sample)))) + + +def process(entries): + """ + Process a list of entries into structured and unstructured + formats. Filter empty (``None``) entries from the structured + format. Convert token categories to strings for simpler + visualization. 
+ """ + text = "" + data = OrderedSet() + + for entry in entries: + entry_text = entry.as_text + entry_data = entry.as_data + for token in entry: + token.category = token.category.__name__ + text += entry_text + if all(elem is None for elem in entry_data): + continue + data.add(entry_data) + + return (entries, data, text) + + +def lstrip(entries): + """ + The PackageMetadata implementation removes leading whitespace + from the first entry, so we do the same. This should not alter + the parsing of the entry. Return possibly-mutated ``entries``. + """ + entry = entries[0] if entries else [] + for token in entry: + token.value = token.value.lstrip() + if token.value: + break + return entries + + +def unbalance_indexes(entries): + """Return a list of valid replacement indices for quote characters.""" + index_candidates = [] + type_candidates = [ + ident_addr_domain_other, + ident_addr_local_other, + ident_name_other, + name_value_other, + ] + stop = False + i = len(entries) - 1 + while i >= 0 and not stop: + j = len(entries[i]) - 1 + while j >= 0 and not (stop := entries[i][j].category is quote): + token = entries[i][j] + if token.category not in type_candidates: + repls = () + elif token.category is ident_name_other: + final = entries[i][j + 1] is ident_name_only + repls = get_name_qchar_idxs(token.value, not final) + else: + repls = range(len(token.value)) + for k in repls: + index_candidates.append((i, j, k)) + j -= 1 + i -= 1 + return index_candidates + + +@st.composite +def unbalance(draw, entries): + """ + Unbalance quote characters in the list of entries without altering + the parsing of the list. Mutate if possible, returning ``entries``. + """ + qchrs = draw(st.sets(st.sampled_from("\"'"))) + index_candidates = unbalance_indexes(entries) + if not qchrs or not index_candidates: + return entries + for qchr in sorted(qchrs): + idx_entry, idx_token, idx_char = draw(st.sampled_from(index_candidates)) + token = entries[idx_entry][idx_token] + token.value = replace(token.value, idx_char, qchr) + return entries + + +# Non-leaf rules for "Author" and "Maintainer" fields: + + +def name_entry(state): + return name_empty() | name_value(state) + + +def name_empty(): + return st.builds(Entry.from_empty, st.just(2), st.just(name_empty)) + + +@st.composite +def name_value(draw, state): + choice = name_value_other(state) | quote(state) + sample = draw(st.lists(choice, min_size=1, max_size=10)) + entry = Entry.from_empty(2, name_value) + entry[:1] = draw(merge_sub(state, sample, name_value_other, sub_entry)) + return entry + + +# Non-leaf rules for "Author-email" and "Maintainer-email" fields: + + +def ident_entry(state): + return ( + ident_empty() + | ident_name_only(state) + | ident_addr_only(state) + | ident_name_addr(state) + ) + + +def ident_empty(): + return st.builds(Entry.from_empty, st.just(2), st.just(ident_empty)) + + +@st.composite +def ident_name_only(draw, state): + entry = Entry.from_empty(2, ident_name_only) + entry[:1] = draw(ident_name(state, name_only=True)) + return entry + + +@st.composite +def ident_addr_only(draw, state): + entry = Entry.from_empty(2, ident_addr_only) + addr = draw(ident_addr(state)) + if draw(st.booleans()): + entry[1:] = [draw(ident_name_addr_delim("")), *addr] + else: + entry[2:] = addr + return entry + + +@st.composite +def ident_name_addr(draw, state): + name = draw(ident_name(state, name_only=False)) + delim = draw(ident_name_addr_delim(name[-1].value)) + addr = draw(ident_addr(state)) + return Entry((*name, delim, *addr)) + + +@st.composite +def 
ident_name_addr_delim(draw, name): + min_size = 0 if name.endswith("@") else 1 + delim = draw(st.text(alphabet=" ", min_size=min_size, max_size=10)) + return Token(ident_name_addr_delim, delim, True) + + +@st.composite +def ident_name(draw, state, name_only): + choice = ident_name_other(state) | quote(state) + sample = draw(st.lists(choice, min_size=1, max_size=10)) + return draw( + merge_sub(state, sample, ident_name_other, sub_name, name_only=name_only) + ) + + +@st.composite +def ident_addr(draw, state): + local = draw(ident_addr_local(state)) + domain = draw(ident_addr_domain(state)) + # affix possibly duplicate leading/trailing angle brackets + if draw(st.booleans()): + local = [Token(ident_addr, "<"), *local] + if draw(st.booleans()): + domain = [*domain, Token(ident_addr, ">")] + # split any coupled leading/trailing angle brackets + value = local[0].value + if value.startswith("<") and len(value) > 1: + split = dataclasses.replace(local[0], value="<", category=ident_addr) + local[0].value = value[1:] + local = [split, *local] + value = domain[-1].value + if value.endswith(">") and len(value) > 1: + split = dataclasses.replace(domain[-1], value=">", category=ident_addr) + domain[-1].value = value[:-1] + domain = [*domain, split] + # if not solitary, ignore single leading/trailing angle brackets + if len(local) > 1 and local[0].value == "<": + local[0].ignore = True + if len(domain) > 1 and domain[-1].value == ">": + domain[-1].ignore = True + # combine the results + result = [*local, Token(ident_addr, "@"), *domain] + return result + + +@st.composite +def ident_addr_local(draw, state): + choice = ident_addr_local_other(state) | quote(state) + sample = draw(st.lists(choice, min_size=1, max_size=10)) + return draw(merge_sub(state, sample, ident_addr_local_other)) + + +@st.composite +def ident_addr_domain(draw, state): + choice = ident_addr_domain_other(state) | quote(state) + sample = draw(st.lists(choice, min_size=1, max_size=10)) + return draw(merge_sub(state, sample, ident_addr_domain_other, sub_entry)) + + +# Leaf rules: + + +@st.composite +def quote(draw, state): + qchr = draw(st.sampled_from("'\"")) + escape_trail = re.compile(r"(?, Tzu-Ping Chung , different.person@example.com + Maintainer-email: Brett Cannon + """, # noqa: E501 + [ + ("Another person", None), + ("Yet Another name", None), + ("Pradyun Gedam", "pradyun@example.com"), + ("Tzu-Ping Chung", "tzu-ping@example.com"), + (None, "different.person@example.com"), + ], + [("Brett Cannon", "brett@python.org")], + ) + ) + def test_structured_identity(self, arg): + """ + Verify that the unstructured identity metadata is parsed and + converted to the expected corresponding structure. 
+ """ + metadata_text, expected_authors, expected_maintainers = arg + md = self.metadata_from_text(metadata_text) + authors = list(map(tuple, md.authors)) + maintainers = list(map(tuple, md.maintainers)) + + authors_diff = self.diff(authors, expected_authors) + maintainers_diff = self.diff(maintainers, expected_maintainers) + + hypothesis.note(f"Authors diff: {self.diff_fmt(authors_diff)}") + hypothesis.note(f"Maintainers diff: {self.diff_fmt(maintainers_diff)}") + + assert authors == expected_authors + assert maintainers == expected_maintainers diff --git a/Misc/NEWS.d/next/Library/2023-08-28-16-37-27.gh-issue-108580.BE0loO.rst b/Misc/NEWS.d/next/Library/2023-08-28-16-37-27.gh-issue-108580.BE0loO.rst new file mode 100644 index 00000000000000..fcc618b51e1daf --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-08-28-16-37-27.gh-issue-108580.BE0loO.rst @@ -0,0 +1,4 @@ +Add the properties ``PackageMetadata.authors`` and +``PackageMetadata.maintainers`` to the ``importlib.metadata`` module. These +unify and provide minimal parsing for the respective core metadata fields +("Author", "Author-email"), and ("Maintainer", "Maintainer-email"). diff --git a/Tools/requirements-hypothesis.txt b/Tools/requirements-hypothesis.txt index 9db2b74c87cfb0..a7d0f6df31271a 100644 --- a/Tools/requirements-hypothesis.txt +++ b/Tools/requirements-hypothesis.txt @@ -1,4 +1,4 @@ # Requirements file for hypothesis that # we use to run our property-based tests in CI. -hypothesis==6.84.0 +hypothesis==6.85.0