Skip to content

Commit

Permalink
pythongh-108580: Structure for importlib metadata idents
Browse files Browse the repository at this point in the history
Add `PackageMetadata.authors` and `PackageMetadata.maintainers` to the
`importlib.metadata` module. These unify and provide minimal parsing for
the respective core metadata fields ("Author", "Author-email"), and
("Maintainer", "Maintainer-email").
  • Loading branch information
orbisvicis committed Aug 28, 2023
1 parent cf7ba83 commit 256426d
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 0 deletions.
102 changes: 102 additions & 0 deletions Lib/importlib/metadata/_adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import re
import textwrap
import email.message
import dataclasses

from ._text import FoldedCase

Expand All @@ -15,6 +16,77 @@
stacklevel=2,
)

# It looks like RFC5322 but it's much much worse. The only takeaway from
# RFC5322 is that special characters such as "," and "<" must be quoted
# when used as text.

# Split an RFC5322-ish list:
# 1. Alt 1: match single or double quotes and handle escape characters.
# 2. Alt 2: match anything except ',' followed by a space. If quote
# characters are unbalanced, they will be matched here.
# 3. Match the alternatives at least once, in any order...
# 4. ... and capture them.
# 5. Match the list separator, or end-of-string.
# Result:
# group 1 (list entry): None or non-empty string.

# Compiled in verbose mode. The pattern has two capturing groups: group 1
# is the list entry itself, group 2 only remembers the opening quote
# character for the backreference, so findall() yields 2-tuples.
_entries = re.compile(r"""
( (?: (["']) (?:(?!\2|\\).|\\.)* \2 # 1
| (?!,\ ). # 2
)+ # 3
) # 4
(?:,\ |$) # 5
""", re.VERBOSE)

# Split an RFC5322-ish name-email entry:
# 01. Start at the beginning.
# 02. If it starts with '<', skip this name-capturing regex.
# 03. Alt 1: match single or double quotes and handle escape characters.
# 04. Alt 2: match anything except one or more spaces followed by '<'. If
# quote characters are unbalanced, they will be matched here.
# 05. Match the alternatives at least once, in any order...
# 06. ... but optionally so the result will be 'None' rather than an empty
# string.
# 07. If the name portion is missing there may not be whitespace before
# '<'.
# 08. Capture everything after '<' with a non-greedy quantifier to allow
# for the next regex. Use '+','?' to force an empty string to become
# 'None'.
# 09. Strip the final '>', if it exists.
# 10. Allow for missing email section.
# 11. Finish at the end.
# Result:
# group 1 (name): None or non-empty string.
# group 3 (email): None or non-empty string.

# Compiled in verbose mode and anchored at both ends. The pattern has
# three capturing groups: group 1 is the name, group 2 only remembers the
# opening quote character for the backreference, group 3 is the email.
_name_email = re.compile(r"""
^ # 01
( (?!<) # 02
(?: (["']) (?:(?!\2|\\).|\\.)* \2 # 03
| (?!\ +<). # 04
)+ # 05
)? # 06
(?: \ *< # 07
(.+?)? # 08
>? # 09
)? # 10
$ # 11
""", re.VERBOSE)


@dataclasses.dataclass(eq=True, frozen=True)
class Ident:
"""
A container for identity attributes, used by the author or
maintainer fields.
"""
name: str|None
email: str|None

def __iter__(self):
return (getattr(self, field.name) for
field in dataclasses.fields(self))


class Message(email.message.Message):
multiple_use_keys = set(
Expand Down Expand Up @@ -87,3 +159,33 @@ def transform(key):
return tk, value

return dict(map(transform, map(FoldedCase, self)))

def _parse_idents(self, s):
    """
    Parse a list of name-and-email entries into a set of ``Ident``.

    ``s`` is split into entries with ``_entries``; each entry is then
    split into its name and email parts with ``_name_email``. Entries
    that yield neither a name nor an email are dropped.
    """
    idents = set()
    # findall() returns (entry, quote-char) tuples; only the entry matters.
    for entry, _quote in _entries.findall(s):
        match = _name_email.match(entry)
        if match is None:
            # Defensive: skip an entry the pattern cannot split instead
            # of failing on ``None``.
            continue
        # Groups 1 and 3 hold the name and the email; group 2 is only
        # the quote-character backreference. Match objects accept a
        # single group index, not a slice, so ask for both explicitly.
        name, email = match.group(1, 3)
        if name is None and email is None:
            continue
        idents.add(Ident(name, email))
    return idents

def _parse_names(self, s):
    """
    Parse a list of names into a set of ``Ident`` with no email.

    ``s`` is split into entries with ``_entries``; every entry becomes
    ``Ident(entry, None)``.
    """
    # findall() returns (entry, quote-char) tuples; only the entry matters.
    return {Ident(entry, None) for entry, _quote in _entries.findall(s)}

def _parse_names_idents(self, fn, fi):
sn = self.get(fn, "")
si = self.get(fi, "")
return self._parse_names(sn) | self._parse_idents(si)

@property
def authors(self):
    """
    The parsed "Author" and "Author-email" fields, combined.

    Parsing is deliberately minimal; see ``_parse_names_idents``.
    """
    name_field, email_field = "Author", "Author-email"
    return self._parse_names_idents(name_field, email_field)

@property
def maintainers(self):
    """
    The parsed "Maintainer" and "Maintainer-email" fields, combined.

    Parsing is deliberately minimal; see ``_parse_names_idents``.
    """
    name_field, email_field = "Maintainer", "Maintainer-email"
    return self._parse_names_idents(name_field, email_field)
14 changes: 14 additions & 0 deletions Lib/importlib/metadata/_meta.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from typing import Protocol
from typing import Any, Dict, Iterator, List, Optional, TypeVar, Union, overload

from ._adapters import Ident


_T = TypeVar("_T")

Expand Down Expand Up @@ -43,6 +45,18 @@ def json(self) -> Dict[str, Union[str, List[str]]]:
A JSON-compatible form of the metadata.
"""

@property
def authors(self) -> set[Ident]:
    """
    Minimal parsing for "Author" and "Author-email" fields.

    Returns a set of ``Ident`` objects.
    """

@property
def maintainers(self) -> set[Ident]:
    """
    Minimal parsing for "Maintainer" and "Maintainer-email" fields.

    Returns a set of ``Ident`` objects.
    """


class SimplePath(Protocol[_T]):
"""
Expand Down

0 comments on commit 256426d

Please sign in to comment.