From cbfd39247983309a9ef0ae6da6c61cc71665b967 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Wed, 9 Oct 2024 21:50:03 +0100 Subject: [PATCH 01/32] GH-121970: Extract ``availability`` into a new extension (#125082) --- Doc/conf.py | 1 + Doc/tools/extensions/availability.py | 125 +++++++++++++++++++++++++++ Doc/tools/extensions/pyspecific.py | 76 ---------------- 3 files changed, 126 insertions(+), 76 deletions(-) create mode 100644 Doc/tools/extensions/availability.py diff --git a/Doc/conf.py b/Doc/conf.py index 5f22340ac434c9..287e0da46eb11c 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -21,6 +21,7 @@ extensions = [ 'audit_events', + 'availability', 'c_annotations', 'glossary_search', 'lexers', diff --git a/Doc/tools/extensions/availability.py b/Doc/tools/extensions/availability.py new file mode 100644 index 00000000000000..47833fdcb87590 --- /dev/null +++ b/Doc/tools/extensions/availability.py @@ -0,0 +1,125 @@ +"""Support for documenting platform availability""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from docutils import nodes +from sphinx import addnodes +from sphinx.util import logging +from sphinx.util.docutils import SphinxDirective + +if TYPE_CHECKING: + from sphinx.application import Sphinx + from sphinx.util.typing import ExtensionMetadata + +logger = logging.getLogger("availability") + +# known platform, libc, and threading implementations +_PLATFORMS = frozenset({ + "AIX", + "Android", + "BSD", + "DragonFlyBSD", + "Emscripten", + "FreeBSD", + "GNU/kFreeBSD", + "iOS", + "Linux", + "macOS", + "NetBSD", + "OpenBSD", + "POSIX", + "Solaris", + "Unix", + "VxWorks", + "WASI", + "Windows", +}) +_LIBC = frozenset({ + "BSD libc", + "glibc", + "musl", +}) +_THREADING = frozenset({ + # POSIX platforms with pthreads + "pthreads", +}) +KNOWN_PLATFORMS = _PLATFORMS | _LIBC | _THREADING + + +class Availability(SphinxDirective): + has_content = True + required_arguments = 1 + optional_arguments = 0 + final_argument_whitespace = True + + def run(self) -> list[nodes.container]: + title = "Availability" + refnode = addnodes.pending_xref( + title, + nodes.inline(title, title, classes=["xref", "std", "std-ref"]), + refdoc=self.env.docname, + refdomain="std", + refexplicit=True, + reftarget="availability", + reftype="ref", + refwarn=True, + ) + sep = nodes.Text(": ") + parsed, msgs = self.state.inline_text(self.arguments[0], self.lineno) + pnode = nodes.paragraph(title, "", refnode, sep, *parsed, *msgs) + self.set_source_info(pnode) + cnode = nodes.container("", pnode, classes=["availability"]) + self.set_source_info(cnode) + if self.content: + self.state.nested_parse(self.content, self.content_offset, cnode) + self.parse_platforms() + + return [cnode] + + def parse_platforms(self) -> dict[str, str | bool]: + """Parse platform information from arguments + + Arguments is a comma-separated string of platforms. A platform may + be prefixed with "not " to indicate that a feature is not available. + + Example:: + + .. availability:: Windows, Linux >= 4.2, not WASI + + Arguments like "Linux >= 3.17 with glibc >= 2.27" are currently not + parsed into separate tokens. 
+ """ + platforms = {} + for arg in self.arguments[0].rstrip(".").split(","): + arg = arg.strip() + platform, _, version = arg.partition(" >= ") + if platform.startswith("not "): + version = False + platform = platform.removeprefix("not ") + elif not version: + version = True + platforms[platform] = version + + if unknown := set(platforms).difference(KNOWN_PLATFORMS): + logger.warning( + "Unknown platform%s or syntax '%s' in '.. availability:: %s', " + "see %s:KNOWN_PLATFORMS for a set of known platforms.", + "s" if len(platforms) != 1 else "", + " ".join(sorted(unknown)), + self.arguments[0], + __file__, + ) + + return platforms + + +def setup(app: Sphinx) -> ExtensionMetadata: + app.add_directive("availability", Availability) + + return { + "version": "1.0", + "parallel_read_safe": True, + "parallel_write_safe": True, + } diff --git a/Doc/tools/extensions/pyspecific.py b/Doc/tools/extensions/pyspecific.py index b6623a2b8e01f1..bcb8a421e32d09 100644 --- a/Doc/tools/extensions/pyspecific.py +++ b/Doc/tools/extensions/pyspecific.py @@ -24,7 +24,6 @@ from sphinx.domains.changeset import VersionChange, versionlabels, versionlabel_classes from sphinx.domains.python import PyFunction, PyMethod, PyModule from sphinx.locale import _ as sphinx_gettext -from sphinx.util import logging from sphinx.util.docutils import SphinxDirective from sphinx.writers.text import TextWriter, TextTranslator from sphinx.util.display import status_iterator @@ -108,80 +107,6 @@ def run(self): return [pnode] -# Support for documenting platform availability - -class Availability(SphinxDirective): - - has_content = True - required_arguments = 1 - optional_arguments = 0 - final_argument_whitespace = True - - # known platform, libc, and threading implementations - known_platforms = frozenset({ - "AIX", "Android", "BSD", "DragonFlyBSD", "Emscripten", "FreeBSD", - "GNU/kFreeBSD", "Linux", "NetBSD", "OpenBSD", "POSIX", "Solaris", - "Unix", "VxWorks", "WASI", "Windows", "macOS", "iOS", - # libc - "BSD libc", "glibc", "musl", - # POSIX platforms with pthreads - "pthreads", - }) - - def run(self): - availability_ref = ':ref:`Availability `: ' - avail_nodes, avail_msgs = self.state.inline_text( - availability_ref + self.arguments[0], - self.lineno) - pnode = nodes.paragraph(availability_ref + self.arguments[0], - '', *avail_nodes, *avail_msgs) - self.set_source_info(pnode) - cnode = nodes.container("", pnode, classes=["availability"]) - self.set_source_info(cnode) - if self.content: - self.state.nested_parse(self.content, self.content_offset, cnode) - self.parse_platforms() - - return [cnode] - - def parse_platforms(self): - """Parse platform information from arguments - - Arguments is a comma-separated string of platforms. A platform may - be prefixed with "not " to indicate that a feature is not available. - - Example:: - - .. availability:: Windows, Linux >= 4.2, not WASI - - Arguments like "Linux >= 3.17 with glibc >= 2.27" are currently not - parsed into separate tokens. - """ - platforms = {} - for arg in self.arguments[0].rstrip(".").split(","): - arg = arg.strip() - platform, _, version = arg.partition(" >= ") - if platform.startswith("not "): - version = False - platform = platform[4:] - elif not version: - version = True - platforms[platform] = version - - unknown = set(platforms).difference(self.known_platforms) - if unknown: - cls = type(self) - logger = logging.getLogger(cls.__qualname__) - logger.warning( - f"Unknown platform(s) or syntax '{' '.join(sorted(unknown))}' " - f"in '.. 
availability:: {self.arguments[0]}', see " - f"{__file__}:{cls.__qualname__}.known_platforms for a set " - "known platforms." - ) - - return platforms - - # Support for documenting decorators class PyDecoratorMixin(object): @@ -492,7 +417,6 @@ def setup(app): app.add_role('issue', issue_role) app.add_role('gh', gh_issue_role) app.add_directive('impl-detail', ImplementationDetail) - app.add_directive('availability', Availability) app.add_directive('versionadded', PyVersionChange, override=True) app.add_directive('versionchanged', PyVersionChange, override=True) app.add_directive('versionremoved', PyVersionChange, override=True) From 7d2c39752fa6f685f15ad9c585d83a62553477c2 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 10 Oct 2024 00:20:53 +0300 Subject: [PATCH 02/32] gh-91818: Use default program name in the CLI of many modules (GH-124867) As argparse now detects by default when the code was run as a module. This leads to using the actual executable name instead of simply "python" to display in the usage message ("usage: python -m ..."). --- Lib/ast.py | 2 +- Lib/ensurepip/__init__.py | 2 +- Lib/ensurepip/_uninstall.py | 2 +- Lib/json/tool.py | 3 +-- Lib/pdb.py | 3 +-- Lib/sqlite3/__main__.py | 1 - Lib/test/test_sqlite3/test_cli.py | 4 +++- Lib/tokenize.py | 2 +- Lib/venv/__init__.py | 3 +-- .../Library/2024-10-01-23-29-09.gh-issue-91818.Kz8cPI.rst | 4 ++++ 10 files changed, 14 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-01-23-29-09.gh-issue-91818.Kz8cPI.rst diff --git a/Lib/ast.py b/Lib/ast.py index a954d4a97d3c22..154d2c8c1f9ebb 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1743,7 +1743,7 @@ def unparse(ast_obj): def main(): import argparse - parser = argparse.ArgumentParser(prog='python -m ast') + parser = argparse.ArgumentParser() parser.add_argument('infile', nargs='?', default='-', help='the file to parse; defaults to stdin') parser.add_argument('-m', '--mode', default='exec', diff --git a/Lib/ensurepip/__init__.py b/Lib/ensurepip/__init__.py index c5350df270487a..585afc85836c06 100644 --- a/Lib/ensurepip/__init__.py +++ b/Lib/ensurepip/__init__.py @@ -205,7 +205,7 @@ def _uninstall_helper(*, verbosity=0): def _main(argv=None): import argparse - parser = argparse.ArgumentParser(prog="python -m ensurepip") + parser = argparse.ArgumentParser() parser.add_argument( "--version", action="version", diff --git a/Lib/ensurepip/_uninstall.py b/Lib/ensurepip/_uninstall.py index b257904328d2f5..4183c28a809008 100644 --- a/Lib/ensurepip/_uninstall.py +++ b/Lib/ensurepip/_uninstall.py @@ -6,7 +6,7 @@ def _main(argv=None): - parser = argparse.ArgumentParser(prog="python -m ensurepip._uninstall") + parser = argparse.ArgumentParser() parser.add_argument( "--version", action="version", diff --git a/Lib/json/tool.py b/Lib/json/tool.py index 9028e517fb9f7d..1ba91384c81f27 100644 --- a/Lib/json/tool.py +++ b/Lib/json/tool.py @@ -9,10 +9,9 @@ def main(): - prog = 'python -m json' description = ('A simple command line interface for json module ' 'to validate and pretty-print JSON objects.') - parser = argparse.ArgumentParser(prog=prog, description=description) + parser = argparse.ArgumentParser(description=description) parser.add_argument('infile', nargs='?', help='a JSON file to be validated or pretty-printed', default='-') diff --git a/Lib/pdb.py b/Lib/pdb.py index aea6fb70ae3106..d9aed24bfcd8e7 100644 --- a/Lib/pdb.py +++ b/Lib/pdb.py @@ -2423,8 +2423,7 @@ def help(): def main(): import argparse - parser = argparse.ArgumentParser(prog="pdb", - 
usage="%(prog)s [-h] [-c command] (-m module | pyfile) [args ...]", + parser = argparse.ArgumentParser(usage="%(prog)s [-h] [-c command] (-m module | pyfile) [args ...]", description=_usage, formatter_class=argparse.RawDescriptionHelpFormatter, allow_abbrev=False) diff --git a/Lib/sqlite3/__main__.py b/Lib/sqlite3/__main__.py index d9423c25e34135..cfdee61403d1fa 100644 --- a/Lib/sqlite3/__main__.py +++ b/Lib/sqlite3/__main__.py @@ -65,7 +65,6 @@ def runsource(self, source, filename="", symbol="single"): def main(*args): parser = ArgumentParser( description="Python sqlite3 CLI", - prog="python -m sqlite3", ) parser.add_argument( "filename", type=str, default=":memory:", nargs="?", diff --git a/Lib/test/test_sqlite3/test_cli.py b/Lib/test/test_sqlite3/test_cli.py index 303f9e03b5383f..d014a9ce841607 100644 --- a/Lib/test/test_sqlite3/test_cli.py +++ b/Lib/test/test_sqlite3/test_cli.py @@ -34,7 +34,9 @@ def expect_failure(self, *args): def test_cli_help(self): out = self.expect_success("-h") - self.assertIn("usage: python -m sqlite3", out) + self.assertIn("usage: ", out) + self.assertIn(" [-h] [-v] [filename] [sql]", out) + self.assertIn("Python sqlite3 CLI", out) def test_cli_version(self): out = self.expect_success("-v") diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 4b4c3cfe16999b..7ece4e9b70d31b 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -510,7 +510,7 @@ def error(message, filename=None, location=None): sys.exit(1) # Parse the arguments and options - parser = argparse.ArgumentParser(prog='python -m tokenize') + parser = argparse.ArgumentParser() parser.add_argument(dest='filename', nargs='?', metavar='filename.py', help='the file to tokenize; defaults to stdin') diff --git a/Lib/venv/__init__.py b/Lib/venv/__init__.py index a5cb2bdb064692..a00fa690fa0b88 100644 --- a/Lib/venv/__init__.py +++ b/Lib/venv/__init__.py @@ -575,8 +575,7 @@ def create(env_dir, system_site_packages=False, clear=False, def main(args=None): import argparse - parser = argparse.ArgumentParser(prog=__name__, - description='Creates virtual Python ' + parser = argparse.ArgumentParser(description='Creates virtual Python ' 'environments in one or ' 'more target ' 'directories.', diff --git a/Misc/NEWS.d/next/Library/2024-10-01-23-29-09.gh-issue-91818.Kz8cPI.rst b/Misc/NEWS.d/next/Library/2024-10-01-23-29-09.gh-issue-91818.Kz8cPI.rst new file mode 100644 index 00000000000000..f45f00e48a3830 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-01-23-29-09.gh-issue-91818.Kz8cPI.rst @@ -0,0 +1,4 @@ +The CLI of many modules (:mod:`ast`, :mod:`ensurepip`, :mod:`json`, +:mod:`pdb`, :mod:`sqlite3`, :mod:`tokenize`, :mod:`venv`) now uses the +actual executable name instead of simply "python" to display in the usage +message. From 52f70da19cf3c7198be37faeac233ef803080f6f Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 9 Oct 2024 23:56:30 +0200 Subject: [PATCH 03/32] gh-125196: Use PyUnicodeWriter for repr(list) (#125202) Replace the private _PyUnicodeWriter with the public PyUnicodeWriter. Replace PyObject_Repr() + _PyUnicodeWriter_WriteStr() with PyUnicodeWriter_WriteRepr(). 
--- Objects/listobject.c | 45 ++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 8abe15d6674140..e7090f20001a39 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -522,49 +522,50 @@ list_dealloc(PyObject *self) static PyObject * list_repr_impl(PyListObject *v) { - PyObject *s; - _PyUnicodeWriter writer; - Py_ssize_t i = Py_ReprEnter((PyObject*)v); - if (i != 0) { - return i > 0 ? PyUnicode_FromString("[...]") : NULL; + int res = Py_ReprEnter((PyObject*)v); + if (res != 0) { + return (res > 0 ? PyUnicode_FromString("[...]") : NULL); } - _PyUnicodeWriter_Init(&writer); - writer.overallocate = 1; /* "[" + "1" + ", 2" * (len - 1) + "]" */ - writer.min_length = 1 + 1 + (2 + 1) * (Py_SIZE(v) - 1) + 1; + Py_ssize_t prealloc = 1 + 1 + (2 + 1) * (Py_SIZE(v) - 1) + 1; + PyUnicodeWriter *writer = PyUnicodeWriter_Create(prealloc); + if (writer == NULL) { + goto error; + } - if (_PyUnicodeWriter_WriteChar(&writer, '[') < 0) + if (PyUnicodeWriter_WriteChar(writer, '[') < 0) { goto error; + } /* Do repr() on each element. Note that this may mutate the list, so must refetch the list size on each iteration. */ - for (i = 0; i < Py_SIZE(v); ++i) { + for (Py_ssize_t i = 0; i < Py_SIZE(v); ++i) { if (i > 0) { - if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0) + if (PyUnicodeWriter_WriteChar(writer, ',') < 0) { + goto error; + } + if (PyUnicodeWriter_WriteChar(writer, ' ') < 0) { goto error; + } } - s = PyObject_Repr(v->ob_item[i]); - if (s == NULL) - goto error; - - if (_PyUnicodeWriter_WriteStr(&writer, s) < 0) { - Py_DECREF(s); + if (PyUnicodeWriter_WriteRepr(writer, v->ob_item[i]) < 0) { goto error; } - Py_DECREF(s); } - writer.overallocate = 0; - if (_PyUnicodeWriter_WriteChar(&writer, ']') < 0) + if (PyUnicodeWriter_WriteChar(writer, ']') < 0) { goto error; + } Py_ReprLeave((PyObject *)v); - return _PyUnicodeWriter_Finish(&writer); + return PyUnicodeWriter_Finish(writer); error: - _PyUnicodeWriter_Dealloc(&writer); + if (writer != NULL) { + PyUnicodeWriter_Discard(writer); + } Py_ReprLeave((PyObject *)v); return NULL; } From 0c5a48c1c9039eb1ce25a96c43505c4de0a0b9d7 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Wed, 9 Oct 2024 15:56:50 -0600 Subject: [PATCH 04/32] GH-124693: Support parsing negative scientific and complex numbers argparse (GH-124823) Co-authored-by: Serhiy Storchaka --- Lib/argparse.py | 2 +- Lib/test/test_argparse.py | 27 ++++++++++++++----- ...-10-01-02-31-13.gh-issue-124693.qzbXKB.rst | 1 + 3 files changed, 22 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-01-02-31-13.gh-issue-124693.qzbXKB.rst diff --git a/Lib/argparse.py b/Lib/argparse.py index 21299b69ecd74c..d1f8fa2ace8611 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -1367,7 +1367,7 @@ def __init__(self, self._defaults = {} # determines whether an "option" looks like a negative number - self._negative_number_matcher = _re.compile(r'^-(?:\d+(?:_\d+)*(?:\.\d+(?:_\d+)*)?|\.\d+(?:_\d+)*)$') + self._negative_number_matcher = _re.compile(r'-\.?\d') # whether or not there are any optionals that look like negative # numbers -- uses a list so it can be shared and edited diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 1bf812b36fc2c6..c9e79eb18a08fb 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -2195,20 +2195,33 @@ class TestNegativeNumber(ParserTestCase): argument_signatures = [ Sig('--int', type=int), 
Sig('--float', type=float), + Sig('--complex', type=complex), ] failures = [ '--float -_.45', '--float -1__000.0', + '--float -1.0.0', '--int -1__000', + '--int -1.0', + '--complex -1__000.0j', + '--complex -1.0jj', + '--complex -_.45j', ] successes = [ - ('--int -1000 --float -1000.0', NS(int=-1000, float=-1000.0)), - ('--int -1_000 --float -1_000.0', NS(int=-1000, float=-1000.0)), - ('--int -1_000_000 --float -1_000_000.0', NS(int=-1000000, float=-1000000.0)), - ('--float -1_000.0', NS(int=None, float=-1000.0)), - ('--float -1_000_000.0_0', NS(int=None, float=-1000000.0)), - ('--float -.5', NS(int=None, float=-0.5)), - ('--float -.5_000', NS(int=None, float=-0.5)), + ('--int -1000 --float -1000.0', NS(int=-1000, float=-1000.0, complex=None)), + ('--int -1_000 --float -1_000.0', NS(int=-1000, float=-1000.0, complex=None)), + ('--int -1_000_000 --float -1_000_000.0', NS(int=-1000000, float=-1000000.0, complex=None)), + ('--float -1_000.0', NS(int=None, float=-1000.0, complex=None)), + ('--float -1_000_000.0_0', NS(int=None, float=-1000000.0, complex=None)), + ('--float -.5', NS(int=None, float=-0.5, complex=None)), + ('--float -.5_000', NS(int=None, float=-0.5, complex=None)), + ('--float -1e3', NS(int=None, float=-1000, complex=None)), + ('--float -1e-3', NS(int=None, float=-0.001, complex=None)), + ('--complex -1j', NS(int=None, float=None, complex=-1j)), + ('--complex -1_000j', NS(int=None, float=None, complex=-1000j)), + ('--complex -1_000.0j', NS(int=None, float=None, complex=-1000.0j)), + ('--complex -1e3j', NS(int=None, float=None, complex=-1000j)), + ('--complex -1e-3j', NS(int=None, float=None, complex=-0.001j)), ] class TestInvalidAction(TestCase): diff --git a/Misc/NEWS.d/next/Library/2024-10-01-02-31-13.gh-issue-124693.qzbXKB.rst b/Misc/NEWS.d/next/Library/2024-10-01-02-31-13.gh-issue-124693.qzbXKB.rst new file mode 100644 index 00000000000000..3e87eb457d9911 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-01-02-31-13.gh-issue-124693.qzbXKB.rst @@ -0,0 +1 @@ +Fix a bug where :mod:`argparse` doesn't recognize negative complex numbers or negative numbers using scientific notation. From ee3167b9787bf9424d5637a224233de775450231 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 00:01:02 +0200 Subject: [PATCH 05/32] gh-125196: Add fast-path for int in PyUnicodeWriter_WriteStr() (#125214) PyUnicodeWriter_WriteStr() and PyUnicodeWriter_WriteRepr() now call directly _PyLong_FormatWriter() if the argument is an int. 
--- Objects/unicodeobject.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 4ea7d5f380e9a2..a9b33248163880 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13632,6 +13632,10 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str) int PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj) { + if (Py_TYPE(obj) == &PyLong_Type) { + return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0); + } + PyObject *str = PyObject_Str(obj); if (str == NULL) { return -1; @@ -13646,6 +13650,10 @@ PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj) int PyUnicodeWriter_WriteRepr(PyUnicodeWriter *writer, PyObject *obj) { + if (Py_TYPE(obj) == &PyLong_Type) { + return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0); + } + PyObject *repr = PyObject_Repr(obj); if (repr == NULL) { return -1; From 1877543d03d323d581b5fc0f19eff501926ba151 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 00:04:50 +0200 Subject: [PATCH 06/32] gh-125196: Use PyUnicodeWriter for repr(structseq) (#125219) Replace the private _PyUnicodeWriter with the public PyUnicodeWriter. * Avoid temporary PyUnicode_DecodeUTF8(): call PyUnicodeWriter_WriteUTF8() instead. * Avoid temporary PyObject_Repr(): call PyUnicodeWriter_WriteRepr() instead. --- Objects/structseq.c | 65 +++++++++++++++++---------------------------- 1 file changed, 24 insertions(+), 41 deletions(-) diff --git a/Objects/structseq.c b/Objects/structseq.c index ee3dbf9d4c047a..6092742835400b 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -266,83 +266,66 @@ static PyObject * structseq_repr(PyStructSequence *obj) { PyTypeObject *typ = Py_TYPE(obj); - _PyUnicodeWriter writer; - /* Write "typename(" */ - PyObject *type_name = PyUnicode_DecodeUTF8(typ->tp_name, - strlen(typ->tp_name), - NULL); - if (type_name == NULL) { + // count 5 characters per item: "x=1, " + Py_ssize_t type_name_len = strlen(typ->tp_name); + Py_ssize_t prealloc = (type_name_len + 1 + + VISIBLE_SIZE(obj) * 5 + 1); + PyUnicodeWriter *writer = PyUnicodeWriter_Create(prealloc); + if (writer == NULL) { return NULL; } - _PyUnicodeWriter_Init(&writer); - writer.overallocate = 1; - /* count 5 characters per item: "x=1, " */ - writer.min_length = (PyUnicode_GET_LENGTH(type_name) + 1 - + VISIBLE_SIZE(obj) * 5 + 1); - - if (_PyUnicodeWriter_WriteStr(&writer, type_name) < 0) { - Py_DECREF(type_name); + // Write "typename(" + if (PyUnicodeWriter_WriteUTF8(writer, typ->tp_name, type_name_len) < 0) { goto error; } - Py_DECREF(type_name); - - if (_PyUnicodeWriter_WriteChar(&writer, '(') < 0) { + if (PyUnicodeWriter_WriteChar(writer, '(') < 0) { goto error; } for (Py_ssize_t i=0; i < VISIBLE_SIZE(obj); i++) { if (i > 0) { - /* Write ", " */ - if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0) { + // Write ", " + if (PyUnicodeWriter_WriteChar(writer, ',') < 0) { + goto error; + } + if (PyUnicodeWriter_WriteChar(writer, ' ') < 0) { goto error; } } - /* Write "name=repr" */ + // Write name const char *name_utf8 = typ->tp_members[i].name; if (name_utf8 == NULL) { - PyErr_Format(PyExc_SystemError, "In structseq_repr(), member %zd name is NULL" + PyErr_Format(PyExc_SystemError, + "In structseq_repr(), member %zd name is NULL" " for type %.500s", i, typ->tp_name); goto error; } - - PyObject *name = PyUnicode_DecodeUTF8(name_utf8, strlen(name_utf8), NULL); - if (name == NULL) { - goto error; - } - if (_PyUnicodeWriter_WriteStr(&writer, name) < 0) { - 
Py_DECREF(name); + if (PyUnicodeWriter_WriteUTF8(writer, name_utf8, -1) < 0) { goto error; } - Py_DECREF(name); - if (_PyUnicodeWriter_WriteChar(&writer, '=') < 0) { + // Write "=" + repr(value) + if (PyUnicodeWriter_WriteChar(writer, '=') < 0) { goto error; } - PyObject *value = PyStructSequence_GetItem((PyObject*)obj, i); assert(value != NULL); - PyObject *repr = PyObject_Repr(value); - if (repr == NULL) { - goto error; - } - if (_PyUnicodeWriter_WriteStr(&writer, repr) < 0) { - Py_DECREF(repr); + if (PyUnicodeWriter_WriteRepr(writer, value) < 0) { goto error; } - Py_DECREF(repr); } - if (_PyUnicodeWriter_WriteChar(&writer, ')') < 0) { + if (PyUnicodeWriter_WriteChar(writer, ')') < 0) { goto error; } - return _PyUnicodeWriter_Finish(&writer); + return PyUnicodeWriter_Finish(writer); error: - _PyUnicodeWriter_Dealloc(&writer); + PyUnicodeWriter_Discard(writer); return NULL; } From c7d5d1d93b630e352abd9a0c93ea6d34c443f444 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Wed, 9 Oct 2024 23:30:56 +0100 Subject: [PATCH 07/32] gh-125140: Remove the current directory from sys.path when using pyrepl (GH-125212) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pablo Galindo Co-authored-by: Łukasz Langa Co-authored-by: Peter Bierma --- Lib/site.py | 11 ++++++++--- .../2024-10-09-20-08-13.gh-issue-125140.YgNWRB.rst | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2024-10-09-20-08-13.gh-issue-125140.YgNWRB.rst diff --git a/Lib/site.py b/Lib/site.py index b3194d79fb5ab8..07a6361fad44e5 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -503,9 +503,14 @@ def register_readline(): if PYTHON_BASIC_REPL: CAN_USE_PYREPL = False else: - import _pyrepl.readline - import _pyrepl.unix_console - from _pyrepl.main import CAN_USE_PYREPL + original_path = sys.path + sys.path = [p for p in original_path if p != ''] + try: + import _pyrepl.readline + import _pyrepl.unix_console + from _pyrepl.main import CAN_USE_PYREPL + finally: + sys.path = original_path except ImportError: return diff --git a/Misc/NEWS.d/next/Security/2024-10-09-20-08-13.gh-issue-125140.YgNWRB.rst b/Misc/NEWS.d/next/Security/2024-10-09-20-08-13.gh-issue-125140.YgNWRB.rst new file mode 100644 index 00000000000000..f4a49302372647 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2024-10-09-20-08-13.gh-issue-125140.YgNWRB.rst @@ -0,0 +1 @@ +Remove the current directory from ``sys.path`` when using PyREPL. From 9ad55e85d78c5338b1ad170e614345652bd1b651 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 10 Oct 2024 02:30:14 +0300 Subject: [PATCH 08/32] gh-124969: Skip the test for ALT_DIGITS also on iOS (#125177) Skip the locale.ALT_DIGITS test on all Apple platforms, not just macOS. 
--- Lib/test/test__locale.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test__locale.py b/Lib/test/test__locale.py index 02d2acc6d1c417..e403c2a822788d 100644 --- a/Lib/test/test__locale.py +++ b/Lib/test/test__locale.py @@ -209,8 +209,8 @@ def test_alt_digits_nl_langinfo(self): with self.subTest(locale=loc): alt_digits = nl_langinfo(locale.ALT_DIGITS) self.assertIsInstance(alt_digits, tuple) - if count and not alt_digits and sys.platform == 'darwin': - self.skipTest(f'ALT_DIGITS is not set for locale {loc!r} on macOS') + if count and not alt_digits and support.is_apple: + self.skipTest(f'ALT_DIGITS is not set for locale {loc!r} on Apple platforms') self.assertEqual(len(alt_digits), count) for i in samples: self.assertEqual(alt_digits[i], samples[i]) From 1b2a5485f94ccbe43a45eb9990a5649ae3d2499e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 01:32:02 +0200 Subject: [PATCH 09/32] gh-125196: PyUnicodeWriter_Discard(NULL) does nothing (#125222) --- Doc/c-api/unicode.rst | 2 ++ Objects/listobject.c | 4 +--- Objects/unicodeobject.c | 3 +++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index f5704cffa199a5..4daf9e9fdbf2f1 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -1600,6 +1600,8 @@ object. Discard the internal Unicode buffer and destroy the writer instance. + If *writer* is ``NULL``, no operation is performed. + .. c:function:: int PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch) Write the single Unicode character *ch* into *writer*. diff --git a/Objects/listobject.c b/Objects/listobject.c index e7090f20001a39..930aefde325a7c 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -563,9 +563,7 @@ list_repr_impl(PyListObject *v) return PyUnicodeWriter_Finish(writer); error: - if (writer != NULL) { - PyUnicodeWriter_Discard(writer); - } + PyUnicodeWriter_Discard(writer); Py_ReprLeave((PyObject *)v); return NULL; } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index a9b33248163880..93c1025b6a3cae 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13455,6 +13455,9 @@ PyUnicodeWriter_Create(Py_ssize_t length) void PyUnicodeWriter_Discard(PyUnicodeWriter *writer) { + if (writer == NULL) { + return; + } _PyUnicodeWriter_Dealloc((_PyUnicodeWriter*)writer); PyMem_Free(writer); } From 942916378aa6a0946b1385c2c7ca6935620d710a Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 01:37:14 +0200 Subject: [PATCH 10/32] gh-125196: Use PyUnicodeWriter for repr(contextvars.Token) (#125220) Replace the private _PyUnicodeWriter with the public PyUnicodeWriter. 
--- Lib/test/test_context.py | 8 ++++++++ Python/context.c | 39 +++++++++++---------------------------- 2 files changed, 19 insertions(+), 28 deletions(-) diff --git a/Lib/test/test_context.py b/Lib/test/test_context.py index 255be306156c0b..b06b9df9f5b0b8 100644 --- a/Lib/test/test_context.py +++ b/Lib/test/test_context.py @@ -60,6 +60,14 @@ def test_context_var_repr_1(self): c.reset(t) self.assertIn(' used ', repr(t)) + @isolated_context + def test_token_repr_1(self): + c = contextvars.ContextVar('a') + tok = c.set(1) + self.assertRegex(repr(tok), + r"^ at 0x[0-9a-fA-F]+>$") + def test_context_subclassing_1(self): with self.assertRaisesRegex(TypeError, 'not an acceptable base type'): class MyContextVar(contextvars.ContextVar): diff --git a/Python/context.c b/Python/context.c index ddb03555f9e402..36e2677c398f59 100644 --- a/Python/context.c +++ b/Python/context.c @@ -1154,48 +1154,31 @@ token_tp_dealloc(PyContextToken *self) static PyObject * token_tp_repr(PyContextToken *self) { - _PyUnicodeWriter writer; - - _PyUnicodeWriter_Init(&writer); - - if (_PyUnicodeWriter_WriteASCIIString(&writer, "tok_used) { - if (_PyUnicodeWriter_WriteASCIIString(&writer, " used", 5) < 0) { + if (PyUnicodeWriter_WriteUTF8(writer, " used", 5) < 0) { goto error; } } - - if (_PyUnicodeWriter_WriteASCIIString(&writer, " var=", 5) < 0) { + if (PyUnicodeWriter_WriteUTF8(writer, " var=", 5) < 0) { goto error; } - - PyObject *var = PyObject_Repr((PyObject *)self->tok_var); - if (var == NULL) { + if (PyUnicodeWriter_WriteRepr(writer, (PyObject *)self->tok_var) < 0) { goto error; } - if (_PyUnicodeWriter_WriteStr(&writer, var) < 0) { - Py_DECREF(var); - goto error; - } - Py_DECREF(var); - - PyObject *addr = PyUnicode_FromFormat(" at %p>", self); - if (addr == NULL) { - goto error; - } - if (_PyUnicodeWriter_WriteStr(&writer, addr) < 0) { - Py_DECREF(addr); + if (PyUnicodeWriter_Format(writer, " at %p>", self) < 0) { goto error; } - Py_DECREF(addr); - - return _PyUnicodeWriter_Finish(&writer); + return PyUnicodeWriter_Finish(writer); error: - _PyUnicodeWriter_Dealloc(&writer); + PyUnicodeWriter_Discard(writer); return NULL; } From 99400930ac1d4e5e10a5ae30f8202d8bc2661e39 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Wed, 9 Oct 2024 19:44:03 -0400 Subject: [PATCH 11/32] gh-124872: Refine contextvars documentation (#124773) * Add definitions for "context", "current context", and "context management protocol". * Update related definitions to be consistent with the new definitions. * Restructure the documentation for the `contextvars.Context` class to prepare for adding context manager support, and for consistency with the definitions. * Use `testcode` and `testoutput` to test the `Context.run` example. * Expand the documentation for the `Py_CONTEXT_EVENT_ENTER` and `Py_CONTEXT_EVENT_EXIT` events to clarify and to prepare for planned changes. --- Doc/c-api/contextvars.rst | 20 ++-- Doc/glossary.rst | 40 ++++++-- Doc/library/contextvars.rst | 98 +++++++++++++------ Include/cpython/context.h | 23 +++-- ...-09-29-18-14-52.gh-issue-119333.7tinr0.rst | 3 + 5 files changed, 132 insertions(+), 52 deletions(-) create mode 100644 Misc/NEWS.d/next/Documentation/2024-09-29-18-14-52.gh-issue-119333.7tinr0.rst diff --git a/Doc/c-api/contextvars.rst b/Doc/c-api/contextvars.rst index 0de135b232aaaf..59e74ba1ac7022 100644 --- a/Doc/c-api/contextvars.rst +++ b/Doc/c-api/contextvars.rst @@ -122,18 +122,24 @@ Context object management functions: .. 
c:type:: PyContextEvent Enumeration of possible context object watcher events: - - ``Py_CONTEXT_EVENT_ENTER`` - - ``Py_CONTEXT_EVENT_EXIT`` + + - ``Py_CONTEXT_EVENT_ENTER``: A context has been entered, causing the + :term:`current context` to switch to it. The object passed to the watch + callback is the now-current :class:`contextvars.Context` object. Each + enter event will eventually have a corresponding exit event for the same + context object after any subsequently entered contexts have themselves been + exited. + - ``Py_CONTEXT_EVENT_EXIT``: A context is about to be exited, which will + cause the :term:`current context` to switch back to what it was before the + context was entered. The object passed to the watch callback is the + still-current :class:`contextvars.Context` object. .. versionadded:: 3.14 .. c:type:: int (*PyContext_WatchCallback)(PyContextEvent event, PyContext* ctx) - Type of a context object watcher callback function. - If *event* is ``Py_CONTEXT_EVENT_ENTER``, then the callback is invoked - after *ctx* has been set as the current context for the current thread. - Otherwise, the callback is invoked before the deactivation of *ctx* as the current context - and the restoration of the previous contex object for the current thread. + Context object watcher callback function. The object passed to the callback + is event-specific; see :c:type:`PyContextEvent` for details. If the callback returns with an exception set, it must return ``-1``; this exception will be printed as an unraisable exception using diff --git a/Doc/glossary.rst b/Doc/glossary.rst index cb7e0a2b89d037..1d407732eef576 100644 --- a/Doc/glossary.rst +++ b/Doc/glossary.rst @@ -265,19 +265,33 @@ Glossary advanced mathematical feature. If you're not aware of a need for them, it's almost certain you can safely ignore them. + context + This term has different meanings depending on where and how it is used. + Some common meanings: + + * The temporary state or environment established by a :term:`context + manager` via a :keyword:`with` statement. + * The collection of key­value bindings associated with a particular + :class:`contextvars.Context` object and accessed via + :class:`~contextvars.ContextVar` objects. Also see :term:`context + variable`. + * A :class:`contextvars.Context` object. Also see :term:`current + context`. + + context management protocol + The :meth:`~object.__enter__` and :meth:`~object.__exit__` methods called + by the :keyword:`with` statement. See :pep:`343`. + context manager - An object which controls the environment seen in a :keyword:`with` - statement by defining :meth:`~object.__enter__` and :meth:`~object.__exit__` methods. - See :pep:`343`. + An object which implements the :term:`context management protocol` and + controls the environment seen in a :keyword:`with` statement. See + :pep:`343`. context variable - A variable which can have different values depending on its context. - This is similar to Thread-Local Storage in which each execution - thread may have a different value for a variable. However, with context - variables, there may be several contexts in one execution thread and the - main usage for context variables is to keep track of variables in + A variable whose value depends on which context is the :term:`current + context`. Values are accessed via :class:`contextvars.ContextVar` + objects. Context variables are primarily used to isolate state between concurrent asynchronous tasks. - See :mod:`contextvars`. contiguous .. 
index:: C-contiguous, Fortran contiguous @@ -311,6 +325,14 @@ Glossary is used when necessary to distinguish this implementation from others such as Jython or IronPython. + current context + The :term:`context` (:class:`contextvars.Context` object) that is + currently used by :class:`~contextvars.ContextVar` objects to access (get + or set) the values of :term:`context variables `. Each + thread has its own current context. Frameworks for executing asynchronous + tasks (see :mod:`asyncio`) associate each task with a context which + becomes the current context whenever the task starts or resumes execution. + decorator A function returning another function, usually applied as a function transformation using the ``@wrapper`` syntax. Common examples for diff --git a/Doc/library/contextvars.rst b/Doc/library/contextvars.rst index 2a79dfe8f81e26..2b1fb9fdd29cd8 100644 --- a/Doc/library/contextvars.rst +++ b/Doc/library/contextvars.rst @@ -144,51 +144,89 @@ Manual Context Management To get a copy of the current context use the :func:`~contextvars.copy_context` function. - Every thread will have a different top-level :class:`~contextvars.Context` - object. This means that a :class:`ContextVar` object behaves in a similar - fashion to :func:`threading.local` when values are assigned in different - threads. + Each thread has its own effective stack of :class:`!Context` objects. The + :term:`current context` is the :class:`!Context` object at the top of the + current thread's stack. All :class:`!Context` objects in the stacks are + considered to be *entered*. + + *Entering* a context, which can be done by calling its :meth:`~Context.run` + method, makes the context the current context by pushing it onto the top of + the current thread's context stack. + + *Exiting* from the current context, which can be done by returning from the + callback passed to the :meth:`~Context.run` method, restores the current + context to what it was before the context was entered by popping the context + off the top of the context stack. + + Since each thread has its own context stack, :class:`ContextVar` objects + behave in a similar fashion to :func:`threading.local` when values are + assigned in different threads. + + Attempting to enter an already entered context, including contexts entered in + other threads, raises a :exc:`RuntimeError`. + + After exiting a context, it can later be re-entered (from any thread). + + Any changes to :class:`ContextVar` values via the :meth:`ContextVar.set` + method are recorded in the current context. The :meth:`ContextVar.get` + method returns the value associated with the current context. Exiting a + context effectively reverts any changes made to context variables while the + context was entered (if needed, the values can be restored by re-entering the + context). Context implements the :class:`collections.abc.Mapping` interface. .. method:: run(callable, *args, **kwargs) - Execute ``callable(*args, **kwargs)`` code in the context object - the *run* method is called on. Return the result of the execution - or propagate an exception if one occurred. + Enters the Context, executes ``callable(*args, **kwargs)``, then exits the + Context. Returns *callable*'s return value, or propagates an exception if + one occurred. + + Example: + + .. 
testcode:: + + import contextvars - Any changes to any context variables that *callable* makes will - be contained in the context object:: + var = contextvars.ContextVar('var') + var.set('spam') + print(var.get()) # 'spam' - var = ContextVar('var') - var.set('spam') + ctx = contextvars.copy_context() - def main(): - # 'var' was set to 'spam' before - # calling 'copy_context()' and 'ctx.run(main)', so: - # var.get() == ctx[var] == 'spam' + def main(): + # 'var' was set to 'spam' before + # calling 'copy_context()' and 'ctx.run(main)', so: + print(var.get()) # 'spam' + print(ctx[var]) # 'spam' - var.set('ham') + var.set('ham') - # Now, after setting 'var' to 'ham': - # var.get() == ctx[var] == 'ham' + # Now, after setting 'var' to 'ham': + print(var.get()) # 'ham' + print(ctx[var]) # 'ham' - ctx = copy_context() + # Any changes that the 'main' function makes to 'var' + # will be contained in 'ctx'. + ctx.run(main) - # Any changes that the 'main' function makes to 'var' - # will be contained in 'ctx'. - ctx.run(main) + # The 'main()' function was run in the 'ctx' context, + # so changes to 'var' are contained in it: + print(ctx[var]) # 'ham' - # The 'main()' function was run in the 'ctx' context, - # so changes to 'var' are contained in it: - # ctx[var] == 'ham' + # However, outside of 'ctx', 'var' is still set to 'spam': + print(var.get()) # 'spam' - # However, outside of 'ctx', 'var' is still set to 'spam': - # var.get() == 'spam' + .. testoutput:: + :hide: - The method raises a :exc:`RuntimeError` when called on the same - context object from more than one OS thread, or when called - recursively. + spam + spam + spam + ham + ham + ham + spam .. method:: copy() diff --git a/Include/cpython/context.h b/Include/cpython/context.h index ec72966e82c6f9..d722b4d93134f7 100644 --- a/Include/cpython/context.h +++ b/Include/cpython/context.h @@ -28,15 +28,26 @@ PyAPI_FUNC(int) PyContext_Enter(PyObject *); PyAPI_FUNC(int) PyContext_Exit(PyObject *); typedef enum { - Py_CONTEXT_EVENT_ENTER, - Py_CONTEXT_EVENT_EXIT, + /* + * A context has been entered, causing the "current context" to switch to + * it. The object passed to the watch callback is the now-current + * contextvars.Context object. Each enter event will eventually have a + * corresponding exit event for the same context object after any + * subsequently entered contexts have themselves been exited. + */ + Py_CONTEXT_EVENT_ENTER, + /* + * A context is about to be exited, which will cause the "current context" + * to switch back to what it was before the context was entered. The + * object passed to the watch callback is the still-current + * contextvars.Context object. + */ + Py_CONTEXT_EVENT_EXIT, } PyContextEvent; /* - * Callback to be invoked when a context object is entered or exited. - * - * The callback is invoked with the event and a reference to - * the context after its entered and before its exited. + * Context object watcher callback function. The object passed to the callback + * is event-specific; see PyContextEvent for details. * * if the callback returns with an exception set, it must return -1. 
Otherwise * it should return 0 diff --git a/Misc/NEWS.d/next/Documentation/2024-09-29-18-14-52.gh-issue-119333.7tinr0.rst b/Misc/NEWS.d/next/Documentation/2024-09-29-18-14-52.gh-issue-119333.7tinr0.rst new file mode 100644 index 00000000000000..69a5c764d05a2e --- /dev/null +++ b/Misc/NEWS.d/next/Documentation/2024-09-29-18-14-52.gh-issue-119333.7tinr0.rst @@ -0,0 +1,3 @@ +Added definitions for :term:`context`, :term:`current context`, and +:term:`context management protocol`, updated related definitions to be +consistent, and expanded the documentation for :class:`contextvars.Context`. From 7a10cdec359750b5154490fa9e24475c90d05aab Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Thu, 10 Oct 2024 10:34:55 +0100 Subject: [PATCH 12/32] Pin the doctest workflow to Ubuntu 22.04 (#125236) --- .github/workflows/reusable-docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/reusable-docs.yml b/.github/workflows/reusable-docs.yml index 4b021b3dc32f15..7755cb431bd301 100644 --- a/.github/workflows/reusable-docs.yml +++ b/.github/workflows/reusable-docs.yml @@ -95,7 +95,7 @@ jobs: # Run "doctest" on HEAD as new syntax doesn't exist in the latest stable release doctest: name: 'Doctest' - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 timeout-minutes: 60 steps: - uses: actions/checkout@v4 From 1639d934b9180c278ac9c02be43cbb1beada494a Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 12:11:06 +0200 Subject: [PATCH 13/32] gh-125196: Add a free list to PyUnicodeWriter (#125227) --- Include/internal/pycore_freelist_state.h | 2 ++ Objects/object.c | 1 + Objects/unicodeobject.c | 13 +++++++++---- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_freelist_state.h b/Include/internal/pycore_freelist_state.h index 762c583ce94e9a..4e04cf431e0b31 100644 --- a/Include/internal/pycore_freelist_state.h +++ b/Include/internal/pycore_freelist_state.h @@ -20,6 +20,7 @@ extern "C" { # define Py_async_gen_asends_MAXFREELIST 80 # define Py_futureiters_MAXFREELIST 255 # define Py_object_stack_chunks_MAXFREELIST 4 +# define Py_unicode_writers_MAXFREELIST 1 // A generic freelist of either PyObjects or other data structures. struct _Py_freelist { @@ -44,6 +45,7 @@ struct _Py_freelists { struct _Py_freelist async_gen_asends; struct _Py_freelist futureiters; struct _Py_freelist object_stack_chunks; + struct _Py_freelist unicode_writers; }; #ifdef __cplusplus diff --git a/Objects/object.c b/Objects/object.c index 8d809158a6c1da..a97a900890320d 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -862,6 +862,7 @@ _PyObject_ClearFreeLists(struct _Py_freelists *freelists, int is_finalization) // stacks during GC, so emptying the free-list is counterproductive. clear_freelist(&freelists->object_stack_chunks, 1, PyMem_RawFree); } + clear_freelist(&freelists->unicode_writers, is_finalization, PyMem_Free); } /* diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 93c1025b6a3cae..b94a74c2c688a9 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -46,6 +46,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
#include "pycore_codecs.h" // _PyCodec_Lookup() #include "pycore_critical_section.h" // Py_*_CRITICAL_SECTION_SEQUENCE_FAST #include "pycore_format.h" // F_LJUST +#include "pycore_freelist.h" // _Py_FREELIST_FREE(), _Py_FREELIST_POP() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interp.h" // PyInterpreterState.fs_codec #include "pycore_long.h" // _PyLong_FormatWriter() @@ -13436,9 +13437,13 @@ PyUnicodeWriter_Create(Py_ssize_t length) } const size_t size = sizeof(_PyUnicodeWriter); - PyUnicodeWriter *pub_writer = (PyUnicodeWriter *)PyMem_Malloc(size); + PyUnicodeWriter *pub_writer; + pub_writer = _Py_FREELIST_POP_MEM(unicode_writers); if (pub_writer == NULL) { - return (PyUnicodeWriter *)PyErr_NoMemory(); + pub_writer = (PyUnicodeWriter *)PyMem_Malloc(size); + if (pub_writer == NULL) { + return (PyUnicodeWriter *)PyErr_NoMemory(); + } } _PyUnicodeWriter *writer = (_PyUnicodeWriter *)pub_writer; @@ -13459,7 +13464,7 @@ void PyUnicodeWriter_Discard(PyUnicodeWriter *writer) return; } _PyUnicodeWriter_Dealloc((_PyUnicodeWriter*)writer); - PyMem_Free(writer); + _Py_FREELIST_FREE(unicode_writers, writer, PyMem_Free); } @@ -13881,7 +13886,7 @@ PyUnicodeWriter_Finish(PyUnicodeWriter *writer) { PyObject *str = _PyUnicodeWriter_Finish((_PyUnicodeWriter*)writer); assert(((_PyUnicodeWriter*)writer)->buffer == NULL); - PyMem_Free(writer); + _Py_FREELIST_FREE(unicode_writers, writer, PyMem_Free); return str; } From 82dfdc328779778295075d791ee30a0308fb9af4 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 12:20:53 +0200 Subject: [PATCH 14/32] gh-125196: Use PyUnicodeWriter for repr(tuple) (#125242) --- Objects/tupleobject.c | 50 +++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 47134697918052..f3132e0933ac30 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -232,56 +232,54 @@ tuple_repr(PyObject *self) return res > 0 ? PyUnicode_FromString("(...)") : NULL; } - _PyUnicodeWriter writer; - _PyUnicodeWriter_Init(&writer); - writer.overallocate = 1; + Py_ssize_t prealloc; if (n > 1) { - /* "(" + "1" + ", 2" * (len - 1) + ")" */ - writer.min_length = 1 + 1 + (2 + 1) * (n - 1) + 1; + // "(" + "1" + ", 2" * (len - 1) + ")" + prealloc = 1 + 1 + (2 + 1) * (n - 1) + 1; } else { - /* "(1,)" */ - writer.min_length = 4; + // "(1,)" + prealloc = 4; + } + PyUnicodeWriter *writer = PyUnicodeWriter_Create(prealloc); + if (writer == NULL) { + goto error; } - if (_PyUnicodeWriter_WriteChar(&writer, '(') < 0) + if (PyUnicodeWriter_WriteChar(writer, '(') < 0) { goto error; + } /* Do repr() on each element. 
*/ for (Py_ssize_t i = 0; i < n; ++i) { - PyObject *s; - if (i > 0) { - if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0) + if (PyUnicodeWriter_WriteChar(writer, ',') < 0) { goto error; + } + if (PyUnicodeWriter_WriteChar(writer, ' ') < 0) { + goto error; + } } - s = PyObject_Repr(v->ob_item[i]); - if (s == NULL) - goto error; - - if (_PyUnicodeWriter_WriteStr(&writer, s) < 0) { - Py_DECREF(s); + if (PyUnicodeWriter_WriteRepr(writer, v->ob_item[i]) < 0) { goto error; } - Py_DECREF(s); } - writer.overallocate = 0; - if (n > 1) { - if (_PyUnicodeWriter_WriteChar(&writer, ')') < 0) + if (n == 1) { + if (PyUnicodeWriter_WriteChar(writer, ',') < 0) { goto error; + } } - else { - if (_PyUnicodeWriter_WriteASCIIString(&writer, ",)", 2) < 0) - goto error; + if (PyUnicodeWriter_WriteChar(writer, ')') < 0) { + goto error; } Py_ReprLeave((PyObject *)v); - return _PyUnicodeWriter_Finish(&writer); + return PyUnicodeWriter_Finish(writer); error: - _PyUnicodeWriter_Dealloc(&writer); + PyUnicodeWriter_Discard(writer); Py_ReprLeave((PyObject *)v); return NULL; } From f9ae5d1cee2f8927a71cd4f1f66f10050a4f658a Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Thu, 10 Oct 2024 11:21:12 +0100 Subject: [PATCH 15/32] gh-71784: [doc] add usage examples for traceback.TracebackException (#125189) Co-authored-by: Alex Waygood --- Doc/library/traceback.rst | 133 +++++++++++++++++++++++++++++++++----- 1 file changed, 118 insertions(+), 15 deletions(-) diff --git a/Doc/library/traceback.rst b/Doc/library/traceback.rst index 401e12be45f418..100a92b73d5497 100644 --- a/Doc/library/traceback.rst +++ b/Doc/library/traceback.rst @@ -8,11 +8,15 @@ -------------- -This module provides a standard interface to extract, format and print stack -traces of Python programs. It exactly mimics the behavior of the Python -interpreter when it prints a stack trace. This is useful when you want to print -stack traces under program control, such as in a "wrapper" around the -interpreter. +This module provides a standard interface to extract, format and print +stack traces of Python programs. It is more flexible than the +interpreter's default traceback display, and therefore makes it +possible to configure certain aspects of the output. Finally, +it contains a utility for capturing enough information about an +exception to print it later, without the need to save a reference +to the actual exception. Since exceptions can be the roots of large +objects graph, this utility can significantly improve +memory management. .. index:: pair: object; traceback @@ -29,7 +33,20 @@ which are assigned to the :attr:`~BaseException.__traceback__` field of Module :mod:`pdb` Interactive source code debugger for Python programs. -The module defines the following functions: +The module's API can be divided into two parts: + +* Module-level functions offering basic functionality, which are useful for interactive + inspection of exceptions and tracebacks. + +* :class:`TracebackException` class and its helper classes + :class:`StackSummary` and :class:`FrameSummary`. These offer both more + flexibility in the output generated and the ability to store the information + necessary for later formatting without holding references to actual exception + and traceback objects. + + +Module-Level Functions +---------------------- .. function:: print_tb(tb, limit=None, file=None) @@ -237,7 +254,6 @@ The module defines the following functions: .. 
versionadded:: 3.5 -The module also defines the following classes: :class:`!TracebackException` Objects ------------------------------------ @@ -245,12 +261,17 @@ The module also defines the following classes: .. versionadded:: 3.5 :class:`!TracebackException` objects are created from actual exceptions to -capture data for later printing in a lightweight fashion. +capture data for later printing. They offer a more lightweight method of +storing this information by avoiding holding references to +:ref:`traceback` and :ref:`frame` objects +In addition, they expose more options to configure the output compared to +the module-level functions described above. .. class:: TracebackException(exc_type, exc_value, exc_traceback, *, limit=None, lookup_lines=True, capture_locals=False, compact=False, max_group_width=15, max_group_depth=10) - Capture an exception for later rendering. *limit*, *lookup_lines* and - *capture_locals* are as for the :class:`StackSummary` class. + Capture an exception for later rendering. The meaning of *limit*, + *lookup_lines* and *capture_locals* are as for the :class:`StackSummary` + class. If *compact* is true, only data that is required by :class:`!TracebackException`'s :meth:`format` method @@ -509,8 +530,8 @@ in a :ref:`traceback `. .. _traceback-example: -Traceback Examples ------------------- +Examples of Using the Module-Level Functions +-------------------------------------------- This simple example implements a basic read-eval-print loop, similar to (but less useful than) the standard Python interactive interpreter loop. For a more @@ -549,8 +570,7 @@ exception and traceback: try: lumberjack() - except IndexError: - exc = sys.exception() + except IndexError as exc: print("*** print_tb:") traceback.print_tb(exc.__traceback__, limit=1, file=sys.stdout) print("*** print_exception:") @@ -653,5 +673,88 @@ This last example demonstrates the final few formatting functions: [' File "spam.py", line 3, in \n spam.eggs()\n', ' File "eggs.py", line 42, in eggs\n return "bacon"\n'] >>> an_error = IndexError('tuple index out of range') - >>> traceback.format_exception_only(type(an_error), an_error) + >>> traceback.format_exception_only(an_error) ['IndexError: tuple index out of range\n'] + + +Examples of Using :class:`TracebackException` +--------------------------------------------- + +With the helper class, we have more options:: + + >>> import sys + >>> from traceback import TracebackException + >>> + >>> def lumberjack(): + ... bright_side_of_life() + ... + >>> def bright_side_of_life(): + ... t = "bright", "side", "of", "life" + ... return t[5] + ... + >>> try: + ... lumberjack() + ... except IndexError as e: + ... exc = e + ... + >>> try: + ... try: + ... lumberjack() + ... except: + ... 1/0 + ... except Exception as e: + ... chained_exc = e + ... 
+ >>> # limit works as with the module-level functions + >>> TracebackException.from_exception(exc, limit=-2).print() + Traceback (most recent call last): + File "", line 6, in lumberjack + bright_side_of_life() + ~~~~~~~~~~~~~~~~~~~^^ + File "", line 10, in bright_side_of_life + return t[5] + ~^^^ + IndexError: tuple index out of range + + >>> # capture_locals adds local variables in frames + >>> TracebackException.from_exception(exc, limit=-2, capture_locals=True).print() + Traceback (most recent call last): + File "", line 6, in lumberjack + bright_side_of_life() + ~~~~~~~~~~~~~~~~~~~^^ + File "", line 10, in bright_side_of_life + return t[5] + ~^^^ + t = ("bright", "side", "of", "life") + IndexError: tuple index out of range + + >>> # The *chain* kwarg to print() controls whether chained + >>> # exceptions are displayed + >>> TracebackException.from_exception(chained_exc).print() + Traceback (most recent call last): + File "", line 4, in + lumberjack() + ~~~~~~~~~~^^ + File "", line 7, in lumberjack + bright_side_of_life() + ~~~~~~~~~~~~~~~~~~~^^ + File "", line 11, in bright_side_of_life + return t[5] + ~^^^ + IndexError: tuple index out of range + + During handling of the above exception, another exception occurred: + + Traceback (most recent call last): + File "", line 6, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + + >>> TracebackException.from_exception(chained_exc).print(chain=False) + Traceback (most recent call last): + File "", line 6, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + From e4cab488d4445e8444932f3bed1c329c0d9e5038 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Thu, 10 Oct 2024 21:39:53 +0900 Subject: [PATCH 16/32] gh-124471: Set name for unnamed reusable workflow (#124475) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Sviatoslav Sydorenko (Святослав Сидоренко) --- .github/workflows/reusable-change-detection.yml | 4 +--- .github/workflows/reusable-docs.yml | 2 +- .github/workflows/reusable-macos.yml | 2 ++ .github/workflows/reusable-tsan.yml | 2 ++ .github/workflows/reusable-ubuntu.yml | 2 ++ .github/workflows/reusable-wasi.yml | 2 ++ .github/workflows/reusable-windows-msi.yml | 2 +- .github/workflows/reusable-windows.yml | 2 ++ 8 files changed, 13 insertions(+), 5 deletions(-) diff --git a/.github/workflows/reusable-change-detection.yml b/.github/workflows/reusable-change-detection.yml index 6f599f75547ceb..5cd6fb39f1e12f 100644 --- a/.github/workflows/reusable-change-detection.yml +++ b/.github/workflows/reusable-change-detection.yml @@ -1,6 +1,4 @@ ---- - -name: Change detection +name: Reusable change detection on: # yamllint disable-line rule:truthy workflow_call: diff --git a/.github/workflows/reusable-docs.yml b/.github/workflows/reusable-docs.yml index 7755cb431bd301..3809f24dcc977e 100644 --- a/.github/workflows/reusable-docs.yml +++ b/.github/workflows/reusable-docs.yml @@ -1,4 +1,4 @@ -name: Docs +name: Reusable Docs on: workflow_call: diff --git a/.github/workflows/reusable-macos.yml b/.github/workflows/reusable-macos.yml index b4227545887ad1..b3a160fbbf8053 100644 --- a/.github/workflows/reusable-macos.yml +++ b/.github/workflows/reusable-macos.yml @@ -1,3 +1,5 @@ +name: Reusable macOS + on: workflow_call: inputs: diff --git a/.github/workflows/reusable-tsan.yml b/.github/workflows/reusable-tsan.yml index 27f4eacd86fd95..f4c976ca996410 100644 --- a/.github/workflows/reusable-tsan.yml +++ 
b/.github/workflows/reusable-tsan.yml @@ -1,3 +1,5 @@ +name: Reusable Thread Sanitizer + on: workflow_call: inputs: diff --git a/.github/workflows/reusable-ubuntu.yml b/.github/workflows/reusable-ubuntu.yml index 769f1210de4d3c..0cf40ba8a9b03b 100644 --- a/.github/workflows/reusable-ubuntu.yml +++ b/.github/workflows/reusable-ubuntu.yml @@ -1,3 +1,5 @@ +name: Reusable Ubuntu + on: workflow_call: inputs: diff --git a/.github/workflows/reusable-wasi.yml b/.github/workflows/reusable-wasi.yml index 1b1a68c0badc76..4c8137c958a312 100644 --- a/.github/workflows/reusable-wasi.yml +++ b/.github/workflows/reusable-wasi.yml @@ -1,3 +1,5 @@ +name: Reusable WASI + on: workflow_call: inputs: diff --git a/.github/workflows/reusable-windows-msi.yml b/.github/workflows/reusable-windows-msi.yml index fc34ab7c3eb1f2..abdb1a1982fef8 100644 --- a/.github/workflows/reusable-windows-msi.yml +++ b/.github/workflows/reusable-windows-msi.yml @@ -1,4 +1,4 @@ -name: TestsMSI +name: Reusable Windows MSI on: workflow_call: diff --git a/.github/workflows/reusable-windows.yml b/.github/workflows/reusable-windows.yml index e9c3c8e05a801c..dcfc62d7f5d145 100644 --- a/.github/workflows/reusable-windows.yml +++ b/.github/workflows/reusable-windows.yml @@ -1,3 +1,5 @@ +name: Reusable Windows + on: workflow_call: inputs: From 87d7315ac57250046372b0d9ae4619ba619c8c87 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Thu, 10 Oct 2024 15:42:03 +0300 Subject: [PATCH 17/32] gh-125118: don't copy arbitrary values to _Bool in the struct module (GH-125169) memcopy'ing arbitrary values to _Bool variable triggers undefined behaviour. Avoid this. We assume that `false` is represented by all zero bytes. Credits to Alex Gaynor. Co-authored-by: Sam Gross Co-authored-by: Victor Stinner Co-authored-by: Petr Viktorin --- Lib/test/test_struct.py | 3 +++ .../Library/2024-10-09-07-09-00.gh-issue-125118.J9rQ1S.rst | 1 + Modules/_struct.c | 5 ++--- 3 files changed, 6 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-09-07-09-00.gh-issue-125118.J9rQ1S.rst diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index e3193c7863fbae..04ec3ed0837c82 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -540,6 +540,9 @@ def __bool__(self): for c in [b'\x01', b'\x7f', b'\xff', b'\x0f', b'\xf0']: self.assertTrue(struct.unpack('>?', c)[0]) + self.assertTrue(struct.unpack(' Date: Thu, 10 Oct 2024 21:57:13 +0900 Subject: [PATCH 18/32] gh-124153: Simplify PyType_GetBaseByToken (GH-124488) --- Objects/typeobject.c | 123 +++++++++++++++---------------------------- 1 file changed, 43 insertions(+), 80 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 6484e8921f8122..5380633fa1149e 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5269,11 +5269,10 @@ PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def) static PyTypeObject * -get_base_by_token_recursive(PyTypeObject *type, void *token) +get_base_by_token_recursive(PyObject *bases, void *token) { - assert(PyType_GetSlot(type, Py_tp_token) != token); - PyObject *bases = lookup_tp_bases(type); assert(bases != NULL); + PyTypeObject *res = NULL; Py_ssize_t n = PyTuple_GET_SIZE(bases); for (Py_ssize_t i = 0; i < n; i++) { PyTypeObject *base = _PyType_CAST(PyTuple_GET_ITEM(bases, i)); @@ -5281,112 +5280,76 @@ get_base_by_token_recursive(PyTypeObject *type, void *token) continue; } if (((PyHeapTypeObject*)base)->ht_token == token) { - return base; + res = base; + break; } - base = get_base_by_token_recursive(base, 
token); + base = get_base_by_token_recursive(lookup_tp_bases(base), token); if (base != NULL) { - return base; + res = base; + break; } } - return NULL; + return res; // Prefer to return recursively from one place } -static inline PyTypeObject * -get_base_by_token_from_mro(PyTypeObject *type, void *token) +int +PyType_GetBaseByToken(PyTypeObject *type, void *token, PyTypeObject **result) { - // Bypass lookup_tp_mro() as PyType_IsSubtype() does - PyObject *mro = type->tp_mro; - assert(mro != NULL); - assert(PyTuple_Check(mro)); - // mro_invoke() ensures that the type MRO cannot be empty. - assert(PyTuple_GET_SIZE(mro) >= 1); - // Also, the first item in the MRO is the type itself, which is supposed - // to be already checked by the caller. We skip it in the loop. - assert(PyTuple_GET_ITEM(mro, 0) == (PyObject *)type); - assert(PyType_GetSlot(type, Py_tp_token) != token); - - Py_ssize_t n = PyTuple_GET_SIZE(mro); - for (Py_ssize_t i = 1; i < n; i++) { - PyTypeObject *base = _PyType_CAST(PyTuple_GET_ITEM(mro, i)); - if (!_PyType_HasFeature(base, Py_TPFLAGS_HEAPTYPE)) { - continue; - } - if (((PyHeapTypeObject*)base)->ht_token == token) { - return base; - } + if (result != NULL) { + *result = NULL; } - return NULL; -} -static int -check_base_by_token(PyTypeObject *type, void *token) { - // Chain the branches, which will be optimized exclusive here if (token == NULL) { PyErr_Format(PyExc_SystemError, "PyType_GetBaseByToken called with token=NULL"); return -1; } - else if (!PyType_Check(type)) { + if (!PyType_Check(type)) { PyErr_Format(PyExc_TypeError, "expected a type, got a '%T' object", type); return -1; } - else if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { - return 0; - } - else if (((PyHeapTypeObject*)type)->ht_token == token) { - return 1; - } - else if (type->tp_mro != NULL) { - // This will not be inlined - return get_base_by_token_from_mro(type, token) ? 1 : 0; - } - else { - return get_base_by_token_recursive(type, token) ? 1 : 0; - } -} -int -PyType_GetBaseByToken(PyTypeObject *type, void *token, PyTypeObject **result) -{ - if (result == NULL) { - // If the `result` is checked only once here, the subsequent - // branches will become trivial to optimize. - return check_base_by_token(type, token); - } - if (token == NULL || !PyType_Check(type)) { - *result = NULL; - return check_base_by_token(type, token); - } - - // Chain the branches, which will be optimized exclusive here - PyTypeObject *base; if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { // No static type has a heaptype superclass, // which is ensured by type_ready_mro(). - *result = NULL; return 0; } - else if (((PyHeapTypeObject*)type)->ht_token == token) { - *result = (PyTypeObject *)Py_NewRef(type); + if (((PyHeapTypeObject*)type)->ht_token == token) { +found: + if (result != NULL) { + *result = (PyTypeObject *)Py_NewRef(type); + } return 1; } - else if (type->tp_mro != NULL) { - // Expect this to be inlined - base = get_base_by_token_from_mro(type, token); - } - else { - base = get_base_by_token_recursive(type, token); - } - if (base != NULL) { - *result = (PyTypeObject *)Py_NewRef(base); - return 1; - } - else { - *result = NULL; + PyObject *mro = type->tp_mro; // No lookup, following PyType_IsSubtype() + if (mro == NULL) { + PyTypeObject *base; + base = get_base_by_token_recursive(lookup_tp_bases(type), token); + if (base != NULL) { + // Copying the given type can cause a slowdown, + // unlike the overwrite below. 
+ type = base; + goto found; + } return 0; } + // mro_invoke() ensures that the type MRO cannot be empty. + assert(PyTuple_GET_SIZE(mro) >= 1); + // Also, the first item in the MRO is the type itself, which + // we already checked above. We skip it in the loop. + assert(PyTuple_GET_ITEM(mro, 0) == (PyObject *)type); + Py_ssize_t n = PyTuple_GET_SIZE(mro); + for (Py_ssize_t i = 1; i < n; i++) { + PyTypeObject *base = (PyTypeObject *)PyTuple_GET_ITEM(mro, i); + if (_PyType_HasFeature(base, Py_TPFLAGS_HEAPTYPE) + && ((PyHeapTypeObject*)base)->ht_token == token) { + type = base; + goto found; + } + } + return 0; } From c914212474792312bb125211bae5719650fe2f58 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 15:33:00 +0200 Subject: [PATCH 19/32] gh-125196: Use PyUnicodeWriter for JSON encoder (#125249) Replace the private _PyUnicodeWriter with the public PyUnicodeWriter. --- Modules/_json.c | 87 ++++++++++++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index 9e29de0f22465f..ce0093ab431d05 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -86,11 +86,11 @@ encoder_dealloc(PyObject *self); static int encoder_clear(PyEncoderObject *self); static int -encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent); +encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent); static int -encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent); +encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent); static int -encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent); +encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent); static PyObject * _encoded_const(PyObject *obj); static void @@ -1268,38 +1268,39 @@ encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds) { /* Python callable interface to encode_listencode_obj */ static char *kwlist[] = {"obj", "_current_indent_level", NULL}; - PyObject *obj, *result; + PyObject *obj; Py_ssize_t indent_level; - _PyUnicodeWriter writer; if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist, - &obj, &indent_level)) + &obj, &indent_level)) return NULL; - _PyUnicodeWriter_Init(&writer); - writer.overallocate = 1; + PyUnicodeWriter *writer = PyUnicodeWriter_Create(0); + if (writer == NULL) { + return NULL; + } PyObject *newline_indent = NULL; if (self->indent != Py_None) { newline_indent = _create_newline_indent(self->indent, indent_level); if (newline_indent == NULL) { - _PyUnicodeWriter_Dealloc(&writer); + PyUnicodeWriter_Discard(writer); return NULL; } } - if (encoder_listencode_obj(self, &writer, obj, newline_indent)) { - _PyUnicodeWriter_Dealloc(&writer); + if (encoder_listencode_obj(self, writer, obj, newline_indent)) { + PyUnicodeWriter_Discard(writer); Py_XDECREF(newline_indent); return NULL; } Py_XDECREF(newline_indent); - result = PyTuple_New(1); - if (result == NULL || - PyTuple_SetItem(result, 0, _PyUnicodeWriter_Finish(&writer)) < 0) { - Py_XDECREF(result); + PyObject *str = PyUnicodeWriter_Finish(writer); + if (str == NULL) { return NULL; } + PyObject *result = PyTuple_Pack(1, str); + Py_DECREF(str); return result; } @@ -1370,16 +1371,16 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj) } 
static int -_steal_accumulate(_PyUnicodeWriter *writer, PyObject *stolen) +_steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen) { /* Append stolen and then decrement its reference count */ - int rval = _PyUnicodeWriter_WriteStr(writer, stolen); + int rval = PyUnicodeWriter_WriteStr(writer, stolen); Py_DECREF(stolen); return rval; } static int -encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, +encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent) { /* Encode Python object obj to a JSON term */ @@ -1387,13 +1388,13 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, int rv; if (obj == Py_None) { - return _PyUnicodeWriter_WriteASCIIString(writer, "null", 4); + return PyUnicodeWriter_WriteUTF8(writer, "null", 4); } else if (obj == Py_True) { - return _PyUnicodeWriter_WriteASCIIString(writer, "true", 4); + return PyUnicodeWriter_WriteUTF8(writer, "true", 4); } else if (obj == Py_False) { - return _PyUnicodeWriter_WriteASCIIString(writer, "false", 5); + return PyUnicodeWriter_WriteUTF8(writer, "false", 5); } else if (PyUnicode_Check(obj)) { PyObject *encoded = encoder_encode_string(s, obj); @@ -1402,6 +1403,10 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, return _steal_accumulate(writer, encoded); } else if (PyLong_Check(obj)) { + if (PyLong_CheckExact(obj)) { + // Fast-path for exact integers + return PyUnicodeWriter_WriteRepr(writer, obj); + } PyObject *encoded = PyLong_Type.tp_repr(obj); if (encoded == NULL) return -1; @@ -1478,7 +1483,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, } static int -encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first, +encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *first, PyObject *dct, PyObject *key, PyObject *value, PyObject *newline_indent, PyObject *item_separator) @@ -1518,7 +1523,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir *first = false; } else { - if (_PyUnicodeWriter_WriteStr(writer, item_separator) < 0) { + if (PyUnicodeWriter_WriteStr(writer, item_separator) < 0) { Py_DECREF(keystr); return -1; } @@ -1533,7 +1538,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir if (_steal_accumulate(writer, encoded) < 0) { return -1; } - if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) { + if (PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) { return -1; } if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) { @@ -1544,7 +1549,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir } static int -encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, +encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent) { /* Encode Python dict dct a JSON term */ @@ -1555,8 +1560,10 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *new_newline_indent = NULL; PyObject *separator_indent = NULL; - if (PyDict_GET_SIZE(dct) == 0) /* Fast path */ - return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2); + if (PyDict_GET_SIZE(dct) == 0) { + /* Fast path */ + return PyUnicodeWriter_WriteUTF8(writer, "{}", 2); + } if (s->markers != Py_None) { int has_key; @@ -1574,8 +1581,9 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, } } - if (_PyUnicodeWriter_WriteChar(writer, '{')) + if (PyUnicodeWriter_WriteChar(writer, '{')) 
{ goto bail; + } PyObject *current_item_separator = s->item_separator; // borrowed reference if (s->indent != Py_None) { @@ -1589,7 +1597,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, } // update item separator with a borrowed reference current_item_separator = separator_indent; - if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) { + if (PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) { goto bail; } } @@ -1635,13 +1643,14 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, Py_CLEAR(new_newline_indent); Py_CLEAR(separator_indent); - if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { + if (PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { goto bail; } } - if (_PyUnicodeWriter_WriteChar(writer, '}')) + if (PyUnicodeWriter_WriteChar(writer, '}')) { goto bail; + } return 0; bail: @@ -1653,7 +1662,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, } static int -encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, +encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent) { PyObject *ident = NULL; @@ -1668,7 +1677,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, return -1; if (PySequence_Fast_GET_SIZE(s_fast) == 0) { Py_DECREF(s_fast); - return _PyUnicodeWriter_WriteASCIIString(writer, "[]", 2); + return PyUnicodeWriter_WriteUTF8(writer, "[]", 2); } if (s->markers != Py_None) { @@ -1687,8 +1696,9 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, } } - if (_PyUnicodeWriter_WriteChar(writer, '[')) + if (PyUnicodeWriter_WriteChar(writer, '[')) { goto bail; + } PyObject *separator = s->item_separator; // borrowed reference if (s->indent != Py_None) { @@ -1697,7 +1707,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, goto bail; } - if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) { + if (PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) { goto bail; } @@ -1710,7 +1720,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); if (i) { - if (_PyUnicodeWriter_WriteStr(writer, separator) < 0) + if (PyUnicodeWriter_WriteStr(writer, separator) < 0) goto bail; } if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) { @@ -1727,13 +1737,14 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, if (s->indent != Py_None) { Py_CLEAR(new_newline_indent); Py_CLEAR(separator_indent); - if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { + if (PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { goto bail; } } - if (_PyUnicodeWriter_WriteChar(writer, ']')) + if (PyUnicodeWriter_WriteChar(writer, ']')) { goto bail; + } Py_DECREF(s_fast); return 0; From 01fc3b34cc6994bc83b6540da3a8573e79dfbb56 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 10 Oct 2024 16:27:52 +0200 Subject: [PATCH 20/32] gh-124570: ctypes: Run some Structure tests on Union as well (GH-124976) - Move some Structure tests to test_structunion; use a common base test class + two subclasses to run them on Union too - Remove test_union for now as it's redundant Note: `test_simple_structs` & `test_simple_unions` are in the common file because they share `formats`. 
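The reason the shared tests below take separate `struct_size` and `union_size` expectations is that the same `_fields_` produce different layouts: a `Structure` places its fields sequentially, while a `Union` overlays them on the same storage. A minimal sketch of that difference (hypothetical class names, sizes assume a typical platform with a 4-byte `c_int`; this is illustrative only and not part of the patch)::

    from ctypes import Structure, Union, c_char, c_int, sizeof

    class SPair(Structure):
        _fields_ = [("x", c_int), ("y", c_char)]

    class UPair(Union):
        _fields_ = [("x", c_int), ("y", c_char)]

    # Structure: 4 bytes for x, 1 for y, padded to the int alignment -> typically 8
    print(sizeof(SPair))
    # Union: members share storage, so the largest member determines the size -> typically 4
    print(sizeof(UPair))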
--- Lib/test/test_ctypes/test_structunion.py | 353 +++++++++++++++++++++++ Lib/test/test_ctypes/test_structures.py | 337 +--------------------- Lib/test/test_ctypes/test_unions.py | 35 --- 3 files changed, 368 insertions(+), 357 deletions(-) create mode 100644 Lib/test/test_ctypes/test_structunion.py delete mode 100644 Lib/test/test_ctypes/test_unions.py diff --git a/Lib/test/test_ctypes/test_structunion.py b/Lib/test/test_ctypes/test_structunion.py new file mode 100644 index 00000000000000..973ac3b2f1919d --- /dev/null +++ b/Lib/test/test_ctypes/test_structunion.py @@ -0,0 +1,353 @@ +"""Common tests for ctypes.Structure and ctypes.Union""" + +import unittest +from ctypes import (Structure, Union, POINTER, sizeof, alignment, + c_char, c_byte, c_ubyte, + c_short, c_ushort, c_int, c_uint, + c_long, c_ulong, c_longlong, c_ulonglong, c_float, c_double) +from ._support import (_CData, PyCStructType, UnionType, + Py_TPFLAGS_DISALLOW_INSTANTIATION, + Py_TPFLAGS_IMMUTABLETYPE) +from struct import calcsize + + +class StructUnionTestBase: + formats = {"c": c_char, + "b": c_byte, + "B": c_ubyte, + "h": c_short, + "H": c_ushort, + "i": c_int, + "I": c_uint, + "l": c_long, + "L": c_ulong, + "q": c_longlong, + "Q": c_ulonglong, + "f": c_float, + "d": c_double, + } + + def test_subclass(self): + class X(self.cls): + _fields_ = [("a", c_int)] + + class Y(X): + _fields_ = [("b", c_int)] + + class Z(X): + pass + + self.assertEqual(sizeof(X), sizeof(c_int)) + self.check_sizeof(Y, + struct_size=sizeof(c_int)*2, + union_size=sizeof(c_int)) + self.assertEqual(sizeof(Z), sizeof(c_int)) + self.assertEqual(X._fields_, [("a", c_int)]) + self.assertEqual(Y._fields_, [("b", c_int)]) + self.assertEqual(Z._fields_, [("a", c_int)]) + + def test_subclass_delayed(self): + class X(self.cls): + pass + self.assertEqual(sizeof(X), 0) + X._fields_ = [("a", c_int)] + + class Y(X): + pass + self.assertEqual(sizeof(Y), sizeof(X)) + Y._fields_ = [("b", c_int)] + + class Z(X): + pass + + self.assertEqual(sizeof(X), sizeof(c_int)) + self.check_sizeof(Y, + struct_size=sizeof(c_int)*2, + union_size=sizeof(c_int)) + self.assertEqual(sizeof(Z), sizeof(c_int)) + self.assertEqual(X._fields_, [("a", c_int)]) + self.assertEqual(Y._fields_, [("b", c_int)]) + self.assertEqual(Z._fields_, [("a", c_int)]) + + def test_inheritance_hierarchy(self): + self.assertEqual(self.cls.mro(), [self.cls, _CData, object]) + self.assertEqual(type(self.metacls), type) + + def test_type_flags(self): + for cls in self.cls, self.metacls: + with self.subTest(cls=cls): + self.assertTrue(cls.__flags__ & Py_TPFLAGS_IMMUTABLETYPE) + self.assertFalse(cls.__flags__ & Py_TPFLAGS_DISALLOW_INSTANTIATION) + + def test_metaclass_details(self): + # Abstract classes (whose metaclass __init__ was not called) can't be + # instantiated directly + NewClass = self.metacls.__new__(self.metacls, 'NewClass', + (self.cls,), {}) + for cls in self.cls, NewClass: + with self.subTest(cls=cls): + with self.assertRaisesRegex(TypeError, "abstract class"): + obj = cls() + + # Cannot call the metaclass __init__ more than once + class T(self.cls): + _fields_ = [("x", c_char), + ("y", c_char)] + with self.assertRaisesRegex(SystemError, "already initialized"): + self.metacls.__init__(T, 'ptr', (), {}) + + def test_alignment(self): + class X(self.cls): + _fields_ = [("x", c_char * 3)] + self.assertEqual(alignment(X), calcsize("s")) + self.assertEqual(sizeof(X), calcsize("3s")) + + class Y(self.cls): + _fields_ = [("x", c_char * 3), + ("y", c_int)] + self.assertEqual(alignment(Y), alignment(c_int)) 
+ self.check_sizeof(Y, + struct_size=calcsize("3s i"), + union_size=max(calcsize("3s"), calcsize("i"))) + + class SI(self.cls): + _fields_ = [("a", X), + ("b", Y)] + self.assertEqual(alignment(SI), max(alignment(Y), alignment(X))) + self.check_sizeof(SI, + struct_size=calcsize("3s0i 3si 0i"), + union_size=max(calcsize("3s"), calcsize("i"))) + + class IS(self.cls): + _fields_ = [("b", Y), + ("a", X)] + + self.assertEqual(alignment(SI), max(alignment(X), alignment(Y))) + self.check_sizeof(IS, + struct_size=calcsize("3si 3s 0i"), + union_size=max(calcsize("3s"), calcsize("i"))) + + class XX(self.cls): + _fields_ = [("a", X), + ("b", X)] + self.assertEqual(alignment(XX), alignment(X)) + self.check_sizeof(XX, + struct_size=calcsize("3s 3s 0s"), + union_size=calcsize("3s")) + + def test_empty(self): + # I had problems with these + # + # Although these are pathological cases: Empty Structures! + class X(self.cls): + _fields_ = [] + + # Is this really the correct alignment, or should it be 0? + self.assertTrue(alignment(X) == 1) + self.assertTrue(sizeof(X) == 0) + + class XX(self.cls): + _fields_ = [("a", X), + ("b", X)] + + self.assertEqual(alignment(XX), 1) + self.assertEqual(sizeof(XX), 0) + + def test_fields(self): + # test the offset and size attributes of Structure/Union fields. + class X(self.cls): + _fields_ = [("x", c_int), + ("y", c_char)] + + self.assertEqual(X.x.offset, 0) + self.assertEqual(X.x.size, sizeof(c_int)) + + if self.cls == Structure: + self.assertEqual(X.y.offset, sizeof(c_int)) + else: + self.assertEqual(X.y.offset, 0) + self.assertEqual(X.y.size, sizeof(c_char)) + + # readonly + self.assertRaises((TypeError, AttributeError), setattr, X.x, "offset", 92) + self.assertRaises((TypeError, AttributeError), setattr, X.x, "size", 92) + + # XXX Should we check nested data types also? + # offset is always relative to the class... + + def test_invalid_field_types(self): + class POINT(self.cls): + pass + self.assertRaises(TypeError, setattr, POINT, "_fields_", [("x", 1), ("y", 2)]) + + def test_invalid_name(self): + # field name must be string + def declare_with_name(name): + class S(self.cls): + _fields_ = [(name, c_int)] + + self.assertRaises(TypeError, declare_with_name, b"x") + + def test_intarray_fields(self): + class SomeInts(self.cls): + _fields_ = [("a", c_int * 4)] + + # can use tuple to initialize array (but not list!) 
+ self.assertEqual(SomeInts((1, 2)).a[:], [1, 2, 0, 0]) + self.assertEqual(SomeInts((1, 2)).a[::], [1, 2, 0, 0]) + self.assertEqual(SomeInts((1, 2)).a[::-1], [0, 0, 2, 1]) + self.assertEqual(SomeInts((1, 2)).a[::2], [1, 0]) + self.assertEqual(SomeInts((1, 2)).a[1:5:6], [2]) + self.assertEqual(SomeInts((1, 2)).a[6:4:-1], []) + self.assertEqual(SomeInts((1, 2, 3, 4)).a[:], [1, 2, 3, 4]) + self.assertEqual(SomeInts((1, 2, 3, 4)).a[::], [1, 2, 3, 4]) + # too long + # XXX Should raise ValueError?, not RuntimeError + self.assertRaises(RuntimeError, SomeInts, (1, 2, 3, 4, 5)) + + def test_huge_field_name(self): + # issue12881: segfault with large structure field names + def create_class(length): + class S(self.cls): + _fields_ = [('x' * length, c_int)] + + for length in [10 ** i for i in range(0, 8)]: + try: + create_class(length) + except MemoryError: + # MemoryErrors are OK, we just don't want to segfault + pass + + def test_abstract_class(self): + class X(self.cls): + _abstract_ = "something" + with self.assertRaisesRegex(TypeError, r"^abstract class$"): + X() + + def test_methods(self): + self.assertIn("in_dll", dir(type(self.cls))) + self.assertIn("from_address", dir(type(self.cls))) + self.assertIn("in_dll", dir(type(self.cls))) + + +class StructureTestCase(unittest.TestCase, StructUnionTestBase): + cls = Structure + metacls = PyCStructType + + def test_metaclass_name(self): + self.assertEqual(self.metacls.__name__, "PyCStructType") + + def check_sizeof(self, cls, *, struct_size, union_size): + self.assertEqual(sizeof(cls), struct_size) + + def test_simple_structs(self): + for code, tp in self.formats.items(): + class X(Structure): + _fields_ = [("x", c_char), + ("y", tp)] + self.assertEqual((sizeof(X), code), + (calcsize("c%c0%c" % (code, code)), code)) + + +class UnionTestCase(unittest.TestCase, StructUnionTestBase): + cls = Union + metacls = UnionType + + def test_metaclass_name(self): + self.assertEqual(self.metacls.__name__, "UnionType") + + def check_sizeof(self, cls, *, struct_size, union_size): + self.assertEqual(sizeof(cls), union_size) + + def test_simple_unions(self): + for code, tp in self.formats.items(): + class X(Union): + _fields_ = [("x", c_char), + ("y", tp)] + self.assertEqual((sizeof(X), code), + (calcsize("%c" % (code)), code)) + + +class PointerMemberTestBase: + def test(self): + # a Structure/Union with a POINTER field + class S(self.cls): + _fields_ = [("array", POINTER(c_int))] + + s = S() + # We can assign arrays of the correct type + s.array = (c_int * 3)(1, 2, 3) + items = [s.array[i] for i in range(3)] + self.assertEqual(items, [1, 2, 3]) + + s.array[0] = 42 + + items = [s.array[i] for i in range(3)] + self.assertEqual(items, [42, 2, 3]) + + s.array[0] = 1 + + items = [s.array[i] for i in range(3)] + self.assertEqual(items, [1, 2, 3]) + +class PointerMemberTestCase_Struct(unittest.TestCase, PointerMemberTestBase): + cls = Structure + + def test_none_to_pointer_fields(self): + class S(self.cls): + _fields_ = [("x", c_int), + ("p", POINTER(c_int))] + + s = S() + s.x = 12345678 + s.p = None + self.assertEqual(s.x, 12345678) + +class PointerMemberTestCase_Union(unittest.TestCase, PointerMemberTestBase): + cls = Union + + def test_none_to_pointer_fields(self): + class S(self.cls): + _fields_ = [("x", c_int), + ("p", POINTER(c_int))] + + s = S() + s.x = 12345678 + s.p = None + self.assertFalse(s.p) # NULL pointers are falsy + + +class TestRecursiveBase: + def test_contains_itself(self): + class Recursive(self.cls): + pass + + try: + Recursive._fields_ = [("next", 
Recursive)] + except AttributeError as details: + self.assertIn("Structure or union cannot contain itself", + str(details)) + else: + self.fail("Structure or union cannot contain itself") + + + def test_vice_versa(self): + class First(self.cls): + pass + class Second(self.cls): + pass + + First._fields_ = [("second", Second)] + + try: + Second._fields_ = [("first", First)] + except AttributeError as details: + self.assertIn("_fields_ is final", str(details)) + else: + self.fail("AttributeError not raised") + +class TestRecursiveStructure(unittest.TestCase, TestRecursiveBase): + cls = Structure + +class TestRecursiveUnion(unittest.TestCase, TestRecursiveBase): + cls = Union diff --git a/Lib/test/test_ctypes/test_structures.py b/Lib/test/test_ctypes/test_structures.py index 6cc09c8f2b5b59..0ec238e04b74cd 100644 --- a/Lib/test/test_ctypes/test_structures.py +++ b/Lib/test/test_ctypes/test_structures.py @@ -1,209 +1,24 @@ +"""Tests for ctypes.Structure + +Features common with Union should go in test_structunion.py instead. +""" + from platform import architecture as _architecture import struct import sys import unittest -from ctypes import (CDLL, Structure, Union, POINTER, sizeof, byref, alignment, +from ctypes import (CDLL, Structure, Union, POINTER, sizeof, byref, c_void_p, c_char, c_wchar, c_byte, c_ubyte, - c_uint8, c_uint16, c_uint32, - c_short, c_ushort, c_int, c_uint, - c_long, c_ulong, c_longlong, c_ulonglong, c_float, c_double) + c_uint8, c_uint16, c_uint32, c_int, c_uint, + c_long, c_ulong, c_longlong, c_float, c_double) from ctypes.util import find_library -from struct import calcsize from collections import namedtuple from test import support from test.support import import_helper _ctypes_test = import_helper.import_module("_ctypes_test") -from ._support import (_CData, PyCStructType, Py_TPFLAGS_DISALLOW_INSTANTIATION, - Py_TPFLAGS_IMMUTABLETYPE) - - -class SubclassesTest(unittest.TestCase): - def test_subclass(self): - class X(Structure): - _fields_ = [("a", c_int)] - - class Y(X): - _fields_ = [("b", c_int)] - - class Z(X): - pass - - self.assertEqual(sizeof(X), sizeof(c_int)) - self.assertEqual(sizeof(Y), sizeof(c_int)*2) - self.assertEqual(sizeof(Z), sizeof(c_int)) - self.assertEqual(X._fields_, [("a", c_int)]) - self.assertEqual(Y._fields_, [("b", c_int)]) - self.assertEqual(Z._fields_, [("a", c_int)]) - - def test_subclass_delayed(self): - class X(Structure): - pass - self.assertEqual(sizeof(X), 0) - X._fields_ = [("a", c_int)] - - class Y(X): - pass - self.assertEqual(sizeof(Y), sizeof(X)) - Y._fields_ = [("b", c_int)] - - class Z(X): - pass - - self.assertEqual(sizeof(X), sizeof(c_int)) - self.assertEqual(sizeof(Y), sizeof(c_int)*2) - self.assertEqual(sizeof(Z), sizeof(c_int)) - self.assertEqual(X._fields_, [("a", c_int)]) - self.assertEqual(Y._fields_, [("b", c_int)]) - self.assertEqual(Z._fields_, [("a", c_int)]) class StructureTestCase(unittest.TestCase): - formats = {"c": c_char, - "b": c_byte, - "B": c_ubyte, - "h": c_short, - "H": c_ushort, - "i": c_int, - "I": c_uint, - "l": c_long, - "L": c_ulong, - "q": c_longlong, - "Q": c_ulonglong, - "f": c_float, - "d": c_double, - } - - def test_inheritance_hierarchy(self): - self.assertEqual(Structure.mro(), [Structure, _CData, object]) - - self.assertEqual(PyCStructType.__name__, "PyCStructType") - self.assertEqual(type(PyCStructType), type) - - - def test_type_flags(self): - for cls in Structure, PyCStructType: - with self.subTest(cls=cls): - self.assertTrue(Structure.__flags__ & Py_TPFLAGS_IMMUTABLETYPE) - 
self.assertFalse(Structure.__flags__ & Py_TPFLAGS_DISALLOW_INSTANTIATION) - - def test_metaclass_details(self): - # Abstract classes (whose metaclass __init__ was not called) can't be - # instantiated directly - NewStructure = PyCStructType.__new__(PyCStructType, 'NewStructure', - (Structure,), {}) - for cls in Structure, NewStructure: - with self.subTest(cls=cls): - with self.assertRaisesRegex(TypeError, "abstract class"): - obj = cls() - - # Cannot call the metaclass __init__ more than once - class T(Structure): - _fields_ = [("x", c_char), - ("y", c_char)] - with self.assertRaisesRegex(SystemError, "already initialized"): - PyCStructType.__init__(T, 'ptr', (), {}) - - def test_simple_structs(self): - for code, tp in self.formats.items(): - class X(Structure): - _fields_ = [("x", c_char), - ("y", tp)] - self.assertEqual((sizeof(X), code), - (calcsize("c%c0%c" % (code, code)), code)) - - def test_unions(self): - for code, tp in self.formats.items(): - class X(Union): - _fields_ = [("x", c_char), - ("y", tp)] - self.assertEqual((sizeof(X), code), - (calcsize("%c" % (code)), code)) - - def test_struct_alignment(self): - class X(Structure): - _fields_ = [("x", c_char * 3)] - self.assertEqual(alignment(X), calcsize("s")) - self.assertEqual(sizeof(X), calcsize("3s")) - - class Y(Structure): - _fields_ = [("x", c_char * 3), - ("y", c_int)] - self.assertEqual(alignment(Y), alignment(c_int)) - self.assertEqual(sizeof(Y), calcsize("3si")) - - class SI(Structure): - _fields_ = [("a", X), - ("b", Y)] - self.assertEqual(alignment(SI), max(alignment(Y), alignment(X))) - self.assertEqual(sizeof(SI), calcsize("3s0i 3si 0i")) - - class IS(Structure): - _fields_ = [("b", Y), - ("a", X)] - - self.assertEqual(alignment(SI), max(alignment(X), alignment(Y))) - self.assertEqual(sizeof(IS), calcsize("3si 3s 0i")) - - class XX(Structure): - _fields_ = [("a", X), - ("b", X)] - self.assertEqual(alignment(XX), alignment(X)) - self.assertEqual(sizeof(XX), calcsize("3s 3s 0s")) - - def test_empty(self): - # I had problems with these - # - # Although these are pathological cases: Empty Structures! - class X(Structure): - _fields_ = [] - - class Y(Union): - _fields_ = [] - - # Is this really the correct alignment, or should it be 0? - self.assertTrue(alignment(X) == alignment(Y) == 1) - self.assertTrue(sizeof(X) == sizeof(Y) == 0) - - class XX(Structure): - _fields_ = [("a", X), - ("b", X)] - - self.assertEqual(alignment(XX), 1) - self.assertEqual(sizeof(XX), 0) - - def test_fields(self): - # test the offset and size attributes of Structure/Union fields. - class X(Structure): - _fields_ = [("x", c_int), - ("y", c_char)] - - self.assertEqual(X.x.offset, 0) - self.assertEqual(X.x.size, sizeof(c_int)) - - self.assertEqual(X.y.offset, sizeof(c_int)) - self.assertEqual(X.y.size, sizeof(c_char)) - - # readonly - self.assertRaises((TypeError, AttributeError), setattr, X.x, "offset", 92) - self.assertRaises((TypeError, AttributeError), setattr, X.x, "size", 92) - - class X(Union): - _fields_ = [("x", c_int), - ("y", c_char)] - - self.assertEqual(X.x.offset, 0) - self.assertEqual(X.x.size, sizeof(c_int)) - - self.assertEqual(X.y.offset, 0) - self.assertEqual(X.y.size, sizeof(c_char)) - - # readonly - self.assertRaises((TypeError, AttributeError), setattr, X.x, "offset", 92) - self.assertRaises((TypeError, AttributeError), setattr, X.x, "size", 92) - - # XXX Should we check nested data types also? - # offset is always relative to the class... 
- def test_packed(self): class X(Structure): _fields_ = [("a", c_byte), @@ -290,36 +105,6 @@ class POINT(Structure): pt = POINT(y=2, x=1) self.assertEqual((pt.x, pt.y), (1, 2)) - def test_invalid_field_types(self): - class POINT(Structure): - pass - self.assertRaises(TypeError, setattr, POINT, "_fields_", [("x", 1), ("y", 2)]) - - def test_invalid_name(self): - # field name must be string - def declare_with_name(name): - class S(Structure): - _fields_ = [(name, c_int)] - - self.assertRaises(TypeError, declare_with_name, b"x") - - def test_intarray_fields(self): - class SomeInts(Structure): - _fields_ = [("a", c_int * 4)] - - # can use tuple to initialize array (but not list!) - self.assertEqual(SomeInts((1, 2)).a[:], [1, 2, 0, 0]) - self.assertEqual(SomeInts((1, 2)).a[::], [1, 2, 0, 0]) - self.assertEqual(SomeInts((1, 2)).a[::-1], [0, 0, 2, 1]) - self.assertEqual(SomeInts((1, 2)).a[::2], [1, 0]) - self.assertEqual(SomeInts((1, 2)).a[1:5:6], [2]) - self.assertEqual(SomeInts((1, 2)).a[6:4:-1], []) - self.assertEqual(SomeInts((1, 2, 3, 4)).a[:], [1, 2, 3, 4]) - self.assertEqual(SomeInts((1, 2, 3, 4)).a[::], [1, 2, 3, 4]) - # too long - # XXX Should raise ValueError?, not RuntimeError - self.assertRaises(RuntimeError, SomeInts, (1, 2, 3, 4, 5)) - def test_nested_initializers(self): # test initializing nested structures class Phone(Structure): @@ -374,37 +159,12 @@ class Person(Structure): self.assertEqual(msg, "(Phone) TypeError: too many initializers") - def test_huge_field_name(self): - # issue12881: segfault with large structure field names - def create_class(length): - class S(Structure): - _fields_ = [('x' * length, c_int)] - - for length in [10 ** i for i in range(0, 8)]: - try: - create_class(length) - except MemoryError: - # MemoryErrors are OK, we just don't want to segfault - pass - def get_except(self, func, *args): try: func(*args) except Exception as detail: return detail.__class__, str(detail) - def test_abstract_class(self): - class X(Structure): - _abstract_ = "something" - # try 'X()' - cls, msg = self.get_except(eval, "X()", locals()) - self.assertEqual((cls, msg), (TypeError, "abstract class")) - - def test_methods(self): - self.assertIn("in_dll", dir(type(Structure))) - self.assertIn("from_address", dir(type(Structure))) - self.assertIn("in_dll", dir(type(Structure))) - def test_positional_args(self): # see also http://bugs.python.org/issue5042 class W(Structure): @@ -507,6 +267,8 @@ class X(Structure): self.assertEqual(s.second, got.second) def _test_issue18060(self, Vector): + # Regression tests for gh-62260 + # The call to atan2() should succeed if the # class fields were correctly cloned in the # subclasses. Otherwise, it will segfault. 
@@ -698,6 +460,7 @@ class Test3E(Structure): self.assertEqual(result.data[i], float(i+1)) def test_38368(self): + # Regression test for gh-82549 class U(Union): _fields_ = [ ('f1', c_uint8 * 16), @@ -719,9 +482,9 @@ class U(Union): self.assertEqual(f2, [0x4567, 0x0123, 0xcdef, 0x89ab, 0x3210, 0x7654, 0xba98, 0xfedc]) - @unittest.skipIf(True, 'Test disabled for now - see bpo-16575/bpo-16576') + @unittest.skipIf(True, 'Test disabled for now - see gh-60779/gh-60780') def test_union_by_value(self): - # See bpo-16575 + # See gh-60779 # These should mirror the structures in Modules/_ctypes/_ctypes_test.c @@ -800,9 +563,9 @@ class Test5(Structure): self.assertEqual(test5.nested.an_int, 0) self.assertEqual(test5.another_int, 0) - @unittest.skipIf(True, 'Test disabled for now - see bpo-16575/bpo-16576') + @unittest.skipIf(True, 'Test disabled for now - see gh-60779/gh-60780') def test_bitfield_by_value(self): - # See bpo-16576 + # See gh-60780 # These should mirror the structures in Modules/_ctypes/_ctypes_test.c @@ -882,75 +645,5 @@ class Test8(Union): 'a union by value, which is unsupported.') -class PointerMemberTestCase(unittest.TestCase): - - def test(self): - # a Structure with a POINTER field - class S(Structure): - _fields_ = [("array", POINTER(c_int))] - - s = S() - # We can assign arrays of the correct type - s.array = (c_int * 3)(1, 2, 3) - items = [s.array[i] for i in range(3)] - self.assertEqual(items, [1, 2, 3]) - - # The following are bugs, but are included here because the unittests - # also describe the current behaviour. - # - # This fails with SystemError: bad arg to internal function - # or with IndexError (with a patch I have) - - s.array[0] = 42 - - items = [s.array[i] for i in range(3)] - self.assertEqual(items, [42, 2, 3]) - - s.array[0] = 1 - - items = [s.array[i] for i in range(3)] - self.assertEqual(items, [1, 2, 3]) - - def test_none_to_pointer_fields(self): - class S(Structure): - _fields_ = [("x", c_int), - ("p", POINTER(c_int))] - - s = S() - s.x = 12345678 - s.p = None - self.assertEqual(s.x, 12345678) - - -class TestRecursiveStructure(unittest.TestCase): - def test_contains_itself(self): - class Recursive(Structure): - pass - - try: - Recursive._fields_ = [("next", Recursive)] - except AttributeError as details: - self.assertIn("Structure or union cannot contain itself", - str(details)) - else: - self.fail("Structure or union cannot contain itself") - - - def test_vice_versa(self): - class First(Structure): - pass - class Second(Structure): - pass - - First._fields_ = [("second", Second)] - - try: - Second._fields_ = [("first", First)] - except AttributeError as details: - self.assertIn("_fields_ is final", str(details)) - else: - self.fail("AttributeError not raised") - - if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_ctypes/test_unions.py b/Lib/test/test_ctypes/test_unions.py deleted file mode 100644 index e2dff0f22a9213..00000000000000 --- a/Lib/test/test_ctypes/test_unions.py +++ /dev/null @@ -1,35 +0,0 @@ -import unittest -from ctypes import Union, c_char -from ._support import (_CData, UnionType, Py_TPFLAGS_DISALLOW_INSTANTIATION, - Py_TPFLAGS_IMMUTABLETYPE) - - -class ArrayTestCase(unittest.TestCase): - def test_inheritance_hierarchy(self): - self.assertEqual(Union.mro(), [Union, _CData, object]) - - self.assertEqual(UnionType.__name__, "UnionType") - self.assertEqual(type(UnionType), type) - - def test_type_flags(self): - for cls in Union, UnionType: - with self.subTest(cls=Union): - self.assertTrue(Union.__flags__ & 
Py_TPFLAGS_IMMUTABLETYPE) - self.assertFalse(Union.__flags__ & Py_TPFLAGS_DISALLOW_INSTANTIATION) - - def test_metaclass_details(self): - # Abstract classes (whose metaclass __init__ was not called) can't be - # instantiated directly - NewUnion = UnionType.__new__(UnionType, 'NewUnion', - (Union,), {}) - for cls in Union, NewUnion: - with self.subTest(cls=cls): - with self.assertRaisesRegex(TypeError, "abstract class"): - obj = cls() - - # Cannot call the metaclass __init__ more than once - class T(Union): - _fields_ = [("x", c_char), - ("y", c_char)] - with self.assertRaisesRegex(SystemError, "already initialized"): - UnionType.__init__(T, 'ptr', (), {}) From c9014374c50d6ef64786d3e7d9c7e99053d5c9e2 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 10 Oct 2024 18:19:08 +0100 Subject: [PATCH 21/32] GH-125174: Make immortal objects more robust, following design from PEP 683 (GH-125251) --- .../pycore_global_objects_fini_generated.h | 2 +- Include/internal/pycore_object.h | 34 ++++----------- Include/object.h | 2 +- Include/refcount.h | 42 ++++++++++--------- Lib/test/test_builtin.py | 4 +- ...-10-10-12-04-56.gh-issue-125174._8h6T7.rst | 4 ++ Modules/_asynciomodule.c | 2 +- Objects/bytesobject.c | 6 +-- Objects/dictobject.c | 10 +++-- Objects/object.c | 2 +- Objects/structseq.c | 2 +- Objects/typeobject.c | 8 ++-- Python/bytecodes.c | 4 +- Python/executor_cases.c.h | 4 +- Python/generated_cases.c.h | 4 +- Python/import.c | 2 +- Tools/cases_generator/analyzer.py | 1 - 17 files changed, 61 insertions(+), 72 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-10-12-04-56.gh-issue-125174._8h6T7.rst diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 3140a75a47c5ee..de68ef93257234 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -11,7 +11,7 @@ extern "C" { #ifdef Py_DEBUG static inline void _PyStaticObject_CheckRefcnt(PyObject *obj) { - if (Py_REFCNT(obj) < _Py_IMMORTAL_REFCNT) { + if (!_Py_IsImmortal(obj)) { fprintf(stderr, "Immortal Object has less refcnt than expected.\n"); _PyObject_Dump(obj); } diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 0af13b1bcda20b..8832692d03c29e 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -16,9 +16,6 @@ extern "C" { #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_uniqueid.h" // _PyType_IncrefSlow - -#define _Py_IMMORTAL_REFCNT_LOOSE ((_Py_IMMORTAL_REFCNT >> 1) + 1) - // This value is added to `ob_ref_shared` for objects that use deferred // reference counting so that they are not immediately deallocated when the // non-deferred reference count drops to zero. @@ -27,25 +24,8 @@ extern "C" { // `ob_ref_shared` are used for flags. #define _Py_REF_DEFERRED (PY_SSIZE_T_MAX / 8) -// gh-121528, gh-118997: Similar to _Py_IsImmortal() but be more loose when -// comparing the reference count to stay compatible with C extensions built -// with the stable ABI 3.11 or older. Such extensions implement INCREF/DECREF -// as refcnt++ and refcnt-- without taking in account immortal objects. For -// example, the reference count of an immortal object can change from -// _Py_IMMORTAL_REFCNT to _Py_IMMORTAL_REFCNT+1 (INCREF) or -// _Py_IMMORTAL_REFCNT-1 (DECREF). -// -// This function should only be used in assertions. Otherwise, _Py_IsImmortal() -// must be used instead. 
-static inline int _Py_IsImmortalLoose(PyObject *op) -{ -#if defined(Py_GIL_DISABLED) - return _Py_IsImmortal(op); -#else - return (op->ob_refcnt >= _Py_IMMORTAL_REFCNT_LOOSE); -#endif -} -#define _Py_IsImmortalLoose(op) _Py_IsImmortalLoose(_PyObject_CAST(op)) +/* For backwards compatibility -- Do not use this */ +#define _Py_IsImmortalLoose(op) _Py_IsImmortal /* Check if an object is consistent. For example, ensure that the reference @@ -97,7 +77,7 @@ PyAPI_FUNC(int) _PyObject_IsFreed(PyObject *); #else #define _PyObject_HEAD_INIT(type) \ { \ - .ob_refcnt = _Py_IMMORTAL_REFCNT, \ + .ob_refcnt = _Py_IMMORTAL_INITIAL_REFCNT, \ .ob_type = (type) \ } #endif @@ -184,7 +164,7 @@ PyAPI_FUNC(void) _Py_SetImmortalUntracked(PyObject *op); static inline void _Py_SetMortal(PyObject *op, Py_ssize_t refcnt) { if (op) { - assert(_Py_IsImmortalLoose(op)); + assert(_Py_IsImmortal(op)); #ifdef Py_GIL_DISABLED op->ob_tid = _Py_UNOWNED_TID; op->ob_ref_local = 0; @@ -316,7 +296,7 @@ static inline void _Py_INCREF_TYPE(PyTypeObject *type) { if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { - assert(_Py_IsImmortalLoose(type)); + assert(_Py_IsImmortal(type)); _Py_INCREF_IMMORTAL_STAT_INC(); return; } @@ -357,7 +337,7 @@ static inline void _Py_DECREF_TYPE(PyTypeObject *type) { if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { - assert(_Py_IsImmortalLoose(type)); + assert(_Py_IsImmortal(type)); _Py_DECREF_IMMORTAL_STAT_INC(); return; } @@ -393,7 +373,7 @@ _PyObject_Init(PyObject *op, PyTypeObject *typeobj) { assert(op != NULL); Py_SET_TYPE(op, typeobj); - assert(_PyType_HasFeature(typeobj, Py_TPFLAGS_HEAPTYPE) || _Py_IsImmortalLoose(typeobj)); + assert(_PyType_HasFeature(typeobj, Py_TPFLAGS_HEAPTYPE) || _Py_IsImmortal(typeobj)); _Py_INCREF_TYPE(typeobj); _Py_NewReference(op); } diff --git a/Include/object.h b/Include/object.h index 418f2196062df7..5be4dedadc20eb 100644 --- a/Include/object.h +++ b/Include/object.h @@ -81,7 +81,7 @@ whose size is determined when the object is allocated. #else #define PyObject_HEAD_INIT(type) \ { \ - { _Py_IMMORTAL_REFCNT }, \ + { _Py_IMMORTAL_INITIAL_REFCNT }, \ (type) \ }, #endif diff --git a/Include/refcount.h b/Include/refcount.h index 9a4e15065ecab8..141cbd34dd72e6 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -21,25 +21,30 @@ cleanup during runtime finalization. #if SIZEOF_VOID_P > 4 /* -In 64+ bit systems, an object will be marked as immortal by setting all of the -lower 32 bits of the reference count field, which is equal to: 0xFFFFFFFF +In 64+ bit systems, any object whose 32 bit reference count is >= 2**31 +will be treated as immortal. Using the lower 32 bits makes the value backwards compatible by allowing C-Extensions without the updated checks in Py_INCREF and Py_DECREF to safely -increase and decrease the objects reference count. The object would lose its -immortality, but the execution would still be correct. +increase and decrease the objects reference count. + +In order to offer sufficient resilience to C extensions using the stable ABI +compiled against 3.11 or earlier, we set the initial value near the +middle of the range (2**31, 2**32). That way the the refcount can be +off by ~1 billion without affecting immortality. Reference count increases will use saturated arithmetic, taking advantage of having all the lower 32 bits set, which will avoid the reference count to go beyond the refcount limit. Immortality checks for reference count decreases will be done by checking the bit sign flag in the lower 32 bits. 
+ */ -#define _Py_IMMORTAL_REFCNT _Py_CAST(Py_ssize_t, UINT_MAX) +#define _Py_IMMORTAL_INITIAL_REFCNT ((Py_ssize_t)(3UL << 30)) #else /* -In 32 bit systems, an object will be marked as immortal by setting all of the -lower 30 bits of the reference count field, which is equal to: 0x3FFFFFFF +In 32 bit systems, an object will be treated as immortal if its reference +count equals or exceeds _Py_IMMORTAL_MINIMUM_REFCNT (2**30). Using the lower 30 bits makes the value backwards compatible by allowing C-Extensions without the updated checks in Py_INCREF and Py_DECREF to safely @@ -47,9 +52,10 @@ increase and decrease the objects reference count. The object would lose its immortality, but the execution would still be correct. Reference count increases and decreases will first go through an immortality -check by comparing the reference count field to the immortality reference count. +check by comparing the reference count field to the minimum immortality refcount. */ -#define _Py_IMMORTAL_REFCNT _Py_CAST(Py_ssize_t, UINT_MAX >> 2) +#define _Py_IMMORTAL_INITIAL_REFCNT ((Py_ssize_t)(3L << 29)) +#define _Py_IMMORTAL_MINIMUM_REFCNT ((Py_ssize_t)(1L << 30)) #endif // Py_GIL_DISABLED builds indicate immortal objects using `ob_ref_local`, which is @@ -90,7 +96,7 @@ PyAPI_FUNC(Py_ssize_t) Py_REFCNT(PyObject *ob); #else uint32_t local = _Py_atomic_load_uint32_relaxed(&ob->ob_ref_local); if (local == _Py_IMMORTAL_REFCNT_LOCAL) { - return _Py_IMMORTAL_REFCNT; + return _Py_IMMORTAL_INITIAL_REFCNT; } Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&ob->ob_ref_shared); return _Py_STATIC_CAST(Py_ssize_t, local) + @@ -109,9 +115,9 @@ static inline Py_ALWAYS_INLINE int _Py_IsImmortal(PyObject *op) return (_Py_atomic_load_uint32_relaxed(&op->ob_ref_local) == _Py_IMMORTAL_REFCNT_LOCAL); #elif SIZEOF_VOID_P > 4 - return (_Py_CAST(PY_INT32_T, op->ob_refcnt) < 0); + return _Py_CAST(PY_INT32_T, op->ob_refcnt) < 0; #else - return (op->ob_refcnt == _Py_IMMORTAL_REFCNT); + return op->ob_refcnt >= _Py_IMMORTAL_MINIMUM_REFCNT; #endif } #define _Py_IsImmortal(op) _Py_IsImmortal(_PyObject_CAST(op)) @@ -236,7 +242,7 @@ static inline Py_ALWAYS_INLINE void Py_INCREF(PyObject *op) uint32_t new_local = local + 1; if (new_local == 0) { _Py_INCREF_IMMORTAL_STAT_INC(); - // local is equal to _Py_IMMORTAL_REFCNT: do nothing + // local is equal to _Py_IMMORTAL_REFCNT_LOCAL: do nothing return; } if (_Py_IsOwnedByCurrentThread(op)) { @@ -246,18 +252,14 @@ static inline Py_ALWAYS_INLINE void Py_INCREF(PyObject *op) _Py_atomic_add_ssize(&op->ob_ref_shared, (1 << _Py_REF_SHARED_SHIFT)); } #elif SIZEOF_VOID_P > 4 - // Portable saturated add, branching on the carry flag and set low bits PY_UINT32_T cur_refcnt = op->ob_refcnt_split[PY_BIG_ENDIAN]; - PY_UINT32_T new_refcnt = cur_refcnt + 1; - if (new_refcnt == 0) { + if (((int32_t)cur_refcnt) < 0) { + // the object is immortal _Py_INCREF_IMMORTAL_STAT_INC(); - // cur_refcnt is equal to _Py_IMMORTAL_REFCNT: the object is immortal, - // do nothing return; } - op->ob_refcnt_split[PY_BIG_ENDIAN] = new_refcnt; + op->ob_refcnt_split[PY_BIG_ENDIAN] = cur_refcnt + 1; #else - // Explicitly check immortality against the immortal value if (_Py_IsImmortal(op)) { _Py_INCREF_IMMORTAL_STAT_INC(); return; diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index d884f54940b471..eb5906f8944c8e 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -2574,9 +2574,9 @@ def __del__(self): class ImmortalTests(unittest.TestCase): if sys.maxsize < (1 << 32): - IMMORTAL_REFCOUNT = (1 << 
30) - 1 + IMMORTAL_REFCOUNT = 3 << 29 else: - IMMORTAL_REFCOUNT = (1 << 32) - 1 + IMMORTAL_REFCOUNT = 3 << 30 IMMORTALS = (None, True, False, Ellipsis, NotImplemented, *range(-5, 257)) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-10-12-04-56.gh-issue-125174._8h6T7.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-10-12-04-56.gh-issue-125174._8h6T7.rst new file mode 100644 index 00000000000000..c7eaac32601bb3 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-10-12-04-56.gh-issue-125174._8h6T7.rst @@ -0,0 +1,4 @@ +Make the handling of reference counts of immortal objects more robust. +Immortal objects with reference counts that deviate from their original +reference count by up to a billion (half a billion on 32 bit builds) are +still counted as immortal. diff --git a/Modules/_asynciomodule.c b/Modules/_asynciomodule.c index 870084100a1b85..0a769c46b87ac8 100644 --- a/Modules/_asynciomodule.c +++ b/Modules/_asynciomodule.c @@ -1387,7 +1387,7 @@ FutureObj_get_state(FutureObj *fut, void *Py_UNUSED(ignored)) default: assert (0); } - assert(_Py_IsImmortalLoose(ret)); + assert(_Py_IsImmortal(ret)); return ret; } diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index bf58e55e100b3a..dcc1aba76abbed 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -46,7 +46,7 @@ Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer, static inline PyObject* bytes_get_empty(void) { PyObject *empty = &EMPTY->ob_base.ob_base; - assert(_Py_IsImmortalLoose(empty)); + assert(_Py_IsImmortal(empty)); return empty; } @@ -119,7 +119,7 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) } if (size == 1 && str != NULL) { op = CHARACTER(*str & 255); - assert(_Py_IsImmortalLoose(op)); + assert(_Py_IsImmortal(op)); return (PyObject *)op; } if (size == 0) { @@ -155,7 +155,7 @@ PyBytes_FromString(const char *str) } else if (size == 1) { op = CHARACTER(*str & 255); - assert(_Py_IsImmortalLoose(op)); + assert(_Py_IsImmortal(op)); return (PyObject *)op; } diff --git a/Objects/dictobject.c b/Objects/dictobject.c index adfd91d1e4d63b..12722eca6be5e5 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -416,6 +416,8 @@ _PyDict_DebugMallocStats(FILE *out) #define DK_MASK(dk) (DK_SIZE(dk)-1) +#define _Py_DICT_IMMORTAL_INITIAL_REFCNT PY_SSIZE_T_MIN + static void free_keys_object(PyDictKeysObject *keys, bool use_qsbr); /* PyDictKeysObject has refcounts like PyObject does, so we have the @@ -428,7 +430,8 @@ static void free_keys_object(PyDictKeysObject *keys, bool use_qsbr); static inline void dictkeys_incref(PyDictKeysObject *dk) { - if (FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) == _Py_IMMORTAL_REFCNT) { + if (FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) < 0) { + assert(FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) == _Py_DICT_IMMORTAL_INITIAL_REFCNT); return; } #ifdef Py_REF_DEBUG @@ -440,7 +443,8 @@ dictkeys_incref(PyDictKeysObject *dk) static inline void dictkeys_decref(PyInterpreterState *interp, PyDictKeysObject *dk, bool use_qsbr) { - if (FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) == _Py_IMMORTAL_REFCNT) { + if (FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) < 0) { + assert(FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) == _Py_DICT_IMMORTAL_INITIAL_REFCNT); return; } assert(FT_ATOMIC_LOAD_SSIZE(dk->dk_refcnt) > 0); @@ -586,7 +590,7 @@ estimate_log2_keysize(Py_ssize_t n) * (which cannot fail and thus can do no allocation). 
*/ static PyDictKeysObject empty_keys_struct = { - _Py_IMMORTAL_REFCNT, /* dk_refcnt */ + _Py_DICT_IMMORTAL_INITIAL_REFCNT, /* dk_refcnt */ 0, /* dk_log2_size */ 0, /* dk_log2_index_bytes */ DICT_KEYS_UNICODE, /* dk_kind */ diff --git a/Objects/object.c b/Objects/object.c index a97a900890320d..27d06cc081259d 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2453,7 +2453,7 @@ _Py_SetImmortalUntracked(PyObject *op) op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL; op->ob_ref_shared = 0; #else - op->ob_refcnt = _Py_IMMORTAL_REFCNT; + op->ob_refcnt = _Py_IMMORTAL_INITIAL_REFCNT; #endif } diff --git a/Objects/structseq.c b/Objects/structseq.c index 6092742835400b..56a7851b98788d 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -708,7 +708,7 @@ _PyStructSequence_FiniBuiltin(PyInterpreterState *interp, PyTypeObject *type) assert(type->tp_name != NULL); assert(type->tp_base == &PyTuple_Type); assert((type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)); - assert(_Py_IsImmortalLoose(type)); + assert(_Py_IsImmortal(type)); // Cannot delete a type if it still has subclasses if (_PyType_HasSubclasses(type)) { diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 5380633fa1149e..d90bb5825fd437 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -476,7 +476,7 @@ set_tp_bases(PyTypeObject *self, PyObject *bases, int initial) assert(PyTuple_GET_SIZE(bases) == 1); assert(PyTuple_GET_ITEM(bases, 0) == (PyObject *)self->tp_base); assert(self->tp_base->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN); - assert(_Py_IsImmortalLoose(self->tp_base)); + assert(_Py_IsImmortal(self->tp_base)); } _Py_SetImmortal(bases); } @@ -493,7 +493,7 @@ clear_tp_bases(PyTypeObject *self, int final) Py_CLEAR(self->tp_bases); } else { - assert(_Py_IsImmortalLoose(self->tp_bases)); + assert(_Py_IsImmortal(self->tp_bases)); _Py_ClearImmortal(self->tp_bases); } } @@ -558,7 +558,7 @@ clear_tp_mro(PyTypeObject *self, int final) Py_CLEAR(self->tp_mro); } else { - assert(_Py_IsImmortalLoose(self->tp_mro)); + assert(_Py_IsImmortal(self->tp_mro)); _Py_ClearImmortal(self->tp_mro); } } @@ -5966,7 +5966,7 @@ fini_static_type(PyInterpreterState *interp, PyTypeObject *type, int isbuiltin, int final) { assert(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN); - assert(_Py_IsImmortalLoose((PyObject *)type)); + assert(_Py_IsImmortal((PyObject *)type)); type_dealloc_common(type); diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 87cca3fc1d373c..34fdfcb05e3c18 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -381,7 +381,7 @@ dummy_func( EXIT_IF(!PyLong_CheckExact(value_o)); STAT_INC(TO_BOOL, hit); if (_PyLong_IsZero((PyLongObject *)value_o)) { - assert(_Py_IsImmortalLoose(value_o)); + assert(_Py_IsImmortal(value_o)); DEAD(value); res = PyStackRef_False; } @@ -412,7 +412,7 @@ dummy_func( EXIT_IF(!PyUnicode_CheckExact(value_o)); STAT_INC(TO_BOOL, hit); if (value_o == &_Py_STR(empty)) { - assert(_Py_IsImmortalLoose(value_o)); + assert(_Py_IsImmortal(value_o)); DEAD(value); res = PyStackRef_False; } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 57e15f33ca7703..ef110e2e2a794a 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -402,7 +402,7 @@ } STAT_INC(TO_BOOL, hit); if (_PyLong_IsZero((PyLongObject *)value_o)) { - assert(_Py_IsImmortalLoose(value_o)); + assert(_Py_IsImmortal(value_o)); res = PyStackRef_False; } else { @@ -455,7 +455,7 @@ } STAT_INC(TO_BOOL, hit); if (value_o == &_Py_STR(empty)) { - assert(_Py_IsImmortalLoose(value_o)); + 
assert(_Py_IsImmortal(value_o)); res = PyStackRef_False; } else { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 7656ce6bb7e313..7023aea369db49 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -7840,7 +7840,7 @@ DEOPT_IF(!PyLong_CheckExact(value_o), TO_BOOL); STAT_INC(TO_BOOL, hit); if (_PyLong_IsZero((PyLongObject *)value_o)) { - assert(_Py_IsImmortalLoose(value_o)); + assert(_Py_IsImmortal(value_o)); res = PyStackRef_False; } else { @@ -7902,7 +7902,7 @@ DEOPT_IF(!PyUnicode_CheckExact(value_o), TO_BOOL); STAT_INC(TO_BOOL, hit); if (value_o == &_Py_STR(empty)) { - assert(_Py_IsImmortalLoose(value_o)); + assert(_Py_IsImmortal(value_o)); res = PyStackRef_False; } else { diff --git a/Python/import.c b/Python/import.c index 460b1fe225c72e..acf849f14562b9 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1051,7 +1051,7 @@ del_cached_def(struct extensions_cache_value *value) However, this decref would be problematic if the module def were dynamically allocated, it were the last ref, and this function were called with an interpreter other than the def's owner. */ - assert(value->def == NULL || _Py_IsImmortalLoose(value->def)); + assert(value->def == NULL || _Py_IsImmortal(value->def)); Py_XDECREF(value->def->m_base.m_copy); value->def->m_base.m_copy = NULL; diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 9c2981a68ac909..60f5d010a7a083 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -614,7 +614,6 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "_Py_EnterRecursiveCallTstateUnchecked", "_Py_ID", "_Py_IsImmortal", - "_Py_IsImmortalLoose", "_Py_LeaveRecursiveCallPy", "_Py_LeaveRecursiveCallTstate", "_Py_NewRef", From 3b87fb74c907510402678bf1b7c4a94df0e5e65a Mon Sep 17 00:00:00 2001 From: Justin Kunimune Date: Thu, 10 Oct 2024 13:56:05 -0400 Subject: [PATCH 22/32] Note argparse exit code in documentation (GH-119568) Co-authored-by: Savannah Ostrowski --- Doc/library/argparse.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index e9a08984f77c3a..4eb6fad41f11ef 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -541,7 +541,8 @@ exit_on_error ^^^^^^^^^^^^^ Normally, when you pass an invalid argument list to the :meth:`~ArgumentParser.parse_args` -method of an :class:`ArgumentParser`, it will exit with error info. +method of an :class:`ArgumentParser`, it will print a *message* to :data:`sys.stderr` and exit with a status +code of 2. If the user would like to catch errors manually, the feature can be enabled by setting ``exit_on_error`` to ``False``:: From bb594e801b6a84823badbb85b88f0fc8b221d7bf Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 20:41:14 +0200 Subject: [PATCH 23/32] gh-125196: Use PyUnicodeWriter for repr(dict) (#125270) --- Objects/dictobject.c | 72 ++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 12722eca6be5e5..b27599d2815c82 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -3200,16 +3200,12 @@ static PyObject * dict_repr_lock_held(PyObject *self) { PyDictObject *mp = (PyDictObject *)self; - Py_ssize_t i; PyObject *key = NULL, *value = NULL; - _PyUnicodeWriter writer; - int first; - ASSERT_DICT_LOCKED(mp); - i = Py_ReprEnter((PyObject *)mp); - if (i != 0) { - return i > 0 ? 
PyUnicode_FromString("{...}") : NULL; + int res = Py_ReprEnter((PyObject *)mp); + if (res != 0) { + return (res > 0 ? PyUnicode_FromString("{...}") : NULL); } if (mp->ma_used == 0) { @@ -3217,66 +3213,70 @@ dict_repr_lock_held(PyObject *self) return PyUnicode_FromString("{}"); } - _PyUnicodeWriter_Init(&writer); - writer.overallocate = 1; - /* "{" + "1: 2" + ", 3: 4" * (len - 1) + "}" */ - writer.min_length = 1 + 4 + (2 + 4) * (mp->ma_used - 1) + 1; + // "{" + "1: 2" + ", 3: 4" * (len - 1) + "}" + Py_ssize_t prealloc = 1 + 4 + 6 * (mp->ma_used - 1) + 1; + PyUnicodeWriter *writer = PyUnicodeWriter_Create(prealloc); + if (writer == NULL) { + goto error; + } - if (_PyUnicodeWriter_WriteChar(&writer, '{') < 0) + if (PyUnicodeWriter_WriteChar(writer, '{') < 0) { goto error; + } /* Do repr() on each key+value pair, and insert ": " between them. Note that repr may mutate the dict. */ - i = 0; - first = 1; + Py_ssize_t i = 0; + int first = 1; while (_PyDict_Next((PyObject *)mp, &i, &key, &value, NULL)) { - PyObject *s; - int res; - - /* Prevent repr from deleting key or value during key format. */ + // Prevent repr from deleting key or value during key format. Py_INCREF(key); Py_INCREF(value); if (!first) { - if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0) + // Write ", " + if (PyUnicodeWriter_WriteChar(writer, ',') < 0) { + goto error; + } + if (PyUnicodeWriter_WriteChar(writer, ' ') < 0) { goto error; + } } first = 0; - s = PyObject_Repr(key); - if (s == NULL) - goto error; - res = _PyUnicodeWriter_WriteStr(&writer, s); - Py_DECREF(s); - if (res < 0) + // Write repr(key) + if (PyUnicodeWriter_WriteRepr(writer, key) < 0) { goto error; + } - if (_PyUnicodeWriter_WriteASCIIString(&writer, ": ", 2) < 0) + // Write ": " + if (PyUnicodeWriter_WriteChar(writer, ':') < 0) { goto error; - - s = PyObject_Repr(value); - if (s == NULL) + } + if (PyUnicodeWriter_WriteChar(writer, ' ') < 0) { goto error; - res = _PyUnicodeWriter_WriteStr(&writer, s); - Py_DECREF(s); - if (res < 0) + } + + // Write repr(value) + if (PyUnicodeWriter_WriteRepr(writer, value) < 0) { goto error; + } Py_CLEAR(key); Py_CLEAR(value); } - writer.overallocate = 0; - if (_PyUnicodeWriter_WriteChar(&writer, '}') < 0) + if (PyUnicodeWriter_WriteChar(writer, '}') < 0) { goto error; + } Py_ReprLeave((PyObject *)mp); - return _PyUnicodeWriter_Finish(&writer); + return PyUnicodeWriter_Finish(writer); error: Py_ReprLeave((PyObject *)mp); - _PyUnicodeWriter_Dealloc(&writer); + PyUnicodeWriter_Discard(writer); Py_XDECREF(key); Py_XDECREF(value); return NULL; From 427dcf24de4e06d239745d74d08c4b2e541dca5a Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 10 Oct 2024 16:21:29 -0400 Subject: [PATCH 24/32] gh-125268: Use static string for "1e309" in AST (#125272) When formatting the AST as a string, infinite values are replaced by 1e309, which evaluates to infinity. The initialization of this string replacement was not thread-safe in the free threading build. 
--- Include/internal/pycore_global_objects.h | 3 -- .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 4 +++ Parser/asdl_c.py | 2 -- Python/Python-ast.c | 2 -- Python/ast_unparse.c | 29 +++---------------- 8 files changed, 11 insertions(+), 32 deletions(-) diff --git a/Include/internal/pycore_global_objects.h b/Include/internal/pycore_global_objects.h index 913dce6f1ec0fe..e3f7ac707f0c37 100644 --- a/Include/internal/pycore_global_objects.h +++ b/Include/internal/pycore_global_objects.h @@ -66,9 +66,6 @@ struct _Py_static_objects { struct _Py_interp_cached_objects { PyObject *interned_strings; - /* AST */ - PyObject *str_replace_inf; - /* object.__reduce__ */ PyObject *objreduce; PyObject *type_slots_pname; diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index de68ef93257234..2fd7d5d13a98b2 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -562,6 +562,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(json_decoder)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(kwdefaults)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(list_err)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(str_replace_inf)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(type_params)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(utf_8)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(CANCELLED)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 1591cb0a3f114f..fc3871570cc49d 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -48,6 +48,7 @@ struct _Py_global_strings { STRUCT_FOR_STR(json_decoder, "json.decoder") STRUCT_FOR_STR(kwdefaults, ".kwdefaults") STRUCT_FOR_STR(list_err, "list index out of range") + STRUCT_FOR_STR(str_replace_inf, "1e309") STRUCT_FOR_STR(type_params, ".type_params") STRUCT_FOR_STR(utf_8, "utf-8") } literals; diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index c9d20d0b5aacdb..3b80e265b0ca50 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -557,6 +557,7 @@ extern "C" { INIT_STR(json_decoder, "json.decoder"), \ INIT_STR(kwdefaults, ".kwdefaults"), \ INIT_STR(list_err, "list index out of range"), \ + INIT_STR(str_replace_inf, "1e309"), \ INIT_STR(type_params, ".type_params"), \ INIT_STR(utf_8, "utf-8"), \ } diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index d335373e88ee74..eb2eca06ec4d4f 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -2936,6 +2936,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(str_replace_inf); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_STR(anon_null); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); 
diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py index f50c28afcfe205..32eac3afafa5d5 100755 --- a/Parser/asdl_c.py +++ b/Parser/asdl_c.py @@ -2242,8 +2242,6 @@ def generate_ast_fini(module_state, f): for s in module_state: f.write(" Py_CLEAR(state->" + s + ');\n') f.write(textwrap.dedent(""" - Py_CLEAR(_Py_INTERP_CACHED_OBJECT(interp, str_replace_inf)); - state->finalized = 1; state->once = (_PyOnceFlag){0}; } diff --git a/Python/Python-ast.c b/Python/Python-ast.c index 89c52b9dc73cac..38d74b48d232f8 100644 --- a/Python/Python-ast.c +++ b/Python/Python-ast.c @@ -281,8 +281,6 @@ void _PyAST_Fini(PyInterpreterState *interp) Py_CLEAR(state->vararg); Py_CLEAR(state->withitem_type); - Py_CLEAR(_Py_INTERP_CACHED_OBJECT(interp, str_replace_inf)); - state->finalized = 1; state->once = (_PyOnceFlag){0}; } diff --git a/Python/ast_unparse.c b/Python/ast_unparse.c index 86f7a582b981a3..8017cfc7fcf268 100644 --- a/Python/ast_unparse.c +++ b/Python/ast_unparse.c @@ -2,7 +2,6 @@ #include "pycore_ast.h" // expr_ty #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_runtime.h" // _Py_ID() -#include // DBL_MAX_10_EXP #include /* This limited unparser is used to convert annotations back to strings @@ -13,10 +12,6 @@ _Py_DECLARE_STR(dbl_open_br, "{{"); _Py_DECLARE_STR(dbl_close_br, "}}"); -/* We would statically initialize this if doing so were simple enough. */ -#define _str_replace_inf(interp) \ - _Py_INTERP_CACHED_OBJECT(interp, str_replace_inf) - /* Forward declarations for recursion via helper functions. */ static PyObject * expr_as_unicode(expr_ty e, int level); @@ -78,13 +73,13 @@ append_repr(_PyUnicodeWriter *writer, PyObject *obj) } if ((PyFloat_CheckExact(obj) && isinf(PyFloat_AS_DOUBLE(obj))) || - PyComplex_CheckExact(obj)) + PyComplex_CheckExact(obj)) { - PyInterpreterState *interp = _PyInterpreterState_GET(); + _Py_DECLARE_STR(str_replace_inf, "1e309"); // evaluates to inf PyObject *new_repr = PyUnicode_Replace( repr, &_Py_ID(inf), - _str_replace_inf(interp), + &_Py_STR(str_replace_inf), -1 ); Py_DECREF(repr); @@ -918,20 +913,6 @@ append_ast_expr(_PyUnicodeWriter *writer, expr_ty e, int level) return -1; } -static int -maybe_init_static_strings(void) -{ - PyInterpreterState *interp = _PyInterpreterState_GET(); - if (_str_replace_inf(interp) == NULL) { - PyObject *tmp = PyUnicode_FromFormat("1e%d", 1 + DBL_MAX_10_EXP); - if (tmp == NULL) { - return -1; - } - _str_replace_inf(interp) = tmp; - } - return 0; -} - static PyObject * expr_as_unicode(expr_ty e, int level) { @@ -939,9 +920,7 @@ expr_as_unicode(expr_ty e, int level) _PyUnicodeWriter_Init(&writer); writer.min_length = 256; writer.overallocate = 1; - if (-1 == maybe_init_static_strings() || - -1 == append_ast_expr(&writer, e, level)) - { + if (-1 == append_ast_expr(&writer, e, level)) { _PyUnicodeWriter_Dealloc(&writer); return NULL; } From dd0ee201da34d1d4a631d77b420728f9233f53f9 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Thu, 10 Oct 2024 21:26:01 +0100 Subject: [PATCH 25/32] Doc: Upgrade Sphinx to 8.1 (#125276) --- Doc/conf.py | 31 ++++++++++++++++++++++++------- Doc/requirements.txt | 2 +- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/Doc/conf.py b/Doc/conf.py index 287e0da46eb11c..d7197b17865854 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -11,6 +11,8 @@ import sys import time +import sphinx + sys.path.append(os.path.abspath('tools/extensions')) sys.path.append(os.path.abspath('includes')) @@ -62,7 +64,10 @@ # General substitutions. 
project = 'Python' -copyright = f"2001-{time.strftime('%Y')}, Python Software Foundation" +if sphinx.version_info[:2] >= (8, 1): + copyright = "2001-%Y, Python Software Foundation" +else: + copyright = f"2001-{time.strftime('%Y')}, Python Software Foundation" # We look for the Include/patchlevel.h file in the current Python source tree # and replace the values accordingly. @@ -361,10 +366,14 @@ } # This 'Last updated on:' timestamp is inserted at the bottom of every page. -html_time = int(os.environ.get('SOURCE_DATE_EPOCH', time.time())) -html_last_updated_fmt = time.strftime( - '%b %d, %Y (%H:%M UTC)', time.gmtime(html_time) -) +html_last_updated_fmt = '%b %d, %Y (%H:%M UTC)' +if sphinx.version_info[:2] >= (8, 1): + html_last_updated_use_utc = True +else: + html_time = int(os.environ.get('SOURCE_DATE_EPOCH', time.time())) + html_last_updated_fmt = time.strftime( + html_last_updated_fmt, time.gmtime(html_time) + ) # Path to find HTML templates. templates_path = ['tools/templates'] @@ -596,13 +605,21 @@ # mapping unique short aliases to a base URL and a prefix. # https://www.sphinx-doc.org/en/master/usage/extensions/extlinks.html extlinks = { - "cve": ("https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-%s", "CVE-%s"), - "cwe": ("https://cwe.mitre.org/data/definitions/%s.html", "CWE-%s"), "pypi": ("https://pypi.org/project/%s/", "%s"), "source": (SOURCE_URI, "%s"), } extlinks_detect_hardcoded_links = True +if sphinx.version_info[:2] < (8, 1): + # Sphinx 8.1 has in-built CVE and CWE roles. + extlinks |= { + "cve": ( + "https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-%s", + "CVE-%s", + ), + "cwe": ("https://cwe.mitre.org/data/definitions/%s.html", "CWE-%s"), + } + # Options for c_annotations # ------------------------- diff --git a/Doc/requirements.txt b/Doc/requirements.txt index bf1028020b7af7..5105786ccf283c 100644 --- a/Doc/requirements.txt +++ b/Doc/requirements.txt @@ -6,7 +6,7 @@ # Sphinx version is pinned so that new versions that introduce new warnings # won't suddenly cause build failures. Updating the version is fine as long # as no warnings are raised by doing so. -sphinx~=8.0.0 +sphinx~=8.1.0 blurb From a726ce73ca69b3a5ccc2cbe23061070e686b1150 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Fri, 11 Oct 2024 00:53:45 +0100 Subject: [PATCH 26/32] Add some doctest cleanups for `turtle` and `configparser` (#125288) Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Doc/library/configparser.rst | 1 + Doc/library/turtle.rst | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/Doc/library/configparser.rst b/Doc/library/configparser.rst index b5c18bbccffb78..3aad6f7b5d2d20 100644 --- a/Doc/library/configparser.rst +++ b/Doc/library/configparser.rst @@ -54,6 +54,7 @@ can be customized by end users easily. import os os.remove("example.ini") + os.remove("override.ini") Quick Start diff --git a/Doc/library/turtle.rst b/Doc/library/turtle.rst index da801d4dc1f5b3..efa4b6f8f1d3f9 100644 --- a/Doc/library/turtle.rst +++ b/Doc/library/turtle.rst @@ -14,6 +14,11 @@ from turtle import * turtle = Turtle() +.. 
testcleanup:: + + import os + os.remove("my_drawing.ps") + -------------- Introduction From 2f8301cbfbdd2976d254a4a772b4879069dd4298 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Thu, 10 Oct 2024 22:39:17 -0400 Subject: [PATCH 27/32] gh-124872: Rename blurb file to reference the correct issue (#125285) --- ....7tinr0.rst => 2024-10-10-02-56-24.gh-issue-124872.0mDDOq.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Misc/NEWS.d/next/Documentation/{2024-09-29-18-14-52.gh-issue-119333.7tinr0.rst => 2024-10-10-02-56-24.gh-issue-124872.0mDDOq.rst} (100%) diff --git a/Misc/NEWS.d/next/Documentation/2024-09-29-18-14-52.gh-issue-119333.7tinr0.rst b/Misc/NEWS.d/next/Documentation/2024-10-10-02-56-24.gh-issue-124872.0mDDOq.rst similarity index 100% rename from Misc/NEWS.d/next/Documentation/2024-09-29-18-14-52.gh-issue-119333.7tinr0.rst rename to Misc/NEWS.d/next/Documentation/2024-10-10-02-56-24.gh-issue-124872.0mDDOq.rst From c1913effeed4e4da4d5310a40ab518945001ffba Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Thu, 10 Oct 2024 23:30:27 -0700 Subject: [PATCH 28/32] gh-125296: Fix strange fragment identifier for `name or flags` in argparse docs (#125297) --- Doc/library/argparse.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index 4eb6fad41f11ef..d337de87ca8f39 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -602,7 +602,7 @@ The add_argument() method The following sections describe how each of these are used. -.. _name_or_flags: +.. _`name or flags`: name or flags ^^^^^^^^^^^^^ From b12e99261e656585ffbaa395af7c5dbaee5ad1ad Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 11 Oct 2024 03:56:01 -0400 Subject: [PATCH 29/32] gh-125221: Fix free-threading data race in `object.__reduce_ex__` (#125267) --- ...-10-10-14-47-13.gh-issue-125221.nfSQzT.rst | 2 ++ Objects/object.c | 8 ++++++++ Objects/typeobject.c | 20 +++++-------------- 3 files changed, 15 insertions(+), 15 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-10-14-47-13.gh-issue-125221.nfSQzT.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-10-14-47-13.gh-issue-125221.nfSQzT.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-10-14-47-13.gh-issue-125221.nfSQzT.rst new file mode 100644 index 00000000000000..c79650c3a64feb --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-10-14-47-13.gh-issue-125221.nfSQzT.rst @@ -0,0 +1,2 @@ +Fix possible race condition when calling :meth:`~object.__reduce_ex__` for the +first time in the free threading build. 
diff --git a/Objects/object.c b/Objects/object.c index 27d06cc081259d..4a4c5bf7d7f08a 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2372,6 +2372,14 @@ _PyTypes_InitTypes(PyInterpreterState *interp) } } + // Cache __reduce__ from PyBaseObject_Type object + PyObject *baseobj_dict = _PyType_GetDict(&PyBaseObject_Type); + PyObject *baseobj_reduce = PyDict_GetItemWithError(baseobj_dict, &_Py_ID(__reduce__)); + if (baseobj_reduce == NULL && PyErr_Occurred()) { + return _PyStatus_ERR("Can't get __reduce__ from base object"); + } + _Py_INTERP_CACHED_OBJECT(interp, objreduce) = baseobj_reduce; + // Must be after static types are initialized if (_Py_initialize_generic(interp) < 0) { return _PyStatus_ERR("Can't initialize generic types"); diff --git a/Objects/typeobject.c b/Objects/typeobject.c index d90bb5825fd437..6ca4406ec0ea2d 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -7359,18 +7359,7 @@ static PyObject * object___reduce_ex___impl(PyObject *self, int protocol) /*[clinic end generated code: output=2e157766f6b50094 input=f326b43fb8a4c5ff]*/ { -#define objreduce \ - (_Py_INTERP_CACHED_OBJECT(_PyInterpreterState_GET(), objreduce)) - PyObject *reduce, *res; - - if (objreduce == NULL) { - PyObject *dict = lookup_tp_dict(&PyBaseObject_Type); - objreduce = PyDict_GetItemWithError(dict, &_Py_ID(__reduce__)); - if (objreduce == NULL && PyErr_Occurred()) { - return NULL; - } - } - + PyObject *reduce; if (PyObject_GetOptionalAttr(self, &_Py_ID(__reduce__), &reduce) < 0) { return NULL; } @@ -7384,10 +7373,12 @@ object___reduce_ex___impl(PyObject *self, int protocol) Py_DECREF(reduce); return NULL; } - override = (clsreduce != objreduce); + + PyInterpreterState *interp = _PyInterpreterState_GET(); + override = (clsreduce != _Py_INTERP_CACHED_OBJECT(interp, objreduce)); Py_DECREF(clsreduce); if (override) { - res = _PyObject_CallNoArgs(reduce); + PyObject *res = _PyObject_CallNoArgs(reduce); Py_DECREF(reduce); return res; } @@ -7396,7 +7387,6 @@ object___reduce_ex___impl(PyObject *self, int protocol) } return _common_reduce(self, protocol); -#undef objreduce } static PyObject * From 0135848059162ad81478a7776fec622d68a36524 Mon Sep 17 00:00:00 2001 From: Jan Kaliszewski Date: Fri, 11 Oct 2024 10:15:46 +0200 Subject: [PATCH 30/32] gh-125058: update `_thread` docs regarding interruptibility of `lock.acquire()` (#125141) --- Doc/library/_thread.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Doc/library/_thread.rst b/Doc/library/_thread.rst index 5fd604c05380ac..6a66fc4c64bc45 100644 --- a/Doc/library/_thread.rst +++ b/Doc/library/_thread.rst @@ -219,9 +219,11 @@ In addition to these methods, lock objects can also be used via the * Calling :func:`sys.exit` or raising the :exc:`SystemExit` exception is equivalent to calling :func:`_thread.exit`. -* It is not possible to interrupt the :meth:`~threading.Lock.acquire` method on - a lock --- the :exc:`KeyboardInterrupt` exception will happen after the lock - has been acquired. +* It is platform-dependent whether the :meth:`~threading.Lock.acquire` method + on a lock can be interrupted (so that the :exc:`KeyboardInterrupt` exception + will happen immediately, rather than only after the lock has been acquired or + the operation has timed out). It can be interrupted on POSIX, but not on + Windows. * When the main thread exits, it is system defined whether the other threads survive. 
On most systems, they are killed without executing From 18c74497681e0107d7cde53e63ea42feb38f2176 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 11 Oct 2024 11:43:29 +0300 Subject: [PATCH 31/32] gh-61011: Fix inheritance of nested mutually exclusive groups in argparse (GH-125210) Previously, all nested mutually exclusive groups lost their connection to the group containing them and were displayed as belonging directly to the parser. Co-authored-by: Danica J. Sutherland --- Lib/argparse.py | 6 +++- Lib/test/test_argparse.py | 29 +++++++++++++++++++ Misc/ACKS | 1 + ...4-10-09-21-42-43.gh-issue-61011.pQXZb1.rst | 4 +++ 4 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-09-21-42-43.gh-issue-61011.pQXZb1.rst diff --git a/Lib/argparse.py b/Lib/argparse.py index d1f8fa2ace8611..2d8a7ef343a4ef 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -1521,7 +1521,11 @@ def _add_container_actions(self, container): # NOTE: if add_mutually_exclusive_group ever gains title= and # description= then this code will need to be expanded as above for group in container._mutually_exclusive_groups: - mutex_group = self.add_mutually_exclusive_group( + if group._container is container: + cont = self + else: + cont = title_group_map[group._container.title] + mutex_group = cont.add_mutually_exclusive_group( required=group.required) # map the actions to their new mutex group diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index c9e79eb18a08fb..1ebbc21bc1755b 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -2942,6 +2942,35 @@ def test_groups_parents(self): def test_wrong_type_parents(self): self.assertRaises(TypeError, ErrorRaisingArgumentParser, parents=[1]) + def test_mutex_groups_parents(self): + parent = ErrorRaisingArgumentParser(add_help=False) + g = parent.add_argument_group(title='g', description='gd') + g.add_argument('-w') + g.add_argument('-x') + m = g.add_mutually_exclusive_group() + m.add_argument('-y') + m.add_argument('-z') + parser = ErrorRaisingArgumentParser(prog='PROG', parents=[parent]) + + self.assertRaises(ArgumentParserError, parser.parse_args, + ['-y', 'Y', '-z', 'Z']) + + parser_help = parser.format_help() + self.assertEqual(parser_help, textwrap.dedent('''\ + usage: PROG [-h] [-w W] [-x X] [-y Y | -z Z] + + options: + -h, --help show this help message and exit + + g: + gd + + -w W + -x X + -y Y + -z Z + ''')) + # ============================== # Mutually exclusive group tests # ============================== diff --git a/Misc/ACKS b/Misc/ACKS index d94cbacf888468..a1769d9601a2ea 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1814,6 +1814,7 @@ Reuben Sumner Eryk Sun Sanjay Sundaresan Marek Šuppa +Danica J. Sutherland Hisao Suzuki Kalle Svensson Andrew Svetlov diff --git a/Misc/NEWS.d/next/Library/2024-10-09-21-42-43.gh-issue-61011.pQXZb1.rst b/Misc/NEWS.d/next/Library/2024-10-09-21-42-43.gh-issue-61011.pQXZb1.rst new file mode 100644 index 00000000000000..20f9c0b9c78b12 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-09-21-42-43.gh-issue-61011.pQXZb1.rst @@ -0,0 +1,4 @@ +Fix inheritance of nested mutually exclusive groups from parent parser in +:class:`argparse.ArgumentParser`. Previously, all nested mutually exclusive +groups lost their connection to the group containing them and were displayed +as belonging directly to the parser. 
From b3aa1b5fe260382788a2df416599325ad680a5ee Mon Sep 17 00:00:00 2001 From: Y5 <124019959+y5c4l3@users.noreply.github.com> Date: Fri, 11 Oct 2024 09:08:03 +0000 Subject: [PATCH 32/32] gh-125235: Keep `_tkinter` TCL paths pointing to base installation on Windows (#125250) Signed-off-by: y5c4l3 --- .../next/Library/2024-10-10-18-33-31.gh-issue-125235.0kOB5I.rst | 2 ++ Modules/_tkinter.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-10-18-33-31.gh-issue-125235.0kOB5I.rst diff --git a/Misc/NEWS.d/next/Library/2024-10-10-18-33-31.gh-issue-125235.0kOB5I.rst b/Misc/NEWS.d/next/Library/2024-10-10-18-33-31.gh-issue-125235.0kOB5I.rst new file mode 100644 index 00000000000000..f64d15917da1fc --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-10-18-33-31.gh-issue-125235.0kOB5I.rst @@ -0,0 +1,2 @@ +Keep :mod:`tkinter` TCL paths in venv pointing to base installation on +Windows. diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c index 4f05cab375ed6b..b0b70ccb8cc3d3 100644 --- a/Modules/_tkinter.c +++ b/Modules/_tkinter.c @@ -143,7 +143,7 @@ _get_tcl_lib_path(void) struct stat stat_buf; int stat_return_value; - PyObject *prefix = PySys_GetObject("prefix"); // borrowed reference + PyObject *prefix = PySys_GetObject("base_prefix"); // borrowed reference if (prefix == NULL) { return NULL; }
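
Note: the attribute swap above corresponds to sys.prefix versus sys.base_prefix
at the Python level. An illustrative session follows (the paths are made-up
example values; outside a virtual environment the two attributes are equal):

    >>> import sys
    >>> sys.prefix       # example value: the active virtual environment
    'C:\\Users\\me\\project\\.venv'
    >>> sys.base_prefix  # example value: the base installation, where the Tcl libraries live
    'C:\\Program Files\\Python313'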