From c5c21fee7ae1ea689a351caa454c98e716a6e537 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Mon, 21 Oct 2024 07:53:21 +0100 Subject: [PATCH 01/36] gh-125519: Improve traceback if `importlib.reload()` is called with a non-module object (#125520) --- Lib/importlib/__init__.py | 2 +- Lib/test/test_importlib/test_api.py | 15 +++++++++++++++ ...2024-10-15-14-01-03.gh-issue-125519.TqGh6a.rst | 2 ++ 3 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-15-14-01-03.gh-issue-125519.TqGh6a.rst diff --git a/Lib/importlib/__init__.py b/Lib/importlib/__init__.py index f38fe5c1ab461a..a7d57561ead046 100644 --- a/Lib/importlib/__init__.py +++ b/Lib/importlib/__init__.py @@ -103,7 +103,7 @@ def reload(module): try: name = module.__name__ except AttributeError: - raise TypeError("reload() argument must be a module") + raise TypeError("reload() argument must be a module") from None if sys.modules.get(name) is not module: raise ImportError(f"module {name} not in sys.modules", name=name) diff --git a/Lib/test/test_importlib/test_api.py b/Lib/test/test_importlib/test_api.py index 973237c0791a3e..51ea5270b1a928 100644 --- a/Lib/test/test_importlib/test_api.py +++ b/Lib/test/test_importlib/test_api.py @@ -9,6 +9,7 @@ from test import support from test.support import import_helper from test.support import os_helper +import traceback import types import unittest @@ -354,6 +355,20 @@ def test_module_missing_spec(self): with self.assertRaises(ModuleNotFoundError): self.init.reload(module) + def test_reload_traceback_with_non_str(self): + # gh-125519 + with support.captured_stdout() as stdout: + try: + self.init.reload("typing") + except TypeError as exc: + traceback.print_exception(exc, file=stdout) + else: + self.fail("Expected TypeError to be raised") + printed_traceback = stdout.getvalue() + self.assertIn("TypeError", printed_traceback) + self.assertNotIn("AttributeError", printed_traceback) + self.assertNotIn("module.__spec__.name", printed_traceback) + (Frozen_ReloadTests, Source_ReloadTests diff --git a/Misc/NEWS.d/next/Library/2024-10-15-14-01-03.gh-issue-125519.TqGh6a.rst b/Misc/NEWS.d/next/Library/2024-10-15-14-01-03.gh-issue-125519.TqGh6a.rst new file mode 100644 index 00000000000000..e6062625104590 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-15-14-01-03.gh-issue-125519.TqGh6a.rst @@ -0,0 +1,2 @@ +Improve traceback if :func:`importlib.reload` is called with an object that +is not a module. Patch by Alex Waygood. From ded105a62b9d78717f8dc64652e3903190b585dd Mon Sep 17 00:00:00 2001 From: ember91 <31469580+ember91@users.noreply.github.com> Date: Mon, 21 Oct 2024 10:44:18 +0200 Subject: [PATCH 02/36] Doc: Fix typos (#125728) --- Doc/c-api/init.rst | 2 +- Doc/c-api/long.rst | 2 +- Doc/c-api/monitoring.rst | 2 +- Doc/library/concurrent.futures.rst | 2 +- Doc/library/importlib.metadata.rst | 2 +- Doc/using/windows.rst | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index ffc5b4223ba589..412a41ce02cfd7 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -2418,7 +2418,7 @@ Example usage:: In the above example, :c:macro:`Py_SETREF` calls :c:macro:`Py_DECREF`, which can call arbitrary code through an object's deallocation function. 
The critical -section API avoids potentital deadlocks due to reentrancy and lock ordering +section API avoids potential deadlocks due to reentrancy and lock ordering by allowing the runtime to temporarily suspend the critical section if the code triggered by the finalizer blocks and calls :c:func:`PyEval_SaveThread`. diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index 6d3463fe25a614..9ff3e5265004a1 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -511,7 +511,7 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. free(bignum); *flags* is either ``-1`` (``Py_ASNATIVEBYTES_DEFAULTS``) to select defaults - that behave most like a C cast, or a combintation of the other flags in + that behave most like a C cast, or a combination of the other flags in the table below. Note that ``-1`` cannot be combined with other flags. diff --git a/Doc/c-api/monitoring.rst b/Doc/c-api/monitoring.rst index 285ddb2889a67f..51d866cfd47469 100644 --- a/Doc/c-api/monitoring.rst +++ b/Doc/c-api/monitoring.rst @@ -147,7 +147,7 @@ would typically correspond to a python function. The ``version`` argument is a pointer to a value which should be allocated by the user together with ``state_array`` and initialized to 0, - and then set only by :c:func:`!PyMonitoring_EnterScope` itelf. It allows this + and then set only by :c:func:`!PyMonitoring_EnterScope` itself. It allows this function to determine whether event states have changed since the previous call, and to return quickly if they have not. diff --git a/Doc/library/concurrent.futures.rst b/Doc/library/concurrent.futures.rst index 45a73705f10e92..48e027152a9851 100644 --- a/Doc/library/concurrent.futures.rst +++ b/Doc/library/concurrent.futures.rst @@ -208,7 +208,7 @@ ThreadPoolExecutor Example 'http://www.cnn.com/', 'http://europe.wsj.com/', 'http://www.bbc.co.uk/', - 'http://nonexistant-subdomain.python.org/'] + 'http://nonexistent-subdomain.python.org/'] # Retrieve a single page and report the URL and contents def load_url(url, timeout): diff --git a/Doc/library/importlib.metadata.rst b/Doc/library/importlib.metadata.rst index 85d5a2d684d6eb..ddfc5c06d825c0 100644 --- a/Doc/library/importlib.metadata.rst +++ b/Doc/library/importlib.metadata.rst @@ -559,7 +559,7 @@ path. ``DatabaseDistribution``, then, would look something like:: - class DatabaseDistribution(importlib.metadata.Distributon): + class DatabaseDistribution(importlib.metadata.Distribution): def __init__(self, record): self.record = record diff --git a/Doc/using/windows.rst b/Doc/using/windows.rst index 20d872d7639219..daaf8822af1161 100644 --- a/Doc/using/windows.rst +++ b/Doc/using/windows.rst @@ -838,8 +838,8 @@ The short form of the argument (``-3``) only ever selects from core Python releases, and not other distributions. However, the longer form (``-V:3``) will select from any. -The Company is matched on the full string, case-insenitive. The Tag is matched -oneither the full string, or a prefix, provided the next character is a dot or a +The Company is matched on the full string, case-insensitive. The Tag is matched +on either the full string, or a prefix, provided the next character is a dot or a hyphen. This allows ``-V:3.1`` to match ``3.1-32``, but not ``3.10``. Tags are sorted using numerical ordering (``3.10`` is newer than ``3.1``), but are compared using text (``-V:3.01`` does not match ``3.1``). 
From 7d88140d5299bd086434840db66ede8ccd01a688 Mon Sep 17 00:00:00 2001 From: Y5 <124019959+y5c4l3@users.noreply.github.com> Date: Mon, 21 Oct 2024 17:35:54 +0800 Subject: [PATCH 03/36] gh-125313: Prefer `sys.base_*` paths in `Py_Get(Exec)Prefix` deprecation notes (#125317) Prefer `sys.base_*` paths in `Py_Get(Exec)Prefix` deprecation notes. Signed-off-by: y5c4l3 --- Doc/c-api/init.rst | 11 +++++++---- Doc/deprecations/c-api-pending-removal-in-3.15.rst | 4 ++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index 412a41ce02cfd7..6e881590131cab 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -625,7 +625,7 @@ Process-wide parameters returned string points into static storage; the caller should not modify its value. This corresponds to the :makevar:`prefix` variable in the top-level :file:`Makefile` and the :option:`--prefix` argument to the :program:`configure` - script at build time. The value is available to Python code as ``sys.prefix``. + script at build time. The value is available to Python code as ``sys.base_prefix``. It is only useful on Unix. See also the next function. This function should not be called before :c:func:`Py_Initialize`, otherwise @@ -635,7 +635,8 @@ Process-wide parameters It now returns ``NULL`` if called before :c:func:`Py_Initialize`. .. deprecated-removed:: 3.13 3.15 - Get :data:`sys.prefix` instead. + Get :data:`sys.base_prefix` instead, or :data:`sys.prefix` if + :ref:`virtual environments <venv-def>` need to be handled. .. c:function:: wchar_t* Py_GetExecPrefix() @@ -648,7 +649,8 @@ Process-wide parameters should not modify its value. This corresponds to the :makevar:`exec_prefix` variable in the top-level :file:`Makefile` and the ``--exec-prefix`` argument to the :program:`configure` script at build time. The value is - available to Python code as ``sys.exec_prefix``. It is only useful on Unix. + available to Python code as ``sys.base_exec_prefix``. It is only useful on + Unix. Background: The exec-prefix differs from the prefix when platform dependent files (such as executables and shared libraries) are installed in a different @@ -679,7 +681,8 @@ Process-wide parameters It now returns ``NULL`` if called before :c:func:`Py_Initialize`. .. deprecated-removed:: 3.13 3.15 - Get :data:`sys.exec_prefix` instead. + Get :data:`sys.base_exec_prefix` instead, or :data:`sys.exec_prefix` if + :ref:`virtual environments <venv-def>` need to be handled. .. c:function:: wchar_t* Py_GetProgramFullPath() diff --git a/Doc/deprecations/c-api-pending-removal-in-3.15.rst b/Doc/deprecations/c-api-pending-removal-in-3.15.rst index 1bb49e5b4874f2..0ce0f9c118c094 100644 --- a/Doc/deprecations/c-api-pending-removal-in-3.15.rst +++ b/Doc/deprecations/c-api-pending-removal-in-3.15.rst @@ -13,11 +13,11 @@ Pending removal in Python 3.15 * :c:func:`PySys_ResetWarnOptions`: Clear :data:`sys.warnoptions` and :data:`!warnings.filters` instead. * :c:func:`Py_GetExecPrefix`: - Get :data:`sys.exec_prefix` instead. + Get :data:`sys.base_exec_prefix` and :data:`sys.exec_prefix` instead. * :c:func:`Py_GetPath`: Get :data:`sys.path` instead. * :c:func:`Py_GetPrefix`: - Get :data:`sys.prefix` instead. + Get :data:`sys.base_prefix` and :data:`sys.prefix` instead. * :c:func:`Py_GetProgramFullPath`: Get :data:`sys.executable` instead. 
* :c:func:`Py_GetProgramName`: From f36d37bbafcee711c765a8cda9ac3ca00b8258c8 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Mon, 21 Oct 2024 11:54:54 +0100 Subject: [PATCH 04/36] gh-125741: Update `build.yml` for the new check_autoconf_regen job (#125772) --- .github/workflows/build.yml | 2 ++ .github/workflows/posix-deps-apt.sh | 2 -- Tools/build/regen-configure.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a72d4a1bb97cb9..88898895d15ad0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -536,6 +536,7 @@ jobs: needs: - check_source # Transitive dependency, needed to access `run_tests` value - check-docs + - check_autoconf_regen - check_generated_files - build_macos - build_ubuntu @@ -571,6 +572,7 @@ jobs: ${{ needs.check_source.outputs.run_tests != 'true' && ' + check_autoconf_regen, check_generated_files, build_macos, build_ubuntu, diff --git a/.github/workflows/posix-deps-apt.sh b/.github/workflows/posix-deps-apt.sh index fb485bd4f82bd2..bfc5a0874281bd 100755 --- a/.github/workflows/posix-deps-apt.sh +++ b/.github/workflows/posix-deps-apt.sh @@ -1,11 +1,9 @@ #!/bin/sh apt-get update -# autoconf-archive is needed by autoreconf (check_generated_files job) apt-get -yq install \ build-essential \ pkg-config \ - autoconf-archive \ ccache \ gdb \ lcov \ diff --git a/Tools/build/regen-configure.sh b/Tools/build/regen-configure.sh index 1a24b07c3ff707..ee7c03e86999c1 100755 --- a/Tools/build/regen-configure.sh +++ b/Tools/build/regen-configure.sh @@ -2,7 +2,7 @@ set -e -x -# The check_generated_files job of .github/workflows/build.yml must kept in +# The check_autoconf_regen job of .github/workflows/build.yml must be kept in # sync with this script. Use the same container image as the job so the job # doesn't need to run autoreconf in a container. IMAGE="ghcr.io/python/autoconf:2024.10.11.11293396815" From 0cd21406bf84b3b4927a8117024232774823aee0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lum=C3=ADr=20=27Frenzy=27=20Balhar?= Date: Mon, 21 Oct 2024 13:14:02 +0200 Subject: [PATCH 05/36] gh-119311: Add missing magic number (3571) for 3.13.0b1 (#125771) Add missing magic number 3571 for 3.13b1 It was added after branching in https://github.com/python/cpython/commit/6394a72e99b342d980297ec437ecafea92a044c4#diff-efefe383b3a81d16150c280db0b64eed7569254299418f64cc0d749f8e16f3a4R475 --- Include/internal/pycore_magic_number.h | 1 + 1 file changed, 1 insertion(+) diff --git a/Include/internal/pycore_magic_number.h b/Include/internal/pycore_magic_number.h index a88ff2deeba941..4aa89f3cac8063 100644 --- a/Include/internal/pycore_magic_number.h +++ b/Include/internal/pycore_magic_number.h @@ -251,6 +251,7 @@ Known values: Python 3.13a1 3568 (Change semantics of END_FOR) Python 3.13a5 3569 (Specialize CONTAINS_OP) Python 3.13a6 3570 (Add __firstlineno__ class attribute) + Python 3.13b1 3571 (Fix miscompilation of private names in generic classes) Python 3.14a1 3600 (Add LOAD_COMMON_CONSTANT) Python 3.14a1 3601 (Fix miscompilation of private names in generic classes) Python 3.14a1 3602 (Add LOAD_SPECIAL. 
Remove BEFORE_WITH and BEFORE_ASYNC_WITH) From 5989eb74463c26780632f17f221d6bf4c9372a01 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 21 Oct 2024 08:23:38 -0400 Subject: [PATCH 06/36] gh-125608: Trigger dictionary watchers when inline values change (#125611) Dictionary watchers on an object's attributes dictionary (`object.__dict__`) were not triggered when the managed dictionary used the object's inline values. --- Lib/test/test_capi/test_watchers.py | 17 +++++++++++++++ ...-10-16-19-28-23.gh-issue-125608.gTsU2g.rst | 3 +++ Objects/dictobject.c | 21 +++++++++++++------ 3 files changed, 35 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2024-10-16-19-28-23.gh-issue-125608.gTsU2g.rst diff --git a/Lib/test/test_capi/test_watchers.py b/Lib/test/test_capi/test_watchers.py index 4bb764bf9d0963..e578a622a03487 100644 --- a/Lib/test/test_capi/test_watchers.py +++ b/Lib/test/test_capi/test_watchers.py @@ -97,6 +97,23 @@ def test_dealloc(self): del d self.assert_events(["dealloc"]) + def test_object_dict(self): + class MyObj: pass + o = MyObj() + + with self.watcher() as wid: + self.watch(wid, o.__dict__) + o.foo = "bar" + o.foo = "baz" + del o.foo + self.assert_events(["new:foo:bar", "mod:foo:baz", "del:foo"]) + + with self.watcher() as wid: + self.watch(wid, o.__dict__) + for _ in range(100): + o.foo = "bar" + self.assert_events(["new:foo:bar"] + ["mod:foo:bar"] * 99) + def test_unwatch(self): d = {} with self.watcher() as wid: diff --git a/Misc/NEWS.d/next/C_API/2024-10-16-19-28-23.gh-issue-125608.gTsU2g.rst b/Misc/NEWS.d/next/C_API/2024-10-16-19-28-23.gh-issue-125608.gTsU2g.rst new file mode 100644 index 00000000000000..e70f9f173957a2 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-10-16-19-28-23.gh-issue-125608.gTsU2g.rst @@ -0,0 +1,3 @@ +Fix a bug where dictionary watchers (e.g., :c:func:`PyDict_Watch`) on an +object's attribute dictionary (:attr:`~object.__dict__`) were not triggered +when the object's attributes were modified. diff --git a/Objects/dictobject.c b/Objects/dictobject.c index b27599d2815c82..806096f5814062 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -6835,15 +6835,24 @@ store_instance_attr_lock_held(PyObject *obj, PyDictValues *values, } PyObject *old_value = values->values[ix]; + if (old_value == NULL && value == NULL) { + PyErr_Format(PyExc_AttributeError, + "'%.100s' object has no attribute '%U'", + Py_TYPE(obj)->tp_name, name); + return -1; + } + + if (dict) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + PyDict_WatchEvent event = (old_value == NULL ? PyDict_EVENT_ADDED : + value == NULL ? 
PyDict_EVENT_DELETED : + PyDict_EVENT_MODIFIED); + _PyDict_NotifyEvent(interp, event, dict, name, value); + } + FT_ATOMIC_STORE_PTR_RELEASE(values->values[ix], Py_XNewRef(value)); if (old_value == NULL) { - if (value == NULL) { - PyErr_Format(PyExc_AttributeError, - "'%.100s' object has no attribute '%U'", - Py_TYPE(obj)->tp_name, name); - return -1; - } _PyDictValues_AddToInsertionOrder(values, ix); if (dict) { assert(dict->ma_values == values); From 3d1df3d84e5c75a52b6f1379cd7f2809fc50befa Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Mon, 21 Oct 2024 15:39:05 +0100 Subject: [PATCH 07/36] gh-125703: Correctly honour tracemalloc hooks on more PyDECREF specialized paths (#125712) --- Python/ceval.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Python/ceval.c b/Python/ceval.c index 55e5eba25eaa21..ca75646b585f07 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -99,6 +99,11 @@ } \ _Py_DECREF_STAT_INC(); \ if (--op->ob_refcnt == 0) { \ + struct _reftracer_runtime_state *tracer = &_PyRuntime.ref_tracer; \ + if (tracer->tracer_func != NULL) { \ + void* data = tracer->tracer_data; \ + tracer->tracer_func(op, PyRefTracer_DESTROY, data); \ + } \ destructor d = (destructor)(dealloc); \ d(op); \ } \ From 5b7a872b26a9ba6c93d7c2109559a82d1c1612de Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Mon, 21 Oct 2024 08:43:08 -0700 Subject: [PATCH 08/36] gh-125590: Allow FrameLocalsProxy to delete and pop keys from extra locals (#125616) --- Lib/test/test_frame.py | 30 +++++++- ...-10-16-20-32-40.gh-issue-125590.stHzOP.rst | 1 + Objects/frameobject.c | 76 +++++++++++++++++-- 3 files changed, 99 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-16-20-32-40.gh-issue-125590.stHzOP.rst diff --git a/Lib/test/test_frame.py b/Lib/test/test_frame.py index 32de8ed9a13f80..11f191700ccef0 100644 --- a/Lib/test/test_frame.py +++ b/Lib/test/test_frame.py @@ -397,15 +397,41 @@ def test_repr(self): def test_delete(self): x = 1 d = sys._getframe().f_locals - with self.assertRaises(TypeError): + + # This needs to be tested before f_extra_locals is created + with self.assertRaisesRegex(KeyError, 'non_exist'): + del d['non_exist'] + + with self.assertRaises(KeyError): + d.pop('non_exist') + + with self.assertRaisesRegex(ValueError, 'local variables'): del d['x'] with self.assertRaises(AttributeError): d.clear() - with self.assertRaises(AttributeError): + with self.assertRaises(ValueError): d.pop('x') + with self.assertRaises(ValueError): + d.pop('x', None) + + # 'm', 'n' is stored in f_extra_locals + d['m'] = 1 + d['n'] = 1 + + with self.assertRaises(KeyError): + d.pop('non_exist') + + del d['m'] + self.assertEqual(d.pop('n'), 1) + + self.assertNotIn('m', d) + self.assertNotIn('n', d) + + self.assertEqual(d.pop('n', 2), 2) + @support.cpython_only def test_sizeof(self): proxy = sys._getframe().f_locals diff --git a/Misc/NEWS.d/next/Library/2024-10-16-20-32-40.gh-issue-125590.stHzOP.rst b/Misc/NEWS.d/next/Library/2024-10-16-20-32-40.gh-issue-125590.stHzOP.rst new file mode 100644 index 00000000000000..dc6765ada641a9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-16-20-32-40.gh-issue-125590.stHzOP.rst @@ -0,0 +1 @@ +Allow ``FrameLocalsProxy`` to delete and pop if the key is not a fast variable. diff --git a/Objects/frameobject.c b/Objects/frameobject.c index f3a66ffc9aac8f..5ef48919a081be 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -5,6 +5,7 @@ #include "pycore_code.h" // CO_FAST_LOCAL, etc. 
#include "pycore_function.h" // _PyFunction_FromConstructor() #include "pycore_moduleobject.h" // _PyModule_GetDict() +#include "pycore_modsupport.h" // _PyArg_CheckPositional() #include "pycore_object.h" // _PyObject_GC_UNTRACK() #include "pycore_opcode_metadata.h" // _PyOpcode_Deopt, _PyOpcode_Caches @@ -158,16 +159,16 @@ framelocalsproxy_setitem(PyObject *self, PyObject *key, PyObject *value) _PyStackRef *fast = _PyFrame_GetLocalsArray(frame->f_frame); PyCodeObject *co = _PyFrame_GetCode(frame->f_frame); - if (value == NULL) { - PyErr_SetString(PyExc_TypeError, "cannot remove variables from FrameLocalsProxy"); - return -1; - } - int i = framelocalsproxy_getkeyindex(frame, key, false); if (i == -2) { return -1; } if (i >= 0) { + if (value == NULL) { + PyErr_SetString(PyExc_ValueError, "cannot remove local variables from FrameLocalsProxy"); + return -1; + } + _Py_Executors_InvalidateDependency(PyInterpreterState_Get(), co, 1); _PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i); @@ -202,6 +203,10 @@ framelocalsproxy_setitem(PyObject *self, PyObject *key, PyObject *value) PyObject *extra = frame->f_extra_locals; if (extra == NULL) { + if (value == NULL) { + _PyErr_SetKeyError(key); + return -1; + } extra = PyDict_New(); if (extra == NULL) { return -1; @@ -211,7 +216,11 @@ framelocalsproxy_setitem(PyObject *self, PyObject *key, PyObject *value) assert(PyDict_Check(extra)); - return PyDict_SetItem(extra, key, value); + if (value == NULL) { + return PyDict_DelItem(extra, key); + } else { + return PyDict_SetItem(extra, key, value); + } } static int @@ -676,6 +685,59 @@ framelocalsproxy_setdefault(PyObject* self, PyObject *const *args, Py_ssize_t na return result; } +static PyObject* +framelocalsproxy_pop(PyObject* self, PyObject *const *args, Py_ssize_t nargs) +{ + if (!_PyArg_CheckPositional("pop", nargs, 1, 2)) { + return NULL; + } + + PyObject *key = args[0]; + PyObject *default_value = NULL; + + if (nargs == 2) { + default_value = args[1]; + } + + PyFrameObject *frame = ((PyFrameLocalsProxyObject*)self)->frame; + + int i = framelocalsproxy_getkeyindex(frame, key, false); + if (i == -2) { + return NULL; + } + + if (i >= 0) { + PyErr_SetString(PyExc_ValueError, "cannot remove local variables from FrameLocalsProxy"); + return NULL; + } + + PyObject *result = NULL; + + if (frame->f_extra_locals == NULL) { + if (default_value != NULL) { + return Py_XNewRef(default_value); + } else { + _PyErr_SetKeyError(key); + return NULL; + } + } + + if (PyDict_Pop(frame->f_extra_locals, key, &result) < 0) { + return NULL; + } + + if (result == NULL) { + if (default_value != NULL) { + return Py_XNewRef(default_value); + } else { + _PyErr_SetKeyError(key); + return NULL; + } + } + + return result; +} + static PyObject* framelocalsproxy_copy(PyObject *self, PyObject *Py_UNUSED(ignored)) { @@ -743,6 +805,8 @@ static PyMethodDef framelocalsproxy_methods[] = { NULL}, {"get", _PyCFunction_CAST(framelocalsproxy_get), METH_FASTCALL, NULL}, + {"pop", _PyCFunction_CAST(framelocalsproxy_pop), METH_FASTCALL, + NULL}, {"setdefault", _PyCFunction_CAST(framelocalsproxy_setdefault), METH_FASTCALL, NULL}, {NULL, NULL} /* sentinel */ From d67bf2d89ab57f94608d7d2cf949dc4a8749485d Mon Sep 17 00:00:00 2001 From: partev Date: Mon, 21 Oct 2024 12:18:10 -0400 Subject: [PATCH 09/36] gh-125766: Docs: minor rewording of installation on Linux section (GH-125743) --- Doc/using/unix.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/using/unix.rst b/Doc/using/unix.rst index 
58838c28e6eb86..a2bcdab09a9282 100644 --- a/Doc/using/unix.rst +++ b/Doc/using/unix.rst @@ -17,12 +17,12 @@ On Linux Python comes preinstalled on most Linux distributions, and is available as a package on all others. However there are certain features you might want to use -that are not available on your distro's package. You can easily compile the +that are not available on your distro's package. You can compile the latest version of Python from source. -In the event that Python doesn't come preinstalled and isn't in the repositories as -well, you can easily make packages for your own distro. Have a look at the -following links: +In the event that the latest version of Python doesn't come preinstalled and isn't +in the repositories either, you can make packages for your own distro. Have a +look at the following links: .. seealso:: From d880c83ff7fb2e464bc4f469d74cc3fc3eca082c Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Mon, 21 Oct 2024 12:46:37 -0400 Subject: [PATCH 10/36] Doc: C API: Move `tp_dealloc` paragraph to `tp_dealloc` section (#125737) It looks like commit 43cf44ddcce6b225f959ea2a53e4817244ca6054 (gh-31501) accidentally moved the paragraph to the `tp_finalize` section when the intent was to move it to the `tp_dealloc` section (according to the commit message). Also: * Convert the paragraph to a warning. * Apply the appropriate font style to `tp_dealloc`. * Unlinkify the first mention of `tp_dealloc` since the paragraph is already in the `tp_dealloc` section. --- Doc/c-api/typeobj.rst | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/Doc/c-api/typeobj.rst b/Doc/c-api/typeobj.rst index da1b5092fbf787..0c59b3da0795cb 100644 --- a/Doc/c-api/typeobj.rst +++ b/Doc/c-api/typeobj.rst @@ -682,6 +682,19 @@ and :c:data:`PyType_Type` effectively act as defaults.) Py_DECREF(tp); } + .. warning:: + + In a garbage collected Python, :c:member:`!tp_dealloc` may be called from + any Python thread, not just the thread which created the object (if the + object becomes part of a refcount cycle, that cycle might be collected by + a garbage collection on any thread). This is not a problem for Python + API calls, since the thread on which :c:member:`!tp_dealloc` is called + will own the Global Interpreter Lock (GIL). However, if the object being + destroyed in turn destroys objects from some other C or C++ library, care + should be taken to ensure that destroying those objects on the thread + which called :c:member:`!tp_dealloc` will not violate any assumptions of + the library. + **Inheritance:** @@ -2109,17 +2122,6 @@ and :c:data:`PyType_Type` effectively act as defaults.) PyErr_Restore(error_type, error_value, error_traceback); } - Also, note that, in a garbage collected Python, - :c:member:`~PyTypeObject.tp_dealloc` may be called from - any Python thread, not just the thread which created the object (if the object - becomes part of a refcount cycle, that cycle might be collected by a garbage - collection on any thread). This is not a problem for Python API calls, since - the thread on which tp_dealloc is called will own the Global Interpreter Lock - (GIL). However, if the object being destroyed in turn destroys objects from some - other C or C++ library, care should be taken to ensure that destroying those - objects on the thread which called tp_dealloc will not violate any assumptions - of the library. - **Inheritance:** This field is inherited by subtypes. 
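To make the relocated warning concrete, here is a hedged sketch of a deallocator for a heap type that wraps a thread-affine foreign object; `ForeignHandle` and `foreign_queue_destroy()` are hypothetical stand-ins for such a library, not real APIs:

```c
/* Sketch only: ForeignHandle and foreign_queue_destroy() are hypothetical
 * stand-ins for a C/C++ library whose objects must be destroyed on their
 * owning thread. */
typedef struct {
    PyObject_HEAD
    ForeignHandle *handle;  /* owned by a specific library thread */
} WrapperObject;

static void
Wrapper_dealloc(PyObject *op)
{
    WrapperObject *self = (WrapperObject *)op;
    PyTypeObject *tp = Py_TYPE(op);
    if (self->handle != NULL) {
        /* tp_dealloc may run on any Python thread, so hand the handle back
         * to the library's own thread rather than destroying it here. */
        foreign_queue_destroy(self->handle);
        self->handle = NULL;
    }
    tp->tp_free(op);
    Py_DECREF(tp);  /* heap types: release the instance's reference to the type */
}
```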
From 9b0bfba2a265b8108610b037945c004d8e81f2b4 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 21 Oct 2024 12:51:29 -0400 Subject: [PATCH 11/36] gh-124218: Use per-thread reference counting for globals and builtins (#125713) Use per-thread refcounting for the reference from function objects to the globals and builtins dictionaries. --- Include/cpython/dictobject.h | 4 +++- Include/internal/pycore_dict.h | 34 ++++++++++++++++++++++++++ Include/internal/pycore_object.h | 14 +++++++++++ Include/internal/pycore_uniqueid.h | 3 +++ Objects/dictobject.c | 18 ++++++++++++++ Objects/funcobject.c | 38 +++++++++++++++++++++++++----- Objects/moduleobject.c | 3 ++- Python/uniqueid.c | 12 +++++++--- 8 files changed, 115 insertions(+), 11 deletions(-) diff --git a/Include/cpython/dictobject.h b/Include/cpython/dictobject.h index b113c7fdcf6515..78473e54898fa5 100644 --- a/Include/cpython/dictobject.h +++ b/Include/cpython/dictobject.h @@ -17,7 +17,9 @@ typedef struct { /* This is a private field for CPython's internal use. * Bits 0-7 are for dict watchers. * Bits 8-11 are for the watched mutation counter (used by tier2 optimization) - * The remaining bits are not currently used. */ + * Bits 12-31 are currently unused + * Bits 32-63 are a unique id in the free threading build (used for per-thread refcounting) + */ uint64_t _ma_watcher_tag; PyDictKeysObject *ma_keys; diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index 1920724c1d4f57..1d185559b3ef43 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -229,6 +229,8 @@ static inline PyDictUnicodeEntry* DK_UNICODE_ENTRIES(PyDictKeysObject *dk) { #define DICT_VERSION_INCREMENT (1 << (DICT_MAX_WATCHERS + DICT_WATCHED_MUTATION_BITS)) #define DICT_WATCHER_MASK ((1 << DICT_MAX_WATCHERS) - 1) #define DICT_WATCHER_AND_MODIFICATION_MASK ((1 << (DICT_MAX_WATCHERS + DICT_WATCHED_MUTATION_BITS)) - 1) +#define DICT_UNIQUE_ID_SHIFT (32) +#define DICT_UNIQUE_ID_MAX ((UINT64_C(1) << (64 - DICT_UNIQUE_ID_SHIFT)) - 1) PyAPI_FUNC(void) @@ -307,8 +309,40 @@ _PyInlineValuesSize(PyTypeObject *tp) int _PyDict_DetachFromObject(PyDictObject *dict, PyObject *obj); +// Enables per-thread ref counting on this dict in the free threading build +extern void _PyDict_EnablePerThreadRefcounting(PyObject *op); + PyDictObject *_PyObject_MaterializeManagedDict_LockHeld(PyObject *); +// See `_Py_INCREF_TYPE()` in pycore_object.h +#ifndef Py_GIL_DISABLED +# define _Py_INCREF_DICT Py_INCREF +# define _Py_DECREF_DICT Py_DECREF +#else +static inline Py_ssize_t +_PyDict_UniqueId(PyDictObject *mp) +{ + // Offset by one so that _ma_watcher_tag=0 represents an unassigned id + return (Py_ssize_t)(mp->_ma_watcher_tag >> DICT_UNIQUE_ID_SHIFT) - 1; +} + +static inline void +_Py_INCREF_DICT(PyObject *op) +{ + assert(PyDict_Check(op)); + Py_ssize_t id = _PyDict_UniqueId((PyDictObject *)op); + _Py_THREAD_INCREF_OBJECT(op, id); +} + +static inline void +_Py_DECREF_DICT(PyObject *op) +{ + assert(PyDict_Check(op)); + Py_ssize_t id = _PyDict_UniqueId((PyDictObject *)op); + _Py_THREAD_DECREF_OBJECT(op, id); +} +#endif + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 96f6d61e1c620b..c7af720b1ce43d 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -293,6 +293,20 @@ extern PyStatus _PyObject_InitState(PyInterpreterState *interp); extern void _PyObject_FiniState(PyInterpreterState *interp); extern bool _PyRefchain_IsTraced(PyInterpreterState *interp, 
PyObject *obj); +// Macros used for per-thread reference counting in the free threading build. +// They resolve to normal Py_INCREF/DECREF calls in the default build. +// +// The macros are used for only a few references that would otherwise cause +// scaling bottlenecks in the free threading build: +// - The reference from an object to `ob_type`. +// - The reference from a function to `func_code`. +// - The reference from a function to `func_globals` and `func_builtins`. +// +// It's safe, but not performant or necessary, to use these macros for other +// references to code, type, or dict objects. It's also safe to mix their +// usage with normal Py_INCREF/DECREF calls. +// +// See also Include/internal/pycore_dict.h for _Py_INCREF_DICT/_Py_DECREF_DICT. #ifndef Py_GIL_DISABLED # define _Py_INCREF_TYPE Py_INCREF # define _Py_DECREF_TYPE Py_DECREF diff --git a/Include/internal/pycore_uniqueid.h b/Include/internal/pycore_uniqueid.h index ad5dd38ea08483..d3db49ddb78103 100644 --- a/Include/internal/pycore_uniqueid.h +++ b/Include/internal/pycore_uniqueid.h @@ -48,6 +48,9 @@ struct _Py_unique_id_pool { // Assigns the next id from the pool of ids. extern Py_ssize_t _PyObject_AssignUniqueId(PyObject *obj); +// Releases the allocated id back to the pool. +extern void _PyObject_ReleaseUniqueId(Py_ssize_t unique_id); + // Releases the allocated id back to the pool. extern void _PyObject_DisablePerThreadRefcounting(PyObject *obj); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 806096f5814062..c4e11a3e9c0bc7 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1636,6 +1636,24 @@ _PyDict_MaybeUntrack(PyObject *op) _PyObject_GC_UNTRACK(op); } +void +_PyDict_EnablePerThreadRefcounting(PyObject *op) +{ + assert(PyDict_Check(op)); +#ifdef Py_GIL_DISABLED + Py_ssize_t id = _PyObject_AssignUniqueId(op); + if ((uint64_t)id >= (uint64_t)DICT_UNIQUE_ID_MAX) { + _PyObject_ReleaseUniqueId(id); + return; + } + + PyDictObject *mp = (PyDictObject *)op; + assert((mp->_ma_watcher_tag >> DICT_UNIQUE_ID_SHIFT) == 0); + // Plus 1 so that _ma_watcher_tag=0 represents an unassigned id + mp->_ma_watcher_tag += ((uint64_t)id + 1) << DICT_UNIQUE_ID_SHIFT; +#endif +} + static inline int is_unusable_slot(Py_ssize_t ix) { diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 3cb247691386bf..44fb4ac0907d7b 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -3,6 +3,7 @@ #include "Python.h" #include "pycore_ceval.h" // _PyEval_BuiltinsFromGlobals() +#include "pycore_dict.h" // _Py_INCREF_DICT() #include "pycore_long.h" // _PyLong_GetOne() #include "pycore_modsupport.h" // _PyArg_NoKeywords() #include "pycore_object.h" // _PyObject_GC_UNTRACK() @@ -112,8 +113,15 @@ _PyFunction_FromConstructor(PyFrameConstructor *constr) Py_XDECREF(module); return NULL; } - op->func_globals = Py_NewRef(constr->fc_globals); - op->func_builtins = Py_NewRef(constr->fc_builtins); + _Py_INCREF_DICT(constr->fc_globals); + op->func_globals = constr->fc_globals; + if (PyDict_Check(constr->fc_builtins)) { + _Py_INCREF_DICT(constr->fc_builtins); + } + else { + Py_INCREF(constr->fc_builtins); + } + op->func_builtins = constr->fc_builtins; op->func_name = Py_NewRef(constr->fc_name); op->func_qualname = Py_NewRef(constr->fc_qualname); _Py_INCREF_CODE((PyCodeObject *)constr->fc_code); @@ -143,7 +151,7 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname { assert(globals != NULL); assert(PyDict_Check(globals)); - Py_INCREF(globals); + _Py_INCREF_DICT(globals); PyThreadState *tstate = 
_PyThreadState_GET(); @@ -184,7 +192,12 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname if (builtins == NULL) { goto error; } - Py_INCREF(builtins); + if (PyDict_Check(builtins)) { + _Py_INCREF_DICT(builtins); + } + else { + Py_INCREF(builtins); + } PyFunctionObject *op = PyObject_GC_New(PyFunctionObject, &PyFunction_Type); if (op == NULL) { @@ -1057,8 +1070,21 @@ func_clear(PyObject *self) { PyFunctionObject *op = _PyFunction_CAST(self); func_clear_version(_PyInterpreterState_GET(), op); - Py_CLEAR(op->func_globals); - Py_CLEAR(op->func_builtins); + PyObject *globals = op->func_globals; + op->func_globals = NULL; + if (globals != NULL) { + _Py_DECREF_DICT(globals); + } + PyObject *builtins = op->func_builtins; + op->func_builtins = NULL; + if (builtins != NULL) { + if (PyDict_Check(builtins)) { + _Py_DECREF_DICT(builtins); + } + else { + Py_DECREF(builtins); + } + } Py_CLEAR(op->func_module); Py_CLEAR(op->func_defaults); Py_CLEAR(op->func_kwdefaults); diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index f63ae4e048bcd9..c06badd5f3edfe 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -3,6 +3,7 @@ #include "Python.h" #include "pycore_call.h" // _PyObject_CallNoArgs() +#include "pycore_dict.h" // _PyDict_EnablePerThreadRefcounting() #include "pycore_fileutils.h" // _Py_wgetcwd #include "pycore_interp.h" // PyInterpreterState.importlib #include "pycore_long.h" // _PyLong_GetOne() @@ -105,7 +106,7 @@ new_module_notrack(PyTypeObject *mt) static void track_module(PyModuleObject *m) { - _PyObject_SetDeferredRefcount(m->md_dict); + _PyDict_EnablePerThreadRefcounting(m->md_dict); PyObject_GC_Track(m->md_dict); _PyObject_SetDeferredRefcount((PyObject *)m); diff --git a/Python/uniqueid.c b/Python/uniqueid.c index 0cbb35c6cd2f8b..b9f30713feeb57 100644 --- a/Python/uniqueid.c +++ b/Python/uniqueid.c @@ -1,5 +1,6 @@ #include "Python.h" +#include "pycore_dict.h" // _PyDict_UniqueId() #include "pycore_lock.h" // PyMutex_LockFlags() #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_object.h" // _Py_IncRefTotal @@ -98,8 +99,8 @@ _PyObject_AssignUniqueId(PyObject *obj) return unique_id; } -static void -release_unique_id(Py_ssize_t unique_id) +void +_PyObject_ReleaseUniqueId(Py_ssize_t unique_id) { PyInterpreterState *interp = _PyInterpreterState_GET(); struct _Py_unique_id_pool *pool = &interp->unique_ids; @@ -128,6 +129,11 @@ clear_unique_id(PyObject *obj) id = co->_co_unique_id; co->_co_unique_id = -1; } + else if (PyDict_Check(obj)) { + PyDictObject *mp = (PyDictObject *)obj; + id = _PyDict_UniqueId(mp); + mp->_ma_watcher_tag &= ~(UINT64_MAX << DICT_UNIQUE_ID_SHIFT); + } return id; } @@ -136,7 +142,7 @@ _PyObject_DisablePerThreadRefcounting(PyObject *obj) { Py_ssize_t id = clear_unique_id(obj); if (id >= 0) { - release_unique_id(id); + _PyObject_ReleaseUniqueId(id); } } From 695814c6e97aad0ae2b116cedca3e77d25d5b968 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Mon, 21 Oct 2024 18:54:24 +0100 Subject: [PATCH 12/36] gh-119786: move interpreter doc from devguide to InternalDocs (#125715) --- InternalDocs/README.md | 31 ++- InternalDocs/code_objects.md | 5 + InternalDocs/generators.md | 9 + InternalDocs/interpreter.md | 364 +++++++++++++++++++++++++++++++++++ 4 files changed, 400 insertions(+), 9 deletions(-) create mode 100644 InternalDocs/code_objects.md create mode 100644 InternalDocs/generators.md create mode 100644 InternalDocs/interpreter.md diff --git 
a/InternalDocs/README.md b/InternalDocs/README.md index 0a6ecf899458ed..48c893bde2a631 100644 --- a/InternalDocs/README.md +++ b/InternalDocs/README.md @@ -11,19 +11,32 @@ The core dev team attempts to keep this documentation up to date. If it is not, please report that through the [issue tracker](https://github.com/python/cpython/issues). -Index: ------ -[Guide to the parser](parser.md) +Compiling Python Source Code +--- -[Compiler Design](compiler.md) +- [Guide to the parser](parser.md) -[Frames](frames.md) +- [Compiler Design](compiler.md) -[Adaptive Instruction Families](adaptive.md) +Runtime Objects +--- -[The Source Code Locations Table](locations.md) +- [Code Objects (coming soon)](code_objects.md) -[Garbage collector design](garbage_collector.md) +- [The Source Code Locations Table](locations.md) -[Exception Handling](exception_handling.md) +- [Generators (coming soon)](generators.md) + +- [Frames](frames.md) + +Program Execution +--- + +- [The Interpreter](interpreter.md) + +- [Adaptive Instruction Families](adaptive.md) + +- [Garbage Collector Design](garbage_collector.md) + +- [Exception Handling](exception_handling.md) diff --git a/InternalDocs/code_objects.md b/InternalDocs/code_objects.md new file mode 100644 index 00000000000000..284a8b7aee5765 --- /dev/null +++ b/InternalDocs/code_objects.md @@ -0,0 +1,5 @@ + +Code objects +============ + +Coming soon. diff --git a/InternalDocs/generators.md b/InternalDocs/generators.md new file mode 100644 index 00000000000000..d53f0f9bdff4e4 --- /dev/null +++ b/InternalDocs/generators.md @@ -0,0 +1,9 @@ + +Generators +========== + +Coming soon. + + diff --git a/InternalDocs/interpreter.md b/InternalDocs/interpreter.md new file mode 100644 index 00000000000000..dcfddc99370c0e --- /dev/null +++ b/InternalDocs/interpreter.md @@ -0,0 +1,364 @@ + +The bytecode interpreter +======================== + +Overview +-------- + +This document describes the workings and implementation of the bytecode +interpreter, the part of Python that executes compiled Python code. Its +entry point is in [Python/ceval.c](../Python/ceval.c). + +At a high level, the interpreter consists of a loop that iterates over the +bytecode instructions, executing each of them via a switch statement that +has a case implementing each opcode. This switch statement is generated +from the instruction definitions in [Python/bytecodes.c](../Python/bytecodes.c) +which are written in [a DSL](../Tools/cases_generator/interpreter_definition.md) +developed for this purpose. + +Recall that the [Python Compiler](compiler.md) produces a [`CodeObject`](code_objects.md), +which contains the bytecode instructions along with static data that is required to execute them, +such as the consts list, variable names, +[exception table](exception_handling.md#format-of-the-exception-table), and so on. + +When the interpreter's +[`PyEval_EvalCode()`](https://docs.python.org/3.14/c-api/veryhigh.html#c.PyEval_EvalCode) +function is called to execute a `CodeObject`, it constructs a [`Frame`](frames.md) and calls +[`_PyEval_EvalFrame()`](https://docs.python.org/3.14/c-api/veryhigh.html#c.PyEval_EvalCode) +to execute the code object in this frame. The frame holds the dynamic state of the +`CodeObject`'s execution, including the instruction pointer, the globals and builtins. +It also has a reference to the `CodeObject` itself. 
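+
+As a minimal sketch of how this entry point can be reached from C (a hedged
+illustration: it assumes an already-initialized interpreter, abbreviates
+error handling, and uses only documented public APIs):
+
+```c
+/* Sketch: compile a snippet and run it.  PyEval_EvalCode() constructs the
+ * frame and hands it to _PyEval_EvalFrame().  Assumes Py_Initialize() has
+ * already been called. */
+static int
+run_snippet(const char *source)
+{
+    PyObject *code = Py_CompileString(source, "<sketch>", Py_file_input);
+    if (code == NULL) {
+        return -1;
+    }
+    PyObject *globals = PyDict_New();
+    if (globals == NULL) {
+        Py_DECREF(code);
+        return -1;
+    }
+    /* Use one dict for globals and locals, as module execution does. */
+    PyObject *result = PyEval_EvalCode(code, globals, globals);
+    Py_DECREF(globals);
+    Py_DECREF(code);
+    if (result == NULL) {
+        PyErr_Print();
+        return -1;
+    }
+    Py_DECREF(result);
+    return 0;
+}
+```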
+ +In addition to the frame, `_PyEval_EvalFrame()` also receives a +[`Thread State`](https://docs.python.org/3/c-api/init.html#c.PyThreadState) +object, `tstate`, which includes things like the exception state and the +recursion depth. The thread state also provides access to the per-interpreter +state (`tstate->interp`), which has a pointer to the per-runtime (that is, +truly global) state (`tstate->interp->runtime`). + +Finally, `_PyEval_EvalFrame()` receives an integer argument `throwflag` +which, when nonzero, indicates that the interpreter should just raise the current exception +(this is used in the implementation of +[`gen.throw`](https://docs.python.org/3.14/reference/expressions.html#generator.throw)). + +By default, [`_PyEval_EvalFrame()`](https://docs.python.org/3.14/c-api/veryhigh.html#c.PyEval_EvalCode) +simply calls `_PyEval_EvalFrameDefault()` to execute the frame. However, as per +[`PEP 523`](https://peps.python.org/pep-0523/) this is configurable by setting +`interp->eval_frame`. In the following, we describe the default function, +`_PyEval_EvalFrameDefault()`. + + +Instruction decoding +-------------------- + +The first task of the interpreter is to decode the bytecode instructions. +Bytecode is stored as an array of 16-bit code units (`_Py_CODEUNIT`). +Each code unit contains an 8-bit `opcode` and an 8-bit argument (`oparg`), both unsigned. +In order to make the bytecode format independent of the machine byte order when stored on disk, +`opcode` is always the first byte and `oparg` is always the second byte. +Macros are used to extract the `opcode` and `oparg` from a code unit +(`_Py_OPCODE(word)` and `_Py_OPARG(word)`). +Some instructions (for example, `NOP` or `POP_TOP`) have no argument -- in this case +we ignore `oparg`. + +A simplified version of the interpreter's main loop looks like this: + +```c + _Py_CODEUNIT *first_instr = code->co_code_adaptive; + _Py_CODEUNIT *next_instr = first_instr; + while (1) { + _Py_CODEUNIT word = *next_instr++; + unsigned char opcode = _Py_OPCODE(word); + unsigned int oparg = _Py_OPARG(word); + switch (opcode) { + // ... A case for each opcode ... + } + } +``` + +This loop iterates over the instructions, decoding each into its `opcode` +and `oparg`, and then executes the switch case that implements this `opcode`. + +The instruction format supports 256 different opcodes, which is sufficient. +However, it also limits `oparg` to 8-bit values, which is too restrictive. +To overcome this, the `EXTENDED_ARG` opcode allows us to prefix any instruction +with one or more additional data bytes, which combine into a larger oparg. +For example, this sequence of code units: + + EXTENDED_ARG 1 + EXTENDED_ARG 0 + LOAD_CONST 2 + +would set `opcode` to `LOAD_CONST` and `oparg` to `65538` (that is, `0x1_00_02`). +The compiler should limit itself to at most three `EXTENDED_ARG` prefixes, to allow the +resulting `oparg` to fit in 32 bits, but the interpreter does not check this. + +In the following, a `code unit` is always two bytes, while an `instruction` is a +sequence of code units consisting of zero to three `EXTENDED_ARG` opcodes followed by +a primary opcode. 
+ +The following loop, to be inserted just above the `switch` statement, will make the above +snippet decode a complete instruction: + +```c + while (opcode == EXTENDED_ARG) { + word = *next_instr++; + opcode = _Py_OPCODE(word); + oparg = (oparg << 8) | _Py_OPARG(word); + } +``` + +For various reasons we'll get to later (mostly efficiency, given that `EXTENDED_ARG` +is rare) the actual code is different. + +Jumps +===== + +Note that when the `switch` statement is reached, `next_instr` (the "instruction offset") +already points to the next instruction. +Thus, jump instructions can be implemented by manipulating `next_instr`: + +- A jump forward (`JUMP_FORWARD`) sets `next_instr += oparg`. +- A jump backward sets `next_instr -= oparg`. + +Inline cache entries +==================== + +Some (specialized or specializable) instructions have an associated "inline cache". +The inline cache consists of one or more two-byte entries included in the bytecode +array as additional words following the `opcode`/`oparg` pair. +The size of the inline cache for a particular instruction is fixed by its `opcode`. +Moreover, the inline cache size for all instructions in a +[family of specialized/specializable instructions](adaptive.md) +(for example, `LOAD_ATTR`, `LOAD_ATTR_SLOT`, `LOAD_ATTR_MODULE`) must all be +the same. Cache entries are reserved by the compiler and initialized with zeros. +Although they are represented by code units, cache entries do not conform to the +`opcode` / `oparg` format. + +If an instruction has an inline cache, the layout of its cache is described by +a `struct` definition in [`pycore_code.h`](../Include/internal/pycore_code.h). +This allows us to access the cache by casting `next_instr` to a pointer to this `struct`. +The size of such a `struct` must be independent of the machine architecture, word size +and alignment requirements. For a 32-bit field, the `struct` should use `_Py_CODEUNIT field[2]`. + +The instruction implementation is responsible for advancing `next_instr` past the inline cache. +For example, if an instruction's inline cache is four bytes (that is, two code units) in size, +the code for the instruction must contain `next_instr += 2;`. +This is equivalent to a relative forward jump by that many code units. +(In the interpreter definition DSL, this is coded as `JUMPBY(n)`, where `n` is the number +of code units to jump, typically given as a named constant.) + +Serializing non-zero cache entries would present a problem because the serialization +(`marshal`) format must be independent of the machine byte order. + +More information about the use of inline caches can be found in +[PEP 659](https://peps.python.org/pep-0659/#ancillary-data). + +The evaluation stack +-------------------- + +Most instructions read or write some data in the form of object references (`PyObject *`). +The CPython bytecode interpreter is a stack machine, meaning that its instructions operate +by pushing data onto and popping it off the stack. +The stack forms part of the frame for the code object. Its maximum depth is calculated +by the compiler and stored in the `co_stacksize` field of the code object, so that the +stack can be pre-allocated as a contiguous array of `PyObject*` pointers when the frame +is created. 
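+
+As a toy standalone illustration of this push/pop discipline (illustrative
+names only, not interpreter code), two operands can be pushed and then
+replaced by their result like this:
+
+```c
+/* Toy sketch: mimics the evaluation stack's pointer-bump discipline on a
+ * small pre-allocated array, the way a BINARY_OP-like case would. */
+#include <assert.h>
+#include <stdio.h>
+
+int main(void)
+{
+    int stack[4];                  /* a "co_stacksize" worth of slots */
+    int *stack_pointer = stack;    /* the stack grows upward */
+
+    *stack_pointer++ = 2;          /* PUSH(2) */
+    *stack_pointer++ = 3;          /* PUSH(3) */
+    int rhs = *--stack_pointer;    /* POP() */
+    int lhs = *--stack_pointer;    /* POP() */
+    *stack_pointer++ = lhs + rhs;  /* push the result */
+
+    assert(stack_pointer - stack == 1);  /* net stack effect: +1 */
+    printf("%d\n", stack[0]);      /* prints 5 */
+    return 0;
+}
+```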
+ +The stack effects of each instruction are also exposed through the +[opcode metadata](../Include/internal/pycore_opcode_metadata.h) via two +functions that report how many stack elements the instruction consumes, +and how many it produces (`_PyOpcode_num_popped` and `_PyOpcode_num_pushed`). +For example, the `BINARY_OP` instruction pops two objects from the stack and pushes the +result back onto the stack. + +The stack grows up in memory; the operation `PUSH(x)` is equivalent to `*stack_pointer++ = x`, +whereas `x = POP()` means `x = *--stack_pointer`. +Overflow and underflow checks are active in debug mode, but are otherwise optimized away. + +At any point during execution, the stack level is knowable based on the instruction pointer +alone, and some properties of each item on the stack are also known. +In particular, only a few instructions may push a `NULL` onto the stack, and the positions +that may be `NULL` are known. +A few other instructions (`GET_ITER`, `FOR_ITER`) push or pop an object that is known to +be an iterator. + +Instruction sequences that do not allow statically knowing the stack depth are deemed illegal; +the bytecode compiler never generates such sequences. +For example, the following sequence is illegal, because it keeps pushing items on the stack: + + LOAD_FAST 0 + JUMP_BACKWARD 2 + +> [!NOTE] +> Do not confuse the evaluation stack with the call stack, which is used to implement calling +> and returning from functions. + +Error handling +-------------- + +When the implementation of an opcode raises an exception, it jumps to the +`exception_unwind` label in [Python/ceval.c](../Python/ceval.c). +The exception is then handled as described in the +[`exception handling documentation`](exception_handling.md#handling-exceptions). + +Python-to-Python calls +---------------------- + +The `_PyEval_EvalFrameDefault()` function is recursive, because sometimes +the interpreter calls some C function that calls back into the interpreter. +In 3.10 and before, this was the case even when a Python function called +another Python function: +The `CALL` opcode would call the `tp_call` dispatch function of the +callee, which would extract the code object, create a new frame for the call +stack, and then call back into the interpreter. This approach is very general +but consumes several C stack frames for each nested Python call, thereby +increasing the risk of an (unrecoverable) C stack overflow. + +Since 3.11, the `CALL` instruction special-cases function objects to "inline" +the call. When a call gets inlined, a new frame gets pushed onto the call +stack and the interpreter "jumps" to the start of the callee's bytecode. +When an inlined callee executes a `RETURN_VALUE` instruction, the frame is +popped off the call stack and the interpreter returns to its caller, +by popping a frame off the call stack and "jumping" to the return address. +There is a flag in the frame (`frame->is_entry`) that indicates whether +the frame was inlined (set if it wasn't). +If `RETURN_VALUE` finds this flag set, it performs the usual cleanup and +returns from `_PyEval_EvalFrameDefault()` altogether, to a C caller. + +A similar check is performed when an unhandled exception occurs. + +The call stack +-------------- + +Up through 3.10, the call stack was implemented as a singly-linked list of +[frame objects](frames.md). This was expensive because each call would require a +heap allocation for the stack frame. + +Since 3.11, frames are no longer fully-fledged objects. 
Instead, a leaner internal +`_PyInterpreterFrame` structure is used, which is allocated and initialized by a custom +allocator function (`_PyThreadState_BumpFramePointer()`). Usually a frame allocation is just a pointer bump, which improves +memory locality. + +Sometimes an actual `PyFrameObject` is needed, such as when Python code calls +`sys._getframe()` or an extension module calls +[`PyEval_GetFrame()`](https://docs.python.org/3/c-api/reflection.html#c.PyEval_GetFrame). +In this case we allocate a proper `PyFrameObject` and initialize it from the +`_PyInterpreterFrame`. + +Things get more complicated when generators are involved, since those do not +follow the push/pop model. This includes async functions, which are based on +the same mechanism. A generator object has space for a `_PyInterpreterFrame` +structure, including the variable-size part (used for locals and the eval stack). +When a generator (or async) function is first called, a special opcode +`RETURN_GENERATOR` is executed, which is responsible for creating the +generator object. The generator object's `_PyInterpreterFrame` is initialized +with a copy of the current stack frame. The current stack frame is then popped +off the frame stack and the generator object is returned. +(Details differ depending on the `is_entry` flag.) +When the generator is resumed, the interpreter pushes its `_PyInterpreterFrame` +onto the frame stack and resumes execution. +See also the [generators](generators.md) section. + + + + + +Introducing a new bytecode instruction +-------------------------------------- + +It is occasionally necessary to add a new opcode in order to implement +a new feature or change the way that existing features are compiled. +This section describes the changes required to do this. + +First, you must choose a name for the bytecode, implement it in +[`Python/bytecodes.c`](../Python/bytecodes.c) and add a documentation +entry in [`Doc/library/dis.rst`](../Doc/library/dis.rst). +Then run `make regen-cases` to assign a number for it (see +[`Include/opcode_ids.h`](../Include/opcode_ids.h)) and regenerate a +number of files with the actual implementation of the bytecode in +[`Python/generated_cases.c.h`](../Python/generated_cases.c.h) and +metadata about it in additional files. + +With a new bytecode you must also change what is called the "magic number" for +.pyc files: bump the value of the variable `MAGIC_NUMBER` in +[`Lib/importlib/_bootstrap_external.py`](../Lib/importlib/_bootstrap_external.py). +Changing this number will cause all .pyc files with the old `MAGIC_NUMBER` +to be recompiled by the interpreter on import. Whenever `MAGIC_NUMBER` is +changed, the ranges in the `magic_values` array in +[`PC/launcher.c`](../PC/launcher.c) may also need to be updated. Changes to +[`Lib/importlib/_bootstrap_external.py`](../Lib/importlib/_bootstrap_external.py) +will take effect only after running `make regen-importlib`. + +> [!NOTE] +> Running `make regen-importlib` before adding the new bytecode target to +> [`Python/bytecodes.c`](../Python/bytecodes.c) +> (followed by `make regen-cases`) will result in an error. You should only run +> `make regen-importlib` after the new bytecode target has been added. + +> [!NOTE] +> On Windows, running the `./build.bat` script will automatically +> regenerate the required files without requiring additional arguments. + +Finally, you need to introduce the use of the new bytecode. 
Update +[`Python/codegen.c`](../Python/codegen.c) to emit code with this bytecode. +Optimizations in [`Python/flowgraph.c`](../Python/flowgraph.c) may also +need to be updated. If the new opcode affects control flow or the block +stack, you may have to update the `frame_setlineno()` function in +[`Objects/frameobject.c`](../Objects/frameobject.c). It may also be necessary +to update [`Lib/dis.py`](../Lib/dis.py) if the new opcode interprets its +argument in a special way (like `FORMAT_VALUE` or `MAKE_FUNCTION`). + +If you make a change here that can affect the output of bytecode that +is already in existence and you do not change the magic number, make +sure to delete your old .py(c|o) files! Even though you will end up changing +the magic number if you change the bytecode, while you are debugging your work +you may be changing the bytecode output without constantly bumping up the +magic number. This can leave you with stale .pyc files that will not be +recreated. +Running `find . -name '*.py[co]' -exec rm -f '{}' +` should delete all .pyc +files you have, forcing new ones to be created and thus allow you to test out your +new bytecode properly. Run `make regen-importlib` to update the +bytecode of frozen importlib files. You have to run `make` again after this +to recompile the generated C files. + +Additional resources +-------------------- + +* Brandt Bucher's talk about the specializing interpreter at PyCon US 2023. + [Slides](https://github.com/brandtbucher/brandtbucher/blob/master/2023/04/21/inside_cpython_311s_new_specializing_adaptive_interpreter.pdf) + [Video](https://www.youtube.com/watch?v=PGZPSWZSkJI&t=1470s) From de5a6c7c7d00ac37d66cba9849202b374e9cdfb7 Mon Sep 17 00:00:00 2001 From: mpage Date: Mon, 21 Oct 2024 11:08:13 -0700 Subject: [PATCH 13/36] gh-121459: Fix a couple of uses of `PyStackRef_FromPyObjectSteal` (#125711) * Fix usage of PyStackRef_FromPyObjectSteal in CALL_TUPLE_1 This was missed in gh-124894 * Fix usage of PyStackRef_FromPyObjectSteal in _CALL_STR_1 This was missed in gh-124894 * Regenerate code --- Python/bytecodes.c | 10 ++++++---- Python/executor_cases.c.h | 10 ++++++---- Python/generated_cases.c.h | 10 ++++++---- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c59a35c3e828ca..62e9b5ddd1584c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3629,11 +3629,12 @@ dummy_func( DEOPT_IF(!PyStackRef_IsNull(null)); DEOPT_IF(callable_o != (PyObject *)&PyUnicode_Type); STAT_INC(CALL, hit); - res = PyStackRef_FromPyObjectSteal(PyObject_Str(arg_o)); + PyObject *res_o = PyObject_Str(arg_o); DEAD(null); DEAD(callable); PyStackRef_CLOSE(arg); - ERROR_IF(PyStackRef_IsNull(res), error); + ERROR_IF(res_o == NULL, error); + res = PyStackRef_FromPyObjectSteal(res_o); } macro(CALL_STR_1) = @@ -3650,11 +3651,12 @@ dummy_func( DEOPT_IF(!PyStackRef_IsNull(null)); DEOPT_IF(callable_o != (PyObject *)&PyTuple_Type); STAT_INC(CALL, hit); - res = PyStackRef_FromPyObjectSteal(PySequence_Tuple(arg_o)); + PyObject *res_o = PySequence_Tuple(arg_o); DEAD(null); DEAD(callable); PyStackRef_CLOSE(arg); - ERROR_IF(PyStackRef_IsNull(res), error); + ERROR_IF(res_o == NULL, error); + res = PyStackRef_FromPyObjectSteal(res_o); } macro(CALL_TUPLE_1) = diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 15a6c7bc1a7966..5df4986cd838b5 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4299,10 +4299,11 @@ } STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); 
- res = PyStackRef_FromPyObjectSteal(PyObject_Str(arg_o)); + PyObject *res_o = PyObject_Str(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(arg); - if (PyStackRef_IsNull(res)) JUMP_TO_ERROR(); + if (res_o == NULL) JUMP_TO_ERROR(); + res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-3] = res; stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); @@ -4331,10 +4332,11 @@ } STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - res = PyStackRef_FromPyObjectSteal(PySequence_Tuple(arg_o)); + PyObject *res_o = PySequence_Tuple(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(arg); - if (PyStackRef_IsNull(res)) JUMP_TO_ERROR(); + if (res_o == NULL) JUMP_TO_ERROR(); + res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-3] = res; stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index a9290986c24f45..388031af87a79f 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2978,10 +2978,11 @@ DEOPT_IF(callable_o != (PyObject *)&PyUnicode_Type, CALL); STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - res = PyStackRef_FromPyObjectSteal(PyObject_Str(arg_o)); + PyObject *res_o = PyObject_Str(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(arg); - if (PyStackRef_IsNull(res)) goto pop_3_error; + if (res_o == NULL) goto pop_3_error; + res = PyStackRef_FromPyObjectSteal(res_o); } // _CHECK_PERIODIC { @@ -3028,10 +3029,11 @@ DEOPT_IF(callable_o != (PyObject *)&PyTuple_Type, CALL); STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - res = PyStackRef_FromPyObjectSteal(PySequence_Tuple(arg_o)); + PyObject *res_o = PySequence_Tuple(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(arg); - if (PyStackRef_IsNull(res)) goto pop_3_error; + if (res_o == NULL) goto pop_3_error; + res = PyStackRef_FromPyObjectSteal(res_o); } // _CHECK_PERIODIC { From 5ca4e34bc1aab8321911aac6d5b2b9e75ff764d8 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 21 Oct 2024 21:30:45 +0300 Subject: [PATCH 14/36] gh-125767: Fix pickling and copying of super objects (GH-125781) Previously, copying a super object returned a copy of the instance invoking super(). Pickling a super object could pickle the instance invoking super() or fail, depending on its type and protocol. Now deep copying returns a new super object and pickling pickles the super object. Shallow copying returns the same super object. --- Doc/library/functions.rst | 4 ++ Doc/whatsnew/3.14.rst | 4 ++ Lib/copy.py | 2 +- Lib/copyreg.py | 5 ++ Lib/test/test_super.py | 70 +++++++++++++++++++ ...-10-21-13-52-37.gh-issue-125767.0kK4lX.rst | 2 + 6 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-21-13-52-37.gh-issue-125767.0kK4lX.rst diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 0638df04c6ff40..290c63827ff766 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -2032,6 +2032,10 @@ are always available. They are listed here in alphabetical order. :func:`super`, see `guide to using super() `_. + .. versionchanged:: 3.14 + :class:`super` objects are now :mod:`pickleable ` and + :mod:`copyable `. + .. _func-tuple: .. 
class:: tuple() diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index ad841538ccc547..d52faa614db94e 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -190,6 +190,10 @@ Other language changes They raise an error if the argument is a string. (Contributed by Serhiy Storchaka in :gh:`84978`.) +* :class:`super` objects are now :mod:`pickleable ` and + :mod:`copyable `. + (Contributed by Serhiy Storchaka in :gh:`125767`.) + New modules =========== diff --git a/Lib/copy.py b/Lib/copy.py index a79976d3a658f0..f27e109973cfb7 100644 --- a/Lib/copy.py +++ b/Lib/copy.py @@ -106,7 +106,7 @@ def _copy_immutable(x): bytes, frozenset, type, range, slice, property, types.BuiltinFunctionType, types.EllipsisType, types.NotImplementedType, types.FunctionType, types.CodeType, - weakref.ref): + weakref.ref, super): d[t] = _copy_immutable d[list] = list.copy diff --git a/Lib/copyreg.py b/Lib/copyreg.py index 578392409b403c..17c5dde67c887c 100644 --- a/Lib/copyreg.py +++ b/Lib/copyreg.py @@ -36,6 +36,11 @@ def pickle_union(obj): pickle(type(int | str), pickle_union) +def pickle_super(obj): + return super, (obj.__thisclass__, obj.__self__) + +pickle(super, pickle_super) + # Support for pickling new-style objects def _reconstructor(cls, base, state): diff --git a/Lib/test/test_super.py b/Lib/test/test_super.py index 1222ec6a3c4109..149016635522c3 100644 --- a/Lib/test/test_super.py +++ b/Lib/test/test_super.py @@ -1,5 +1,7 @@ """Unit tests for zero-argument super() & related machinery.""" +import copy +import pickle import textwrap import threading import unittest @@ -539,6 +541,74 @@ def work(): for thread in threads: thread.join() + def test_special_methods(self): + for e in E(), E: + s = super(C, e) + self.assertEqual(s.__reduce__, e.__reduce__) + self.assertEqual(s.__reduce_ex__, e.__reduce_ex__) + self.assertEqual(s.__getstate__, e.__getstate__) + self.assertFalse(hasattr(s, '__getnewargs__')) + self.assertFalse(hasattr(s, '__getnewargs_ex__')) + self.assertFalse(hasattr(s, '__setstate__')) + self.assertFalse(hasattr(s, '__copy__')) + self.assertFalse(hasattr(s, '__deepcopy__')) + + def test_pickling(self): + e = E() + e.x = 1 + s = super(C, e) + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(proto=proto): + u = pickle.loads(pickle.dumps(s, proto)) + self.assertEqual(u.f(), s.f()) + self.assertIs(type(u), type(s)) + self.assertIs(type(u.__self__), E) + self.assertEqual(u.__self__.x, 1) + self.assertIs(u.__thisclass__, C) + self.assertIs(u.__self_class__, E) + + s = super(C, E) + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(proto=proto): + u = pickle.loads(pickle.dumps(s, proto)) + self.assertEqual(u.cm(), s.cm()) + self.assertEqual(u.f, s.f) + self.assertIs(type(u), type(s)) + self.assertIs(u.__self__, E) + self.assertIs(u.__thisclass__, C) + self.assertIs(u.__self_class__, E) + + def test_shallow_copying(self): + s = super(C, E()) + self.assertIs(copy.copy(s), s) + s = super(C, E) + self.assertIs(copy.copy(s), s) + + def test_deep_copying(self): + e = E() + e.x = [1] + s = super(C, e) + u = copy.deepcopy(s) + self.assertEqual(u.f(), s.f()) + self.assertIs(type(u), type(s)) + self.assertIsNot(u, s) + self.assertIs(type(u.__self__), E) + self.assertIsNot(u.__self__, e) + self.assertIsNot(u.__self__.x, e.x) + self.assertEqual(u.__self__.x, [1]) + self.assertIs(u.__thisclass__, C) + self.assertIs(u.__self_class__, E) + + s = super(C, E) + u = copy.deepcopy(s) + self.assertEqual(u.cm(), s.cm()) + self.assertEqual(u.f, s.f) + 
self.assertIsNot(u, s) + self.assertIs(type(u), type(s)) + self.assertIs(u.__self__, E) + self.assertIs(u.__thisclass__, C) + self.assertIs(u.__self_class__, E) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2024-10-21-13-52-37.gh-issue-125767.0kK4lX.rst b/Misc/NEWS.d/next/Library/2024-10-21-13-52-37.gh-issue-125767.0kK4lX.rst new file mode 100644 index 00000000000000..bfda740a79d10e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-21-13-52-37.gh-issue-125767.0kK4lX.rst @@ -0,0 +1,2 @@ +:class:`super` objects are now :mod:`pickleable ` and +:mod:`copyable `. From dcc4fb2c9068f60353f0c0978948b7681f7745e6 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 21 Oct 2024 21:54:12 +0300 Subject: [PATCH 15/36] gh-124969: Make locale.nl_langinfo(locale.ALT_DIGITS) returning a string again (GH-125774) This is a follow up of GH-124974. Only Glibc needed a fix. Now the returned value is a string consisting of semicolon-separated symbols on all Posix platforms. --- Doc/library/locale.rst | 7 ++-- Lib/test/test__locale.py | 30 ++++++++++----- ...-10-21-12-06-55.gh-issue-124969.xiY8UP.rst | 2 + Modules/_localemodule.c | 38 +++++++++++-------- 4 files changed, 50 insertions(+), 27 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-21-12-06-55.gh-issue-124969.xiY8UP.rst diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst index 5f3c4840b5cc70..f172a55080efc9 100644 --- a/Doc/library/locale.rst +++ b/Doc/library/locale.rst @@ -158,8 +158,7 @@ The :mod:`locale` module defines the following exception and functions: .. function:: nl_langinfo(option) - Return some locale-specific information as a string (or a tuple for - ``ALT_DIGITS``). This function is not + Return some locale-specific information as a string. This function is not available on all systems, and the set of possible options might also vary across platforms. The possible argument values are numbers, for which symbolic constants are available in the locale module. @@ -312,7 +311,9 @@ The :mod:`locale` module defines the following exception and functions: .. data:: ALT_DIGITS - Get a tuple of up to 100 strings used to represent the values 0 to 99. + Get a string consisting of up to 100 semicolon-separated symbols used + to represent the values 0 to 99 in a locale-specific way. + In most locales this is an empty string. 
The function temporarily sets the ``LC_CTYPE`` locale to the locale of the category that determines the requested value (``LC_TIME``, diff --git a/Lib/test/test__locale.py b/Lib/test/test__locale.py index e403c2a822788d..7e6e296c069abb 100644 --- a/Lib/test/test__locale.py +++ b/Lib/test/test__locale.py @@ -26,7 +26,10 @@ 'bs_BA', 'fr_LU', 'kl_GL', 'fa_IR', 'de_BE', 'sv_SE', 'it_CH', 'uk_UA', 'eu_ES', 'vi_VN', 'af_ZA', 'nb_NO', 'en_DK', 'tg_TJ', 'ps_AF', 'en_US', 'fr_FR.ISO8859-1', 'fr_FR.UTF-8', 'fr_FR.ISO8859-15@euro', - 'ru_RU.KOI8-R', 'ko_KR.eucKR'] + 'ru_RU.KOI8-R', 'ko_KR.eucKR', + 'ja_JP.UTF-8', 'lzh_TW.UTF-8', 'my_MM.UTF-8', 'or_IN.UTF-8', 'shn_MM.UTF-8', + 'ar_AE.UTF-8', 'bn_IN.UTF-8', 'mr_IN.UTF-8', 'th_TH.TIS620', +] def setUpModule(): global candidate_locales @@ -78,11 +81,13 @@ def accept(loc): 'C': (0, {}), 'en_US': (0, {}), 'fa_IR': (100, {0: '\u06f0\u06f0', 10: '\u06f1\u06f0', 99: '\u06f9\u06f9'}), - 'ja_JP': (100, {0: '\u3007', 10: '\u5341', 99: '\u4e5d\u5341\u4e5d'}), + 'ja_JP': (100, {1: '\u4e00', 10: '\u5341', 99: '\u4e5d\u5341\u4e5d'}), 'lzh_TW': (32, {0: '\u3007', 10: '\u5341', 31: '\u5345\u4e00'}), 'my_MM': (100, {0: '\u1040\u1040', 10: '\u1041\u1040', 99: '\u1049\u1049'}), 'or_IN': (100, {0: '\u0b66', 10: '\u0b67\u0b66', 99: '\u0b6f\u0b6f'}), 'shn_MM': (100, {0: '\u1090\u1090', 10: '\u1091\u1090', 99: '\u1099\u1099'}), + 'ar_AE': (100, {0: '\u0660', 10: '\u0661\u0660', 99: '\u0669\u0669'}), + 'bn_IN': (100, {0: '\u09e6', 10: '\u09e7\u09e6', 99: '\u09ef\u09ef'}), } if sys.platform == 'win32': @@ -199,21 +204,28 @@ def test_lc_numeric_basic(self): def test_alt_digits_nl_langinfo(self): # Test nl_langinfo(ALT_DIGITS) tested = False - for loc, (count, samples) in known_alt_digits.items(): + for loc in candidate_locales: with self.subTest(locale=loc): try: setlocale(LC_TIME, loc) except Error: self.skipTest(f'no locale {loc!r}') continue + with self.subTest(locale=loc): alt_digits = nl_langinfo(locale.ALT_DIGITS) - self.assertIsInstance(alt_digits, tuple) - if count and not alt_digits and support.is_apple: - self.skipTest(f'ALT_DIGITS is not set for locale {loc!r} on Apple platforms') - self.assertEqual(len(alt_digits), count) - for i in samples: - self.assertEqual(alt_digits[i], samples[i]) + self.assertIsInstance(alt_digits, str) + alt_digits = alt_digits.split(';') if alt_digits else [] + if alt_digits: + self.assertGreaterEqual(len(alt_digits), 10, alt_digits) + loc1 = loc.split('.', 1)[0] + if loc1 in known_alt_digits: + count, samples = known_alt_digits[loc1] + if count and not alt_digits: + self.skipTest(f'ALT_DIGITS is not set for locale {loc!r} on this platform') + self.assertEqual(len(alt_digits), count, alt_digits) + for i in samples: + self.assertEqual(alt_digits[i], samples[i]) tested = True if not tested: self.skipTest('no suitable locales') diff --git a/Misc/NEWS.d/next/Library/2024-10-21-12-06-55.gh-issue-124969.xiY8UP.rst b/Misc/NEWS.d/next/Library/2024-10-21-12-06-55.gh-issue-124969.xiY8UP.rst new file mode 100644 index 00000000000000..c44550184e0000 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-21-12-06-55.gh-issue-124969.xiY8UP.rst @@ -0,0 +1,2 @@ +``locale.nl_langinfo(locale.ALT_DIGITS)`` now returns a string again. The +returned value consists of up to 100 semicolon-separated symbols. 
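For illustration, here is a minimal sketch of the restored behavior. It assumes a Posix platform with the `fa_IR` locale installed (one locale known to define alternative digits); which symbols are returned, if any, varies by platform and locale.

```
import locale

# 'fa_IR' is an example; setlocale() raises locale.Error if the
# locale is not installed on this system.
locale.setlocale(locale.LC_TIME, 'fa_IR')

raw = locale.nl_langinfo(locale.ALT_DIGITS)
# The value is a single string again: up to 100 semicolon-separated
# symbols, or '' in locales that define no alternative digits.
symbols = raw.split(';') if raw else []
for value in (0, 10, 99):
    if value < len(symbols):
        print(value, '->', symbols[value])
```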
diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index 0daec646605775..2a789ea74d27da 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -667,28 +667,36 @@ _locale_nl_langinfo_impl(PyObject *module, int item) return NULL; } PyObject *pyresult; +#ifdef __GLIBC__ #ifdef ALT_DIGITS - if (item == ALT_DIGITS) { - /* The result is a sequence of up to 100 NUL-separated strings. */ - const char *s = result; + if (item == ALT_DIGITS && *result) { + /* According to the POSIX specification the result must be + * a sequence of up to 100 semicolon-separated strings. + * But in Glibc they are NUL-separated. */ + Py_ssize_t i = 0; int count = 0; - for (; count < 100 && *s; count++) { - s += strlen(s) + 1; + for (; count < 100 && result[i]; count++) { + i += strlen(result + i) + 1; } - pyresult = PyTuple_New(count); - if (pyresult != NULL) { - for (int i = 0; i < count; i++) { - PyObject *unicode = PyUnicode_DecodeLocale(result, NULL); - if (unicode == NULL) { - Py_CLEAR(pyresult); - break; - } - PyTuple_SET_ITEM(pyresult, i, unicode); - result += strlen(result) + 1; + char *buf = PyMem_Malloc(i); + if (buf == NULL) { + PyErr_NoMemory(); + pyresult = NULL; + } + else { + memcpy(buf, result, i); + /* Replace all NULs with semicolons. */ + i = 0; + while (--count) { + i += strlen(buf + i); + buf[i++] = ';'; } + pyresult = PyUnicode_DecodeLocale(buf, NULL); + PyMem_Free(buf); } } else +#endif #endif { pyresult = PyUnicode_DecodeLocale(result, NULL); From 9dde4638e44639d45bd7d72e70a8d410995a585a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 21 Oct 2024 22:17:39 +0300 Subject: [PATCH 16/36] gh-53203: Fix test_strptime on Solaris (GH-125785) Use fixed timezone. Skip roundtrip tests on locales with 2-digit year. --- Lib/test/test_strptime.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index 09f6f656bfcb0d..9f5cfca9c7f124 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -515,12 +515,17 @@ def test_date_time_locale(self): # NB: Dates before 1969 do not roundtrip on some locales: # az_IR, bo_CN, bo_IN, dz_BT, eu_ES, eu_FR, fa_IR, or_IN. + @support.run_with_tz('STD-1DST,M4.1.0,M10.1.0') @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', 'ar_AE', 'mfe_MU', 'yo_NG', 'csb_PL', 'br_FR', 'gez_ET', 'brx_IN', 'my_MM', 'shn_MM') def test_date_time_locale2(self): # Test %c directive + loc = locale.getlocale(locale.LC_TIME)[0] + if sys.platform.startswith('sunos'): + if loc in ('ar_AE',): + self.skipTest(f'locale {loc!r} may not work on this platform') self.roundtrip('%c', slice(0, 6), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) self.roundtrip('%c', slice(0, 6), (1800, 1, 1, 0, 0, 0, 0, 1, 0)) @@ -553,6 +558,10 @@ def test_date_locale(self): 'eu_ES', 'ar_AE', 'my_MM', 'shn_MM') def test_date_locale2(self): # Test %x directive + loc = locale.getlocale(locale.LC_TIME)[0] + if sys.platform.startswith('sunos'): + if loc in ('en_US', 'de_DE', 'ar_AE'): + self.skipTest(f'locale {loc!r} may not work on this platform') self.roundtrip('%x', slice(0, 3), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) self.roundtrip('%x', slice(0, 3), (1800, 1, 1, 0, 0, 0, 0, 1, 0)) From 44f841f01af0fb038e142a07f15eda1ecdd5b08a Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 21 Oct 2024 13:39:07 -0600 Subject: [PATCH 17/36] gh-125716: Raise an Exception If _globals_init() Fails In the _interpqueues Module (gh-125802) The fix applies to the _interpchannels module as well. 
I've also included a drive-by typo fix for _interpqueues. --- Modules/_interpchannelsmodule.c | 3 ++- Modules/_interpqueuesmodule.c | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Modules/_interpchannelsmodule.c b/Modules/_interpchannelsmodule.c index a8b4a8d76b0eaa..c52cde6da500f7 100644 --- a/Modules/_interpchannelsmodule.c +++ b/Modules/_interpchannelsmodule.c @@ -3482,7 +3482,8 @@ The 'interpreters' module provides a more convenient interface."); static int module_exec(PyObject *mod) { - if (_globals_init() != 0) { + int err = _globals_init(); + if (handle_channel_error(err, mod, -1)) { return -1; } diff --git a/Modules/_interpqueuesmodule.c b/Modules/_interpqueuesmodule.c index 55c43199ee4d79..aa70134d82b046 100644 --- a/Modules/_interpqueuesmodule.c +++ b/Modules/_interpqueuesmodule.c @@ -1312,7 +1312,7 @@ _queueid_xid_new(int64_t qid) struct _queueid_xid *data = PyMem_RawMalloc(sizeof(struct _queueid_xid)); if (data == NULL) { - _queues_incref(queues, qid); + _queues_decref(queues, qid); return NULL; } data->qid = qid; @@ -1894,7 +1894,8 @@ The 'interpreters' module provides a more convenient interface."); static int module_exec(PyObject *mod) { - if (_globals_init() != 0) { + int err = _globals_init(); + if (handle_queue_error(err, mod, -1)) { return -1; } From d48cc82ed25e26b02eb97c6263d95dcaa1e9111b Mon Sep 17 00:00:00 2001 From: Y5 <124019959+y5c4l3@users.noreply.github.com> Date: Tue, 22 Oct 2024 04:48:04 +0800 Subject: [PATCH 18/36] gh-124651: Quote template strings in `venv` activation scripts (GH-124712) This patch properly quotes template strings in `venv` activation scripts. This mitigates potential command injection. --- Lib/test/test_venv.py | 81 +++++++++++++++++++ Lib/venv/__init__.py | 42 ++++++++-- Lib/venv/scripts/common/activate | 10 +-- Lib/venv/scripts/common/activate.fish | 8 +- Lib/venv/scripts/nt/activate.bat | 6 +- Lib/venv/scripts/posix/activate.csh | 8 +- ...-09-28-02-03-04.gh-issue-124651.bLBGtH.rst | 1 + 7 files changed, 135 insertions(+), 21 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-09-28-02-03-04.gh-issue-124651.bLBGtH.rst diff --git a/Lib/test/test_venv.py b/Lib/test/test_venv.py index 1ef08da326c18c..6b2127bd31e40a 100644 --- a/Lib/test/test_venv.py +++ b/Lib/test/test_venv.py @@ -17,6 +17,7 @@ import sys import sysconfig import tempfile +import shlex from test.support import (captured_stdout, captured_stderr, skip_if_broken_multiprocessing_synchronize, verbose, requires_subprocess, is_android, is_apple_mobile, @@ -110,6 +111,10 @@ def get_text_file_contents(self, *args, encoding='utf-8'): result = f.read() return result + def assertEndsWith(self, string, tail): + if not string.endswith(tail): + self.fail(f"String {string!r} does not end with {tail!r}") + class BasicTest(BaseTest): """Test venv module functionality.""" @@ -488,6 +493,82 @@ def test_executable_symlinks(self): 'import sys; print(sys.executable)']) self.assertEqual(out.strip(), envpy.encode()) + # gh-124651: test quoted strings + @unittest.skipIf(os.name == 'nt', 'contains invalid characters on Windows') + def test_special_chars_bash(self): + """ + Test that the template strings are quoted properly (bash) + """ + rmtree(self.env_dir) + bash = shutil.which('bash') + if bash is None: + self.skipTest('bash required for this test') + env_name = '"\';&&$e|\'"' + env_dir = os.path.join(os.path.realpath(self.env_dir), env_name) + builder = venv.EnvBuilder(clear=True) + builder.create(env_dir) + activate = os.path.join(env_dir, self.bindir, 'activate') 
+ test_script = os.path.join(self.env_dir, 'test_special_chars.sh') + with open(test_script, "w") as f: + f.write(f'source {shlex.quote(activate)}\n' + 'python -c \'import sys; print(sys.executable)\'\n' + 'python -c \'import os; print(os.environ["VIRTUAL_ENV"])\'\n' + 'deactivate\n') + out, err = check_output([bash, test_script]) + lines = out.splitlines() + self.assertTrue(env_name.encode() in lines[0]) + self.assertEndsWith(lines[1], env_name.encode()) + + # gh-124651: test quoted strings + @unittest.skipIf(os.name == 'nt', 'contains invalid characters on Windows') + def test_special_chars_csh(self): + """ + Test that the template strings are quoted properly (csh) + """ + rmtree(self.env_dir) + csh = shutil.which('tcsh') or shutil.which('csh') + if csh is None: + self.skipTest('csh required for this test') + env_name = '"\';&&$e|\'"' + env_dir = os.path.join(os.path.realpath(self.env_dir), env_name) + builder = venv.EnvBuilder(clear=True) + builder.create(env_dir) + activate = os.path.join(env_dir, self.bindir, 'activate.csh') + test_script = os.path.join(self.env_dir, 'test_special_chars.csh') + with open(test_script, "w") as f: + f.write(f'source {shlex.quote(activate)}\n' + 'python -c \'import sys; print(sys.executable)\'\n' + 'python -c \'import os; print(os.environ["VIRTUAL_ENV"])\'\n' + 'deactivate\n') + out, err = check_output([csh, test_script]) + lines = out.splitlines() + self.assertTrue(env_name.encode() in lines[0]) + self.assertEndsWith(lines[1], env_name.encode()) + + # gh-124651: test quoted strings on Windows + @unittest.skipUnless(os.name == 'nt', 'only relevant on Windows') + def test_special_chars_windows(self): + """ + Test that the template strings are quoted properly on Windows + """ + rmtree(self.env_dir) + env_name = "'&&^$e" + env_dir = os.path.join(os.path.realpath(self.env_dir), env_name) + builder = venv.EnvBuilder(clear=True) + builder.create(env_dir) + activate = os.path.join(env_dir, self.bindir, 'activate.bat') + test_batch = os.path.join(self.env_dir, 'test_special_chars.bat') + with open(test_batch, "w") as f: + f.write('@echo off\n' + f'"{activate}" & ' + f'{self.exe} -c "import sys; print(sys.executable)" & ' + f'{self.exe} -c "import os; print(os.environ[\'VIRTUAL_ENV\'])" & ' + 'deactivate') + out, err = check_output([test_batch]) + lines = out.splitlines() + self.assertTrue(env_name.encode() in lines[0]) + self.assertEndsWith(lines[1], env_name.encode()) + @unittest.skipUnless(os.name == 'nt', 'only relevant on Windows') def test_unicode_in_batch_file(self): """ diff --git a/Lib/venv/__init__.py b/Lib/venv/__init__.py index a5d348ba4cf121..ca1af84e6705fe 100644 --- a/Lib/venv/__init__.py +++ b/Lib/venv/__init__.py @@ -11,6 +11,7 @@ import sys import sysconfig import types +import shlex CORE_VENV_DEPS = ('pip',) @@ -484,11 +485,41 @@ def replace_variables(self, text, context): :param context: The information for the environment creation request being processed. 
""" - text = text.replace('__VENV_DIR__', context.env_dir) - text = text.replace('__VENV_NAME__', context.env_name) - text = text.replace('__VENV_PROMPT__', context.prompt) - text = text.replace('__VENV_BIN_NAME__', context.bin_name) - text = text.replace('__VENV_PYTHON__', context.env_exe) + replacements = { + '__VENV_DIR__': context.env_dir, + '__VENV_NAME__': context.env_name, + '__VENV_PROMPT__': context.prompt, + '__VENV_BIN_NAME__': context.bin_name, + '__VENV_PYTHON__': context.env_exe, + } + + def quote_ps1(s): + """ + This should satisfy PowerShell quoting rules [1], unless the quoted + string is passed directly to Windows native commands [2]. + [1]: https://learn.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_quoting_rules + [2]: https://learn.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_parsing#passing-arguments-that-contain-quote-characters + """ + s = s.replace("'", "''") + return f"'{s}'" + + def quote_bat(s): + return s + + # gh-124651: need to quote the template strings properly + quote = shlex.quote + script_path = context.script_path + if script_path.endswith('.ps1'): + quote = quote_ps1 + elif script_path.endswith('.bat'): + quote = quote_bat + else: + # fallbacks to POSIX shell compliant quote + quote = shlex.quote + + replacements = {key: quote(s) for key, s in replacements.items()} + for key, quoted in replacements.items(): + text = text.replace(key, quoted) return text def install_scripts(self, context, path): @@ -538,6 +569,7 @@ def skip_file(f): with open(srcfile, 'rb') as f: data = f.read() try: + context.script_path = srcfile new_data = ( self.replace_variables(data.decode('utf-8'), context) .encode('utf-8') diff --git a/Lib/venv/scripts/common/activate b/Lib/venv/scripts/common/activate index 44f137672e9d2e..70673a265d41f8 100644 --- a/Lib/venv/scripts/common/activate +++ b/Lib/venv/scripts/common/activate @@ -41,20 +41,20 @@ case "$(uname)" in CYGWIN*|MSYS*|MINGW*) # transform D:\path\to\venv to /d/path/to/venv on MSYS and MINGW # and to /cygdrive/d/path/to/venv on Cygwin - VIRTUAL_ENV=$(cygpath "__VENV_DIR__") + VIRTUAL_ENV=$(cygpath __VENV_DIR__) export VIRTUAL_ENV ;; *) # use the path as-is - export VIRTUAL_ENV="__VENV_DIR__" + export VIRTUAL_ENV=__VENV_DIR__ ;; esac _OLD_VIRTUAL_PATH="$PATH" -PATH="$VIRTUAL_ENV/__VENV_BIN_NAME__:$PATH" +PATH="$VIRTUAL_ENV/"__VENV_BIN_NAME__":$PATH" export PATH -VIRTUAL_ENV_PROMPT="__VENV_PROMPT__" +VIRTUAL_ENV_PROMPT=__VENV_PROMPT__ export VIRTUAL_ENV_PROMPT # unset PYTHONHOME if set @@ -67,7 +67,7 @@ fi if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then _OLD_VIRTUAL_PS1="${PS1:-}" - PS1="(__VENV_PROMPT__) ${PS1:-}" + PS1="("__VENV_PROMPT__") ${PS1:-}" export PS1 fi diff --git a/Lib/venv/scripts/common/activate.fish b/Lib/venv/scripts/common/activate.fish index 25c42756789bbc..284a7469c99b57 100644 --- a/Lib/venv/scripts/common/activate.fish +++ b/Lib/venv/scripts/common/activate.fish @@ -33,11 +33,11 @@ end # Unset irrelevant variables. deactivate nondestructive -set -gx VIRTUAL_ENV "__VENV_DIR__" +set -gx VIRTUAL_ENV __VENV_DIR__ set -gx _OLD_VIRTUAL_PATH $PATH -set -gx PATH "$VIRTUAL_ENV/__VENV_BIN_NAME__" $PATH -set -gx VIRTUAL_ENV_PROMPT "__VENV_PROMPT__" +set -gx PATH "$VIRTUAL_ENV/"__VENV_BIN_NAME__ $PATH +set -gx VIRTUAL_ENV_PROMPT __VENV_PROMPT__ # Unset PYTHONHOME if set. if set -q PYTHONHOME @@ -57,7 +57,7 @@ if test -z "$VIRTUAL_ENV_DISABLE_PROMPT" set -l old_status $status # Output the venv prompt; color taken from the blue of the Python logo. 
- printf "%s(%s)%s " (set_color 4B8BBE) "__VENV_PROMPT__" (set_color normal) + printf "%s(%s)%s " (set_color 4B8BBE) __VENV_PROMPT__ (set_color normal) # Restore the return status of the previous command. echo "exit $old_status" | . diff --git a/Lib/venv/scripts/nt/activate.bat b/Lib/venv/scripts/nt/activate.bat index dd5ea8eb67b90a..35533e4b551155 100644 --- a/Lib/venv/scripts/nt/activate.bat +++ b/Lib/venv/scripts/nt/activate.bat @@ -8,7 +8,7 @@ if defined _OLD_CODEPAGE ( "%SystemRoot%\System32\chcp.com" 65001 > nul ) -set VIRTUAL_ENV=__VENV_DIR__ +set "VIRTUAL_ENV=__VENV_DIR__" if not defined PROMPT set PROMPT=$P$G @@ -24,8 +24,8 @@ set PYTHONHOME= if defined _OLD_VIRTUAL_PATH set PATH=%_OLD_VIRTUAL_PATH% if not defined _OLD_VIRTUAL_PATH set _OLD_VIRTUAL_PATH=%PATH% -set PATH=%VIRTUAL_ENV%\__VENV_BIN_NAME__;%PATH% -set VIRTUAL_ENV_PROMPT=__VENV_PROMPT__ +set "PATH=%VIRTUAL_ENV%\__VENV_BIN_NAME__;%PATH%" +set "VIRTUAL_ENV_PROMPT=__VENV_PROMPT__" :END if defined _OLD_CODEPAGE ( diff --git a/Lib/venv/scripts/posix/activate.csh b/Lib/venv/scripts/posix/activate.csh index b5db4a0f847e06..2a3fa835476ab9 100644 --- a/Lib/venv/scripts/posix/activate.csh +++ b/Lib/venv/scripts/posix/activate.csh @@ -9,17 +9,17 @@ alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PA # Unset irrelevant variables. deactivate nondestructive -setenv VIRTUAL_ENV "__VENV_DIR__" +setenv VIRTUAL_ENV __VENV_DIR__ set _OLD_VIRTUAL_PATH="$PATH" -setenv PATH "$VIRTUAL_ENV/__VENV_BIN_NAME__:$PATH" -setenv VIRTUAL_ENV_PROMPT "__VENV_PROMPT__" +setenv PATH "$VIRTUAL_ENV/"__VENV_BIN_NAME__":$PATH" +setenv VIRTUAL_ENV_PROMPT __VENV_PROMPT__ set _OLD_VIRTUAL_PROMPT="$prompt" if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then - set prompt = "(__VENV_PROMPT__) $prompt:q" + set prompt = "("__VENV_PROMPT__") $prompt:q" endif alias pydoc python -m pydoc diff --git a/Misc/NEWS.d/next/Library/2024-09-28-02-03-04.gh-issue-124651.bLBGtH.rst b/Misc/NEWS.d/next/Library/2024-09-28-02-03-04.gh-issue-124651.bLBGtH.rst new file mode 100644 index 00000000000000..17fc9171390dd9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-09-28-02-03-04.gh-issue-124651.bLBGtH.rst @@ -0,0 +1 @@ +Properly quote template strings in :mod:`venv` activation scripts. From 4848b0b92ce2737cea08fa3b322fd0f0a671bb07 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 21 Oct 2024 15:49:58 -0600 Subject: [PATCH 19/36] gh-125716: Use A Global Mutex When Initializing Global State For The _interpqueues Module (gh-125803) This includes a drive-by cleanup in _queues_init() and _queues_fini(). This change also applies to the _interpchannels module. 
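For context, the pattern being introduced looks roughly like the sketch below. The names and the placeholder state are illustrative only, not the actual `_interpqueues`/`_interpchannels` code; the real modules return module-specific error codes rather than -1.

```
#include "Python.h"   /* PyMutex is public C API as of Python 3.13 */
#include <assert.h>

static struct {
    PyMutex mutex;       /* zeroed storage == unlocked; no allocation */
    int module_count;
    void *shared_state;  /* stand-in for the real _queues/_channels state */
} _globals = {0};

static int
sketch_globals_init(void)
{
    PyMutex_Lock(&_globals.mutex);
    assert(_globals.module_count >= 0);
    _globals.module_count++;
    if (_globals.module_count == 1) {
        /* The first module instance creates the shared state. */
        _globals.shared_state = PyMem_RawCalloc(1, 64);
        if (_globals.shared_state == NULL) {
            _globals.module_count--;
            PyMutex_Unlock(&_globals.mutex);
            return -1;
        }
    }
    PyMutex_Unlock(&_globals.mutex);
    return 0;
}

static void
sketch_globals_fini(void)
{
    PyMutex_Lock(&_globals.mutex);
    assert(_globals.module_count > 0);
    if (--_globals.module_count == 0) {
        /* The last module instance frees the shared state. */
        PyMem_RawFree(_globals.shared_state);
        _globals.shared_state = NULL;
    }
    PyMutex_Unlock(&_globals.mutex);
}
```

Holding the statically initialized `PyMutex` across both the count update and the state creation/teardown is what closes the race that the old `// XXX This isn't thread-safe.` comments pointed at.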
--- Modules/_interpchannelsmodule.c | 64 +++++++++++++++++------------ Modules/_interpqueuesmodule.c | 72 ++++++++++++++++++--------------- 2 files changed, 79 insertions(+), 57 deletions(-) diff --git a/Modules/_interpchannelsmodule.c b/Modules/_interpchannelsmodule.c index c52cde6da500f7..8e6b21db76e01c 100644 --- a/Modules/_interpchannelsmodule.c +++ b/Modules/_interpchannelsmodule.c @@ -28,6 +28,7 @@ This module has the following process-global state: _globals (static struct globals): + mutex (PyMutex) module_count (int) channels (struct _channels): numopen (int64_t) @@ -1349,21 +1350,29 @@ typedef struct _channels { static void _channels_init(_channels *channels, PyThread_type_lock mutex) { - channels->mutex = mutex; - channels->head = NULL; - channels->numopen = 0; - channels->next_id = 0; + assert(mutex != NULL); + assert(channels->mutex == NULL); + *channels = (_channels){ + .mutex = mutex, + .head = NULL, + .numopen = 0, + .next_id = 0, + }; } static void -_channels_fini(_channels *channels) +_channels_fini(_channels *channels, PyThread_type_lock *p_mutex) { + PyThread_type_lock mutex = channels->mutex; + assert(mutex != NULL); + + PyThread_acquire_lock(mutex, WAIT_LOCK); assert(channels->numopen == 0); assert(channels->head == NULL); - if (channels->mutex != NULL) { - PyThread_free_lock(channels->mutex); - channels->mutex = NULL; - } + *channels = (_channels){0}; + PyThread_release_lock(mutex); + + *p_mutex = mutex; } static int64_t @@ -2812,6 +2821,7 @@ set_channelend_types(PyObject *mod, PyTypeObject *send, PyTypeObject *recv) the data that we need to share between interpreters, so it cannot hold PyObject values. */ static struct globals { + PyMutex mutex; int module_count; _channels channels; } _globals = {0}; @@ -2819,32 +2829,36 @@ static struct globals { static int _globals_init(void) { - // XXX This isn't thread-safe. + PyMutex_Lock(&_globals.mutex); + assert(_globals.module_count >= 0); _globals.module_count++; - if (_globals.module_count > 1) { - // Already initialized. - return 0; - } - - assert(_globals.channels.mutex == NULL); - PyThread_type_lock mutex = PyThread_allocate_lock(); - if (mutex == NULL) { - return ERR_CHANNELS_MUTEX_INIT; + if (_globals.module_count == 1) { + // Called for the first time. + PyThread_type_lock mutex = PyThread_allocate_lock(); + if (mutex == NULL) { + _globals.module_count--; + PyMutex_Unlock(&_globals.mutex); + return ERR_CHANNELS_MUTEX_INIT; + } + _channels_init(&_globals.channels, mutex); } - _channels_init(&_globals.channels, mutex); + PyMutex_Unlock(&_globals.mutex); return 0; } static void _globals_fini(void) { - // XXX This isn't thread-safe. 
+ PyMutex_Lock(&_globals.mutex); + assert(_globals.module_count > 0); _globals.module_count--; - if (_globals.module_count > 0) { - return; + if (_globals.module_count == 0) { + PyThread_type_lock mutex; + _channels_fini(&_globals.channels, &mutex); + assert(mutex != NULL); + PyThread_free_lock(mutex); } - - _channels_fini(&_globals.channels); + PyMutex_Unlock(&_globals.mutex); } static _channels * diff --git a/Modules/_interpqueuesmodule.c b/Modules/_interpqueuesmodule.c index aa70134d82b046..297a1763a98ce6 100644 --- a/Modules/_interpqueuesmodule.c +++ b/Modules/_interpqueuesmodule.c @@ -845,28 +845,31 @@ typedef struct _queues { static void _queues_init(_queues *queues, PyThread_type_lock mutex) { - queues->mutex = mutex; - queues->head = NULL; - queues->count = 0; - queues->next_id = 1; + assert(mutex != NULL); + assert(queues->mutex == NULL); + *queues = (_queues){ + .mutex = mutex, + .head = NULL, + .count = 0, + .next_id = 1, + }; } static void -_queues_fini(_queues *queues) +_queues_fini(_queues *queues, PyThread_type_lock *p_mutex) { + PyThread_type_lock mutex = queues->mutex; + assert(mutex != NULL); + + PyThread_acquire_lock(mutex, WAIT_LOCK); if (queues->count > 0) { - PyThread_acquire_lock(queues->mutex, WAIT_LOCK); - assert((queues->count == 0) != (queues->head != NULL)); - _queueref *head = queues->head; - queues->head = NULL; - queues->count = 0; - PyThread_release_lock(queues->mutex); - _queuerefs_clear(head); - } - if (queues->mutex != NULL) { - PyThread_free_lock(queues->mutex); - queues->mutex = NULL; + assert(queues->head != NULL); + _queuerefs_clear(queues->head); } + *queues = (_queues){0}; + PyThread_release_lock(mutex); + + *p_mutex = mutex; } static int64_t @@ -1398,6 +1401,7 @@ _queueobj_shared(PyThreadState *tstate, PyObject *queueobj, the data that we need to share between interpreters, so it cannot hold PyObject values. */ static struct globals { + PyMutex mutex; int module_count; _queues queues; } _globals = {0}; @@ -1405,32 +1409,36 @@ static struct globals { static int _globals_init(void) { - // XXX This isn't thread-safe. + PyMutex_Lock(&_globals.mutex); + assert(_globals.module_count >= 0); _globals.module_count++; - if (_globals.module_count > 1) { - // Already initialized. - return 0; - } - - assert(_globals.queues.mutex == NULL); - PyThread_type_lock mutex = PyThread_allocate_lock(); - if (mutex == NULL) { - return ERR_QUEUES_ALLOC; + if (_globals.module_count == 1) { + // Called for the first time. + PyThread_type_lock mutex = PyThread_allocate_lock(); + if (mutex == NULL) { + _globals.module_count--; + PyMutex_Unlock(&_globals.mutex); + return ERR_QUEUES_ALLOC; + } + _queues_init(&_globals.queues, mutex); } - _queues_init(&_globals.queues, mutex); + PyMutex_Unlock(&_globals.mutex); return 0; } static void _globals_fini(void) { - // XXX This isn't thread-safe. 
+ PyMutex_Lock(&_globals.mutex); + assert(_globals.module_count > 0); _globals.module_count--; - if (_globals.module_count > 0) { - return; + if (_globals.module_count == 0) { + PyThread_type_lock mutex; + _queues_fini(&_globals.queues, &mutex); + assert(mutex != NULL); + PyThread_free_lock(mutex); } - - _queues_fini(&_globals.queues); + PyMutex_Unlock(&_globals.mutex); } static _queues * From d0bfff47fb2aea9272b56ac05984eaacc32379cc Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Mon, 21 Oct 2024 23:37:31 +0100 Subject: [PATCH 20/36] gh-119786: [doc] more consistent syntax in InternalDocs (#125815) --- InternalDocs/adaptive.md | 3 +- InternalDocs/compiler.md | 418 +++++++++++++---------------- InternalDocs/exception_handling.md | 28 +- InternalDocs/frames.md | 18 +- InternalDocs/garbage_collector.md | 128 ++++----- InternalDocs/parser.md | 214 +++++++-------- 6 files changed, 380 insertions(+), 429 deletions(-) diff --git a/InternalDocs/adaptive.md b/InternalDocs/adaptive.md index 09245730b271fa..4ae9e85b387f39 100644 --- a/InternalDocs/adaptive.md +++ b/InternalDocs/adaptive.md @@ -31,8 +31,7 @@ although these are not fundamental and may change: ## Example family -The `LOAD_GLOBAL` instruction (in -[Python/bytecodes.c](https://github.com/python/cpython/blob/main/Python/bytecodes.c)) +The `LOAD_GLOBAL` instruction (in [Python/bytecodes.c](../Python/bytecodes.c)) already has an adaptive family that serves as a relatively simple example. The `LOAD_GLOBAL` instruction performs adaptive specialization, diff --git a/InternalDocs/compiler.md b/InternalDocs/compiler.md index e9608977b0cbb3..0da4670c792cb5 100644 --- a/InternalDocs/compiler.md +++ b/InternalDocs/compiler.md @@ -7,17 +7,16 @@ Abstract In CPython, the compilation from source code to bytecode involves several steps: -1. Tokenize the source code - [Parser/lexer/](https://github.com/python/cpython/blob/main/Parser/lexer/) - and [Parser/tokenizer/](https://github.com/python/cpython/blob/main/Parser/tokenizer/). +1. Tokenize the source code [Parser/lexer/](../Parser/lexer/) + and [Parser/tokenizer/](../Parser/tokenizer/). 2. Parse the stream of tokens into an Abstract Syntax Tree - [Parser/parser.c](https://github.com/python/cpython/blob/main/Parser/parser.c). + [Parser/parser.c](../Parser/parser.c). 3. Transform AST into an instruction sequence - [Python/compile.c](https://github.com/python/cpython/blob/main/Python/compile.c). + [Python/compile.c](../Python/compile.c). 4. Construct a Control Flow Graph and apply optimizations to it - [Python/flowgraph.c](https://github.com/python/cpython/blob/main/Python/flowgraph.c). + [Python/flowgraph.c](../Python/flowgraph.c). 5. Emit bytecode based on the Control Flow Graph - [Python/assemble.c](https://github.com/python/cpython/blob/main/Python/assemble.c). + [Python/assemble.c](../Python/assemble.c). This document outlines how these steps of the process work. @@ -36,12 +35,10 @@ of tokens rather than a stream of characters which is more common with PEG parsers. The grammar file for Python can be found in -[Grammar/python.gram](https://github.com/python/cpython/blob/main/Grammar/python.gram). -The definitions for literal tokens (such as ``:``, numbers, etc.) can be found in -[Grammar/Tokens](https://github.com/python/cpython/blob/main/Grammar/Tokens). 
-Various C files, including -[Parser/parser.c](https://github.com/python/cpython/blob/main/Parser/parser.c) -are generated from these. +[Grammar/python.gram](../Grammar/python.gram). +The definitions for literal tokens (such as `:`, numbers, etc.) can be found in +[Grammar/Tokens](../Grammar/Tokens). Various C files, including +[Parser/parser.c](../Parser/parser.c) are generated from these. See Also: @@ -63,7 +60,7 @@ specification of the AST nodes is specified using the Zephyr Abstract Syntax Definition Language (ASDL) [^1], [^2]. The definition of the AST nodes for Python is found in the file -[Parser/Python.asdl](https://github.com/python/cpython/blob/main/Parser/Python.asdl). +[Parser/Python.asdl](../Parser/Python.asdl). Each AST node (representing statements, expressions, and several specialized types, like list comprehensions and exception handlers) is @@ -87,14 +84,14 @@ approach and syntax: The preceding example describes two different kinds of statements and an expression: function definitions, return statements, and yield expressions. -All three kinds are considered of type ``stmt`` as shown by ``|`` separating +All three kinds are considered of type `stmt` as shown by `|` separating the various kinds. They all take arguments of various kinds and amounts. -Modifiers on the argument type specify the number of values needed; ``?`` -means it is optional, ``*`` means 0 or more, while no modifier means only one -value for the argument and it is required. ``FunctionDef``, for instance, -takes an ``identifier`` for the *name*, ``arguments`` for *args*, zero or more -``stmt`` arguments for *body*, and zero or more ``expr`` arguments for +Modifiers on the argument type specify the number of values needed; `?` +means it is optional, `*` means 0 or more, while no modifier means only one +value for the argument and it is required. `FunctionDef`, for instance, +takes an `identifier` for the *name*, `arguments` for *args*, zero or more +`stmt` arguments for *body*, and zero or more `expr` arguments for *decorators*. Do notice that something like 'arguments', which is a node type, is @@ -132,9 +129,9 @@ The statement definitions above generate the following C structure type: ``` Also generated are a series of constructor functions that allocate (in -this case) a ``stmt_ty`` struct with the appropriate initialization. The -``kind`` field specifies which component of the union is initialized. The -``FunctionDef()`` constructor function sets 'kind' to ``FunctionDef_kind`` and +this case) a `stmt_ty` struct with the appropriate initialization. The +`kind` field specifies which component of the union is initialized. The +`FunctionDef()` constructor function sets 'kind' to `FunctionDef_kind` and initializes the *name*, *args*, *body*, and *attributes* fields. See also @@ -156,13 +153,13 @@ In general, unless you are working on the critical core of the compiler, memory management can be completely ignored. But if you are working at either the very beginning of the compiler or the end, you need to care about how the arena works. All code relating to the arena is in either -[Include/internal/pycore_pyarena.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_pyarena.h) -or [Python/pyarena.c](https://github.com/python/cpython/blob/main/Python/pyarena.c). +[Include/internal/pycore_pyarena.h](../Include/internal/pycore_pyarena.h) +or [Python/pyarena.c](../Python/pyarena.c). -``PyArena_New()`` will create a new arena. 
The returned ``PyArena`` structure +`PyArena_New()` will create a new arena. The returned `PyArena` structure will store pointers to all memory given to it. This does the bookkeeping of what memory needs to be freed when the compiler is finished with the memory it -used. That freeing is done with ``PyArena_Free()``. This only needs to be +used. That freeing is done with `PyArena_Free()`. This only needs to be called in strategic areas where the compiler exits. As stated above, in general you should not have to worry about memory @@ -173,25 +170,25 @@ The only exception comes about when managing a PyObject. Since the rest of Python uses reference counting, there is extra support added to the arena to cleanup each PyObject that was allocated. These cases are very rare. However, if you've allocated a PyObject, you must tell -the arena about it by calling ``PyArena_AddPyObject()``. +the arena about it by calling `PyArena_AddPyObject()`. Source code to AST ================== The AST is generated from source code using the function -``_PyParser_ASTFromString()`` or ``_PyParser_ASTFromFile()`` -[Parser/peg_api.c](https://github.com/python/cpython/blob/main/Parser/peg_api.c). +`_PyParser_ASTFromString()` or `_PyParser_ASTFromFile()` +[Parser/peg_api.c](../Parser/peg_api.c). After some checks, a helper function in -[Parser/parser.c](https://github.com/python/cpython/blob/main/Parser/parser.c) +[Parser/parser.c](../Parser/parser.c) begins applying production rules on the source code it receives; converting source code to tokens and matching these tokens recursively to their corresponding rule. The production rule's corresponding rule function is called on every match. These rule functions follow the format `xx_rule`. Where *xx* is the grammar rule that the function handles and is automatically derived from -[Grammar/python.gram](https://github.com/python/cpython/blob/main/Grammar/python.gram) by -[Tools/peg_generator/pegen/c_generator.py](https://github.com/python/cpython/blob/main/Tools/peg_generator/pegen/c_generator.py). +[Grammar/python.gram](../Grammar/python.gram) by +[Tools/peg_generator/pegen/c_generator.py](../Tools/peg_generator/pegen/c_generator.py). Each rule function in turn creates an AST node as it goes along. It does this by allocating all the new nodes it needs, calling the proper AST node creation @@ -202,18 +199,15 @@ there are no more rules, an error is set and the parsing ends. The AST node creation helper functions have the name `_PyAST_{xx}` where *xx* is the AST node that the function creates. These are defined by the -ASDL grammar and contained in -[Python/Python-ast.c](https://github.com/python/cpython/blob/main/Python/Python-ast.c) -(which is generated by -[Parser/asdl_c.py](https://github.com/python/cpython/blob/main/Parser/asdl_c.py) -from -[Parser/Python.asdl](https://github.com/python/cpython/blob/main/Parser/Python.asdl)). -This all leads to a sequence of AST nodes stored in ``asdl_seq`` structs. +ASDL grammar and contained in [Python/Python-ast.c](../Python/Python-ast.c) +(which is generated by [Parser/asdl_c.py](../Parser/asdl_c.py) +from [Parser/Python.asdl](../Parser/Python.asdl)). +This all leads to a sequence of AST nodes stored in `asdl_seq` structs. To demonstrate everything explained so far, here's the rule function responsible for a simple named import statement such as -``import sys``. Note that error-checking and debugging code has been -omitted. Removed parts are represented by ``...``. +`import sys`. 
Note that error-checking and debugging code has been +omitted. Removed parts are represented by `...`. Furthermore, some comments have been added for explanation. These comments may not be present in the actual code. @@ -255,55 +249,52 @@ may not be present in the actual code. To improve backtracking performance, some rules (chosen by applying a -``(memo)`` flag in the grammar file) are memoized. Each rule function checks if +`(memo)` flag in the grammar file) are memoized. Each rule function checks if a memoized version exists and returns that if so, else it continues in the manner stated in the previous paragraphs. -There are macros for creating and using ``asdl_xx_seq *`` types, where *xx* is +There are macros for creating and using `asdl_xx_seq *` types, where *xx* is a type of the ASDL sequence. Three main types are defined -manually -- ``generic``, ``identifier`` and ``int``. These types are found in -[Python/asdl.c](https://github.com/python/cpython/blob/main/Python/asdl.c) -and its corresponding header file -[Include/internal/pycore_asdl.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_asdl.h). -Functions and macros for creating ``asdl_xx_seq *`` types are as follows: - -``_Py_asdl_generic_seq_new(Py_ssize_t, PyArena *)`` - Allocate memory for an ``asdl_generic_seq`` of the specified length -``_Py_asdl_identifier_seq_new(Py_ssize_t, PyArena *)`` - Allocate memory for an ``asdl_identifier_seq`` of the specified length -``_Py_asdl_int_seq_new(Py_ssize_t, PyArena *)`` - Allocate memory for an ``asdl_int_seq`` of the specified length +manually -- `generic`, `identifier` and `int`. These types are found in +[Python/asdl.c](../Python/asdl.c) and its corresponding header file +[Include/internal/pycore_asdl.h](../Include/internal/pycore_asdl.h). +Functions and macros for creating `asdl_xx_seq *` types are as follows: + +`_Py_asdl_generic_seq_new(Py_ssize_t, PyArena *)` + Allocate memory for an `asdl_generic_seq` of the specified length +`_Py_asdl_identifier_seq_new(Py_ssize_t, PyArena *)` + Allocate memory for an `asdl_identifier_seq` of the specified length +`_Py_asdl_int_seq_new(Py_ssize_t, PyArena *)` + Allocate memory for an `asdl_int_seq` of the specified length In addition to the three types mentioned above, some ASDL sequence types are -automatically generated by -[Parser/asdl_c.py](https://github.com/python/cpython/blob/main/Parser/asdl_c.py) -and found in -[Include/internal/pycore_ast.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_ast.h). +automatically generated by [Parser/asdl_c.py](../Parser/asdl_c.py) and found in +[Include/internal/pycore_ast.h](../Include/internal/pycore_ast.h). Macros for using both manually defined and automatically generated ASDL sequence types are as follows: -``asdl_seq_GET(asdl_xx_seq *, int)`` - Get item held at a specific position in an ``asdl_xx_seq`` -``asdl_seq_SET(asdl_xx_seq *, int, stmt_ty)`` - Set a specific index in an ``asdl_xx_seq`` to the specified value +`asdl_seq_GET(asdl_xx_seq *, int)` + Get item held at a specific position in an `asdl_xx_seq` +`asdl_seq_SET(asdl_xx_seq *, int, stmt_ty)` + Set a specific index in an `asdl_xx_seq` to the specified value Untyped counterparts exist for some of the typed macros. 
These are useful when a function needs to manipulate a generic ASDL sequence: -``asdl_seq_GET_UNTYPED(asdl_seq *, int)`` - Get item held at a specific position in an ``asdl_seq`` -``asdl_seq_SET_UNTYPED(asdl_seq *, int, stmt_ty)`` - Set a specific index in an ``asdl_seq`` to the specified value -``asdl_seq_LEN(asdl_seq *)`` - Return the length of an ``asdl_seq`` or ``asdl_xx_seq`` +`asdl_seq_GET_UNTYPED(asdl_seq *, int)` + Get item held at a specific position in an `asdl_seq` +`asdl_seq_SET_UNTYPED(asdl_seq *, int, stmt_ty)` + Set a specific index in an `asdl_seq` to the specified value +`asdl_seq_LEN(asdl_seq *)` + Return the length of an `asdl_seq` or `asdl_xx_seq` Note that typed macros and functions are recommended over their untyped counterparts. Typed macros carry out checks in debug mode and aid -debugging errors caused by incorrectly casting from ``void *``. +debugging errors caused by incorrectly casting from `void *`. If you are working with statements, you must also worry about keeping track of what line number generated the statement. Currently the line -number is passed as the last parameter to each ``stmt_ty`` function. +number is passed as the last parameter to each `stmt_ty` function. See also [PEP 617: New PEG parser for CPython](https://peps.python.org/pep-0617/). @@ -333,19 +324,19 @@ else: end() ``` -The ``x < 10`` guard is represented by its own basic block that -compares ``x`` with ``10`` and then ends in a conditional jump based on +The `x < 10` guard is represented by its own basic block that +compares `x` with `10` and then ends in a conditional jump based on the result of the comparison. This conditional jump allows the block -to point to both the body of the ``if`` and the body of the ``else``. The -``if`` basic block contains the ``f1()`` and ``f2()`` calls and points to -the ``end()`` basic block. The ``else`` basic block contains the ``g()`` -call and similarly points to the ``end()`` block. +to point to both the body of the `if` and the body of the `else`. The +`if` basic block contains the `f1()` and `f2()` calls and points to +the `end()` basic block. The `else` basic block contains the `g()` +call and similarly points to the `end()` block. -Note that more complex code in the guard, the ``if`` body, or the ``else`` +Note that more complex code in the guard, the `if` body, or the `else` body may be represented by multiple basic blocks. For instance, -short-circuiting boolean logic in a guard like ``if x or y:`` -will produce one basic block that tests the truth value of ``x`` -and then points both (1) to the start of the ``if`` body and (2) to +short-circuiting boolean logic in a guard like `if x or y:` +will produce one basic block that tests the truth value of `x` +and then points both (1) to the start of the `if` body and (2) to a different basic block that tests the truth value of y. CFGs are useful as an intermediate representation of the code because @@ -354,27 +345,24 @@ they are a convenient data structure for optimizations. AST to CFG to bytecode ====================== -The conversion of an ``AST`` to bytecode is initiated by a call to the function -``_PyAST_Compile()`` in -[Python/compile.c](https://github.com/python/cpython/blob/main/Python/compile.c). +The conversion of an `AST` to bytecode is initiated by a call to the function +`_PyAST_Compile()` in [Python/compile.c](../Python/compile.c). The first step is to construct the symbol table. 
This is implemented by -``_PySymtable_Build()`` in -[Python/symtable.c](https://github.com/python/cpython/blob/main/Python/symtable.c). +`_PySymtable_Build()` in [Python/symtable.c](../Python/symtable.c). This function begins by entering the starting code block for the AST (passed-in) and then calling the proper `symtable_visit_{xx}` function (with *xx* being the AST node type). Next, the AST tree is walked with the various code blocks that delineate the reach of a local variable as blocks are entered and exited using -``symtable_enter_block()`` and ``symtable_exit_block()``, respectively. - -Once the symbol table is created, the ``AST`` is transformed by ``compiler_codegen()`` -in [Python/compile.c](https://github.com/python/cpython/blob/main/Python/compile.c) -into a sequence of pseudo instructions. These are similar to bytecode, but -in some cases they are more abstract, and are resolved later into actual -bytecode. The construction of this instruction sequence is handled by several -functions that break the task down by various AST node types. The functions are -all named `compiler_visit_{xx}` where *xx* is the name of the node type (such -as ``stmt``, ``expr``, etc.). Each function receives a ``struct compiler *`` +`symtable_enter_block()` and `symtable_exit_block()`, respectively. + +Once the symbol table is created, the `AST` is transformed by `compiler_codegen()` +in [Python/compile.c](../Python/compile.c) into a sequence of pseudo instructions. +These are similar to bytecode, but in some cases they are more abstract, and are +resolved later into actual bytecode. The construction of this instruction sequence +is handled by several functions that break the task down by various AST node types. +The functions are all named `compiler_visit_{xx}` where *xx* is the name of the node +type (such as `stmt`, `expr`, etc.). Each function receives a `struct compiler *` and `{xx}_ty` where *xx* is the AST node type. Typically these functions consist of a large 'switch' statement, branching based on the kind of node type passed to it. Simple things are handled inline in the @@ -382,242 +370,224 @@ node type passed to it. Simple things are handled inline in the functions named `compiler_{xx}` with *xx* being a descriptive name of what is being handled. -When transforming an arbitrary AST node, use the ``VISIT()`` macro. +When transforming an arbitrary AST node, use the `VISIT()` macro. The appropriate `compiler_visit_{xx}` function is called, based on the value passed in for (so `VISIT({c}, expr, {node})` calls -`compiler_visit_expr({c}, {node})`). The ``VISIT_SEQ()`` macro is very similar, +`compiler_visit_expr({c}, {node})`). The `VISIT_SEQ()` macro is very similar, but is called on AST node sequences (those values that were created as arguments to a node that used the '*' modifier). 
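As a rough sketch of the shape these visitor functions take (not a verbatim excerpt; the real functions in [Python/compile.c](../Python/compile.c) cover every node kind and all error paths, and the `ADDOP` emission macros used here are described below):

```
static int
compiler_visit_stmt(struct compiler *c, stmt_ty s)
{
    switch (s->kind) {
    case Return_kind:
        /* A simple case, handled inline: push the return value
           (or None), then emit the return instruction with the
           node's source location. */
        if (s->v.Return.value != NULL) {
            VISIT(c, expr, s->v.Return.value);
        }
        else {
            ADDOP_LOAD_CONST(c, LOC(s), Py_None);
        }
        ADDOP(c, LOC(s), RETURN_VALUE);
        break;
    case FunctionDef_kind:
        /* A complex case, farmed out to a compiler_{xx} helper. */
        return compiler_function(c, s, 0);
    /* ... one case per statement kind ... */
    }
    return SUCCESS;
}
```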
Emission of bytecode is handled by the following macros: -* ``ADDOP(struct compiler *, location, int)`` +* `ADDOP(struct compiler *, location, int)` add a specified opcode -* ``ADDOP_IN_SCOPE(struct compiler *, location, int)`` - like ``ADDOP``, but also exits current scope; used for adding return value +* `ADDOP_IN_SCOPE(struct compiler *, location, int)` + like `ADDOP`, but also exits current scope; used for adding return value opcodes in lambdas and closures -* ``ADDOP_I(struct compiler *, location, int, Py_ssize_t)`` +* `ADDOP_I(struct compiler *, location, int, Py_ssize_t)` add an opcode that takes an integer argument -* ``ADDOP_O(struct compiler *, location, int, PyObject *, TYPE)`` +* `ADDOP_O(struct compiler *, location, int, PyObject *, TYPE)` add an opcode with the proper argument based on the position of the specified PyObject in PyObject sequence object, but with no handling of mangled names; used for when you need to do named lookups of objects such as globals, consts, or parameters where name mangling is not possible and the scope of the name is known; *TYPE* is the name of PyObject sequence - (``names`` or ``varnames``) -* ``ADDOP_N(struct compiler *, location, int, PyObject *, TYPE)`` - just like ``ADDOP_O``, but steals a reference to PyObject -* ``ADDOP_NAME(struct compiler *, location, int, PyObject *, TYPE)`` - just like ``ADDOP_O``, but name mangling is also handled; used for + (`names` or `varnames`) +* `ADDOP_N(struct compiler *, location, int, PyObject *, TYPE)` + just like `ADDOP_O`, but steals a reference to PyObject +* `ADDOP_NAME(struct compiler *, location, int, PyObject *, TYPE)` + just like `ADDOP_O`, but name mangling is also handled; used for attribute loading or importing based on name -* ``ADDOP_LOAD_CONST(struct compiler *, location, PyObject *)`` - add the ``LOAD_CONST`` opcode with the proper argument based on the +* `ADDOP_LOAD_CONST(struct compiler *, location, PyObject *)` + add the `LOAD_CONST` opcode with the proper argument based on the position of the specified PyObject in the consts table. -* ``ADDOP_LOAD_CONST_NEW(struct compiler *, location, PyObject *)`` - just like ``ADDOP_LOAD_CONST_NEW``, but steals a reference to PyObject -* ``ADDOP_JUMP(struct compiler *, location, int, basicblock *)`` +* `ADDOP_LOAD_CONST_NEW(struct compiler *, location, PyObject *)` + just like `ADDOP_LOAD_CONST_NEW`, but steals a reference to PyObject +* `ADDOP_JUMP(struct compiler *, location, int, basicblock *)` create a jump to a basic block -The ``location`` argument is a struct with the source location to be +The `location` argument is a struct with the source location to be associated with this instruction. It is typically extracted from an -``AST`` node with the ``LOC`` macro. The ``NO_LOCATION`` can be used +`AST` node with the `LOC` macro. The `NO_LOCATION` can be used for *synthetic* instructions, which we do not associate with a line -number at this stage. For example, the implicit ``return None`` +number at this stage. For example, the implicit `return None` which is added at the end of a function is not associated with any line in the source code. There are several helper functions that will emit pseudo-instructions and are named `compiler_{xx}()` where *xx* is what the function helps -with (``list``, ``boolop``, etc.). A rather useful one is ``compiler_nameop()``. +with (`list`, `boolop`, etc.). A rather useful one is `compiler_nameop()`. 
This function looks up the scope of a variable and, based on the expression
 context, emits the proper opcode to load, store, or delete the variable.
 
 Once the instruction sequence is created, it is transformed into a CFG
-by ``_PyCfg_FromInstructionSequence()``. Then ``_PyCfg_OptimizeCodeUnit()``
+by `_PyCfg_FromInstructionSequence()`. Then `_PyCfg_OptimizeCodeUnit()`
 applies various peephole optimizations, and
-``_PyCfg_OptimizedCfgToInstructionSequence()`` converts the optimized ``CFG``
+`_PyCfg_OptimizedCfgToInstructionSequence()` converts the optimized `CFG`
 back into an instruction sequence. These conversions and optimizations are
-implemented in
-[Python/flowgraph.c](https://github.com/python/cpython/blob/main/Python/flowgraph.c).
+implemented in [Python/flowgraph.c](../Python/flowgraph.c).
 
 Finally, the sequence of pseudo-instructions is converted into actual
 bytecode. This includes transforming pseudo instructions into actual instructions,
 converting jump targets from logical labels to relative offsets, and
-construction of the
-[exception table](exception_handling.md) and
-[locations table](https://github.com/python/cpython/blob/main/InternalDocs/locations.md).
-The bytecode and tables are then wrapped into a ``PyCodeObject`` along with additional
-metadata, including the ``consts`` and ``names`` arrays, information about function
+construction of the [exception table](exception_handling.md) and
+[locations table](locations.md).
+The bytecode and tables are then wrapped into a `PyCodeObject` along with additional
+metadata, including the `consts` and `names` arrays, information about the function, and a
 reference to the source code (filename, etc). All of this is implemented by
-``_PyAssemble_MakeCodeObject()`` in
-[Python/assemble.c](https://github.com/python/cpython/blob/main/Python/assemble.c).
+`_PyAssemble_MakeCodeObject()` in [Python/assemble.c](../Python/assemble.c).
 
 
 Code objects
 ============
 
-The result of ``PyAST_CompileObject()`` is a ``PyCodeObject`` which is defined in
-[Include/cpython/code.h](https://github.com/python/cpython/blob/main/Include/cpython/code.h).
+The result of `PyAST_CompileObject()` is a `PyCodeObject` which is defined in
+[Include/cpython/code.h](../Include/cpython/code.h).
 And with that you now have executable Python bytecode!
 
-The code objects (byte code) are executed in
-[Python/ceval.c](https://github.com/python/cpython/blob/main/Python/ceval.c).
+The code objects (byte code) are executed in [Python/ceval.c](../Python/ceval.c).
 This file will also need a new case statement for the new opcode in the big switch
-statement in ``_PyEval_EvalFrameDefault()``.
+statement in `_PyEval_EvalFrameDefault()`.
 
 
 Important files
 ===============
 
-* [Parser/](https://github.com/python/cpython/blob/main/Parser/)
+* [Parser/](../Parser/)
 
-  * [Parser/Python.asdl](https://github.com/python/cpython/blob/main/Parser/Python.asdl):
+  * [Parser/Python.asdl](../Parser/Python.asdl):
     ASDL syntax file.
 
-  * [Parser/asdl.py](https://github.com/python/cpython/blob/main/Parser/asdl.py):
+  * [Parser/asdl.py](../Parser/asdl.py):
     Parser for ASDL definition files.
     Reads in an ASDL description and parses it into an AST that describes it.
 
-  * [Parser/asdl_c.py](https://github.com/python/cpython/blob/main/Parser/asdl_c.py):
+  * [Parser/asdl_c.py](../Parser/asdl_c.py):
     Generate C code from an ASDL description.
Generates - [Python/Python-ast.c](https://github.com/python/cpython/blob/main/Python/Python-ast.c) - and - [Include/internal/pycore_ast.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_ast.h). - - * [Parser/parser.c](https://github.com/python/cpython/blob/main/Parser/parser.c): - The new PEG parser introduced in Python 3.9. - Generated by - [Tools/peg_generator/pegen/c_generator.py](https://github.com/python/cpython/blob/main/Tools/peg_generator/pegen/c_generator.py) - from the grammar [Grammar/python.gram](https://github.com/python/cpython/blob/main/Grammar/python.gram). + [Python/Python-ast.c](../Python/Python-ast.c) and + [Include/internal/pycore_ast.h](../Include/internal/pycore_ast.h). + + * [Parser/parser.c](../Parser/parser.c): + The new PEG parser introduced in Python 3.9. Generated by + [Tools/peg_generator/pegen/c_generator.py](../Tools/peg_generator/pegen/c_generator.py) + from the grammar [Grammar/python.gram](../Grammar/python.gram). Creates the AST from source code. Rule functions for their corresponding production rules are found here. - * [Parser/peg_api.c](https://github.com/python/cpython/blob/main/Parser/peg_api.c): - Contains high-level functions which are - used by the interpreter to create an AST from source code. + * [Parser/peg_api.c](../Parser/peg_api.c): + Contains high-level functions which are used by the interpreter to create + an AST from source code. - * [Parser/pegen.c](https://github.com/python/cpython/blob/main/Parser/pegen.c): + * [Parser/pegen.c](../Parser/pegen.c): Contains helper functions which are used by functions in - [Parser/parser.c](https://github.com/python/cpython/blob/main/Parser/parser.c) - to construct the AST. Also contains helper functions which help raise better error messages - when parsing source code. + [Parser/parser.c](../Parser/parser.c) to construct the AST. Also contains + helper functions which help raise better error messages when parsing source code. - * [Parser/pegen.h](https://github.com/python/cpython/blob/main/Parser/pegen.h): - Header file for the corresponding - [Parser/pegen.c](https://github.com/python/cpython/blob/main/Parser/pegen.c). - Also contains definitions of the ``Parser`` and ``Token`` structs. + * [Parser/pegen.h](../Parser/pegen.h): + Header file for the corresponding [Parser/pegen.c](../Parser/pegen.c). + Also contains definitions of the `Parser` and `Token` structs. -* [Python/](https://github.com/python/cpython/blob/main/Python) +* [Python/](../Python) - * [Python/Python-ast.c](https://github.com/python/cpython/blob/main/Python/Python-ast.c): + * [Python/Python-ast.c](../Python/Python-ast.c): Creates C structs corresponding to the ASDL types. Also contains code for marshalling AST nodes (core ASDL types have marshalling code in - [Python/asdl.c](https://github.com/python/cpython/blob/main/Python/asdl.c)). - File automatically generated by - [Parser/asdl_c.py](https://github.com/python/cpython/blob/main/Parser/asdl_c.py). + [Python/asdl.c](../Python/asdl.c)). + File automatically generated by [Parser/asdl_c.py](../Parser/asdl_c.py). This file must be committed separately after every grammar change - is committed since the ``__version__`` value is set to the latest + is committed since the `__version__` value is set to the latest grammar change revision number. 
- * [Python/asdl.c](https://github.com/python/cpython/blob/main/Python/asdl.c): + * [Python/asdl.c](../Python/asdl.c): Contains code to handle the ASDL sequence type. Also has code to handle marshalling the core ASDL types, such as number - and identifier. Used by - [Python/Python-ast.c](https://github.com/python/cpython/blob/main/Python/Python-ast.c) + and identifier. Used by [Python/Python-ast.c](../Python/Python-ast.c) for marshalling AST nodes. - * [Python/ast.c](https://github.com/python/cpython/blob/main/Python/ast.c): + * [Python/ast.c](../Python/ast.c): Used for validating the AST. - * [Python/ast_opt.c](https://github.com/python/cpython/blob/main/Python/ast_opt.c): + * [Python/ast_opt.c](../Python/ast_opt.c): Optimizes the AST. - * [Python/ast_unparse.c](https://github.com/python/cpython/blob/main/Python/ast_unparse.c): + * [Python/ast_unparse.c](../Python/ast_unparse.c): Converts the AST expression node back into a string (for string annotations). - * [Python/ceval.c](https://github.com/python/cpython/blob/main/Python/ceval.c): + * [Python/ceval.c](../Python/ceval.c): Executes byte code (aka, eval loop). - * [Python/symtable.c](https://github.com/python/cpython/blob/main/Python/symtable.c): + * [Python/symtable.c](../Python/symtable.c): Generates a symbol table from AST. - * [Python/pyarena.c](https://github.com/python/cpython/blob/main/Python/pyarena.c): + * [Python/pyarena.c](../Python/pyarena.c): Implementation of the arena memory manager. - * [Python/compile.c](https://github.com/python/cpython/blob/main/Python/compile.c): + * [Python/compile.c](../Python/compile.c): Emits pseudo bytecode based on the AST. - * [Python/flowgraph.c](https://github.com/python/cpython/blob/main/Python/flowgraph.c): + * [Python/flowgraph.c](../Python/flowgraph.c): Implements peephole optimizations. - * [Python/assemble.c](https://github.com/python/cpython/blob/main/Python/assemble.c): + * [Python/assemble.c](../Python/assemble.c): Constructs a code object from a sequence of pseudo instructions. - * [Python/instruction_sequence.c](https://github.com/python/cpython/blob/main/Python/instruction_sequence.c): + * [Python/instruction_sequence.c](../Python/instruction_sequence.c): A data structure representing a sequence of bytecode-like pseudo-instructions. -* [Include/](https://github.com/python/cpython/blob/main/Include/) +* [Include/](../Include/) - * [Include/cpython/code.h](https://github.com/python/cpython/blob/main/Include/cpython/code.h) - : Header file for - [Objects/codeobject.c](https://github.com/python/cpython/blob/main/Objects/codeobject.c); - contains definition of ``PyCodeObject``. + * [Include/cpython/code.h](../Include/cpython/code.h) + : Header file for [Objects/codeobject.c](../Objects/codeobject.c); + contains definition of `PyCodeObject`. - * [Include/opcode.h](https://github.com/python/cpython/blob/main/Include/opcode.h) - : One of the files that must be modified if - [Lib/opcode.py](https://github.com/python/cpython/blob/main/Lib/opcode.py) is. + * [Include/opcode.h](../Include/opcode.h) + : One of the files that must be modified whenever + [Lib/opcode.py](../Lib/opcode.py) is. 
-  * [Include/internal/pycore_ast.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_ast.h)
+  * [Include/internal/pycore_ast.h](../Include/internal/pycore_ast.h)
     : Contains the actual definitions of the C structs as generated by
-      [Python/Python-ast.c](https://github.com/python/cpython/blob/main/Python/Python-ast.c)
-      Automatically generated by
-      [Parser/asdl_c.py](https://github.com/python/cpython/blob/main/Parser/asdl_c.py).
-
-  * [Include/internal/pycore_asdl.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_asdl.h)
-    : Header for the corresponding
-      [Python/ast.c](https://github.com/python/cpython/blob/main/Python/ast.c).
-
-  * [Include/internal/pycore_ast.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_ast.h)
-    : Declares ``_PyAST_Validate()`` external (from
-      [Python/ast.c](https://github.com/python/cpython/blob/main/Python/ast.c)).
-
-  * [Include/internal/pycore_symtable.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_symtable.h)
-    : Header for
-      [Python/symtable.c](https://github.com/python/cpython/blob/main/Python/symtable.c).
-      ``struct symtable`` and ``PySTEntryObject`` are defined here.
-
-  * [Include/internal/pycore_parser.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_parser.h)
-    : Header for the corresponding
-      [Parser/peg_api.c](https://github.com/python/cpython/blob/main/Parser/peg_api.c).
-
-  * [Include/internal/pycore_pyarena.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_pyarena.h)
-    : Header file for the corresponding
-      [Python/pyarena.c](https://github.com/python/cpython/blob/main/Python/pyarena.c).
-
-  * [Include/opcode_ids.h](https://github.com/python/cpython/blob/main/Include/opcode_ids.h)
-    : List of opcodes. Generated from
-      [Python/bytecodes.c](https://github.com/python/cpython/blob/main/Python/bytecodes.c)
+      [Python/Python-ast.c](../Python/Python-ast.c).
+      Automatically generated by [Parser/asdl_c.py](../Parser/asdl_c.py).
+
+  * [Include/internal/pycore_asdl.h](../Include/internal/pycore_asdl.h)
+    : Header for the corresponding [Python/asdl.c](../Python/asdl.c).
+
+  * [Include/internal/pycore_ast.h](../Include/internal/pycore_ast.h)
+    : Declares `_PyAST_Validate()` external (from [Python/ast.c](../Python/ast.c)).
+
+  * [Include/internal/pycore_symtable.h](../Include/internal/pycore_symtable.h)
+    : Header for [Python/symtable.c](../Python/symtable.c).
+      `struct symtable` and `PySTEntryObject` are defined here.
+
+  * [Include/internal/pycore_parser.h](../Include/internal/pycore_parser.h)
+    : Header for the corresponding [Parser/peg_api.c](../Parser/peg_api.c).
+
+  * [Include/internal/pycore_pyarena.h](../Include/internal/pycore_pyarena.h)
+    : Header file for the corresponding [Python/pyarena.c](../Python/pyarena.c).
+
+  * [Include/opcode_ids.h](../Include/opcode_ids.h)
+    : List of opcodes. Generated from [Python/bytecodes.c](../Python/bytecodes.c)
     by
-      [Tools/cases_generator/opcode_id_generator.py](https://github.com/python/cpython/blob/main/Tools/cases_generator/opcode_id_generator.py).
+      [Tools/cases_generator/opcode_id_generator.py](../Tools/cases_generator/opcode_id_generator.py).
-* [Objects/](https://github.com/python/cpython/blob/main/Objects/)
+* [Objects/](../Objects/)
 
-  * [Objects/codeobject.c](https://github.com/python/cpython/blob/main/Objects/codeobject.c)
+  * [Objects/codeobject.c](../Objects/codeobject.c)
     : Contains PyCodeObject-related code.
 
-  * [Objects/frameobject.c](https://github.com/python/cpython/blob/main/Objects/frameobject.c)
-    : Contains the ``frame_setlineno()`` function which should determine whether it is allowed
+  * [Objects/frameobject.c](../Objects/frameobject.c)
+    : Contains the `frame_setlineno()` function which determines whether it is allowed
       to make a jump between two points in the bytecode.
 
-* [Lib/](https://github.com/python/cpython/blob/main/Lib/)
+* [Lib/](../Lib/)
 
-  * [Lib/opcode.py](https://github.com/python/cpython/blob/main/Lib/opcode.py)
+  * [Lib/opcode.py](../Lib/opcode.py)
    : opcode utilities exposed to Python.
 
-  * [Include/core/pycore_magic_number.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_magic_number.h)
-    : Home of the magic number (named ``MAGIC_NUMBER``) for bytecode versioning.
+  * [Include/internal/pycore_magic_number.h](../Include/internal/pycore_magic_number.h)
+    : Home of the magic number (named `MAGIC_NUMBER`) for bytecode versioning.
 
 
 Objects
 =======
@@ -625,7 +595,7 @@ Objects
 
 * [Locations](locations.md): Describes the location table
 * [Frames](frames.md): Describes frames and the frame stack
-* [Objects/object_layout.md](https://github.com/python/cpython/blob/main/Objects/object_layout.md): Describes object layout for 3.11 and later
+* [Objects/object_layout.md](../Objects/object_layout.md): Describes object layout for 3.11 and later
 
 * [Exception Handling](exception_handling.md): Describes the exception table
 
 
diff --git a/InternalDocs/exception_handling.md b/InternalDocs/exception_handling.md
index 64a346b55b8413..14066a5864b4da 100644
--- a/InternalDocs/exception_handling.md
+++ b/InternalDocs/exception_handling.md
@@ -68,18 +68,16 @@ Handling Exceptions
 -------------------
 
 At runtime, when an exception occurs, the interpreter calls
-``get_exception_handler()`` in
-[Python/ceval.c](https://github.com/python/cpython/blob/main/Python/ceval.c)
+`get_exception_handler()` in [Python/ceval.c](../Python/ceval.c)
 to look up the offset of the current instruction in the
 exception table. If it finds a handler, control flow transfers to it. Otherwise, the
 exception bubbles up to the caller, and the caller's frame is checked for a handler
 covering the `CALL` instruction. This repeats until a handler is found or the topmost
 frame is reached. If no handler is found, then the interpreter function
-(``_PyEval_EvalFrameDefault()``) returns NULL. During unwinding,
+(`_PyEval_EvalFrameDefault()`) returns NULL. During unwinding,
 the traceback is constructed as each frame is added to it by
-``PyTraceBack_Here()``, which is in
-[Python/traceback.c](https://github.com/python/cpython/blob/main/Python/traceback.c).
+`PyTraceBack_Here()`, which is in [Python/traceback.c](../Python/traceback.c).
 
 Along with the location of an exception handler, each entry of the exception
 table also contains the stack depth of the `try` instruction
@@ -174,22 +172,20 @@ which is then encoded as:
 
 for a total of five bytes.
 
-The code to construct the exception table is in ``assemble_exception_table()``
-in [Python/assemble.c](https://github.com/python/cpython/blob/main/Python/assemble.c).
+The code to construct the exception table is in `assemble_exception_table()`
+in [Python/assemble.c](../Python/assemble.c).
 
 The interpreter's function to look up the table by instruction offset is
-``get_exception_handler()`` in
-[Python/ceval.c](https://github.com/python/cpython/blob/main/Python/ceval.c).
-The Python function ``_parse_exception_table()`` in
-[Lib/dis.py](https://github.com/python/cpython/blob/main/Lib/dis.py)
+`get_exception_handler()` in [Python/ceval.c](../Python/ceval.c).
+The Python function `_parse_exception_table()` in [Lib/dis.py](../Lib/dis.py)
 returns the exception table content as a list of namedtuple instances.
 
 Exception Chaining Implementation
 ---------------------------------
 
 [Exception chaining](https://docs.python.org/dev/tutorial/errors.html#exception-chaining)
-refers to setting the ``__context__`` and ``__cause__`` fields of an exception as it is
-being raised. The ``__context__`` field is set by ``_PyErr_SetObject()`` in
-[Python/errors.c](https://github.com/python/cpython/blob/main/Python/errors.c)
-(which is ultimately called by all ``PyErr_Set*()`` functions).
-The ``__cause__`` field (explicit chaining) is set by the ``RAISE_VARARGS`` bytecode.
+refers to setting the `__context__` and `__cause__` fields of an exception as it is
+being raised. The `__context__` field is set by `_PyErr_SetObject()` in
+[Python/errors.c](../Python/errors.c) (which is ultimately called by all
+`PyErr_Set*()` functions). The `__cause__` field (explicit chaining) is set by
+the `RAISE_VARARGS` bytecode.
diff --git a/InternalDocs/frames.md b/InternalDocs/frames.md
index 34682adb1b422e..06dc8f0702c3d9 100644
--- a/InternalDocs/frames.md
+++ b/InternalDocs/frames.md
@@ -10,20 +10,19 @@ of three conceptual sections:
 globals dict, code object, instruction pointer, stack depth, the
 previous frame, etc.
 
-The definition of the ``_PyInterpreterFrame`` struct is in
-[Include/internal/pycore_frame.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_frame.h).
+The definition of the `_PyInterpreterFrame` struct is in
+[Include/internal/pycore_frame.h](../Include/internal/pycore_frame.h).
 
 # Allocation
 
 Python semantics allows frames to outlive the activation, so they need to
 be allocated outside the C call stack. To reduce overhead and improve locality
 of reference, most frames are allocated contiguously in a per-thread stack
-(see ``_PyThreadState_PushFrame`` in
-[Python/pystate.c](https://github.com/python/cpython/blob/main/Python/pystate.c)).
+(see `_PyThreadState_PushFrame` in [Python/pystate.c](../Python/pystate.c)).
 
 Frames of generators and coroutines are embedded in the generator and coroutine
-objects, so are not allocated in the per-thread stack. See ``PyGenObject`` in
-[Include/internal/pycore_genobject.h](https://github.com/python/cpython/blob/main/Include/internal/pycore_genobject.h).
+objects, so are not allocated in the per-thread stack. See `PyGenObject` in
+[Include/internal/pycore_genobject.h](../Include/internal/pycore_genobject.h).
 
 ## Layout
 
@@ -82,16 +81,15 @@ frames for each activation, but with low runtime overhead.
 
 ### Generators and Coroutines
 
-Generators (objects of type ``PyGen_Type``, ``PyCoro_Type`` or
-``PyAsyncGen_Type``) have a `_PyInterpreterFrame` embedded in them, so
+Generators (objects of type `PyGen_Type`, `PyCoro_Type` or
+`PyAsyncGen_Type`) have a `_PyInterpreterFrame` embedded in them, so
 that they can be created with a single memory allocation.
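
The embedded frame is visible from Python as the generator's `gi_frame`
attribute (`cr_frame` on coroutines); a small illustration:

```python
def gen():
    yield 1
    yield 2

g = gen()
next(g)
print(g.gi_frame)          # a frame object backed by the embedded frame
print(g.gi_frame.f_lasti)  # instruction offset within the generator's code
g.close()
print(g.gi_frame)          # None: the frame is cleared once the generator finishes
```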
When such an embedded frame is iterated or awaited, it can be linked with
 frames on the per-thread stack via the linkage fields.
 
 If a frame object associated with a generator outlives the generator, then
 the embedded `_PyInterpreterFrame` is copied into the frame object (see
-``take_ownership()`` in
-[Python/frame.c](https://github.com/python/cpython/blob/main/Python/frame.c)).
+`take_ownership()` in [Python/frame.c](../Python/frame.c)).
 
 ### Field names
 
diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md
index fd0246fa1a60e2..a6ee5c09e19efd 100644
--- a/InternalDocs/garbage_collector.md
+++ b/InternalDocs/garbage_collector.md
@@ -12,7 +12,7 @@ a local variable in some C function. When an object’s reference count becomes
 the object is deallocated. If it contains references to other objects, their
 reference counts are decremented. Those other objects may be deallocated in turn, if
 this decrement makes their reference count become zero, and so on. The reference
-count field can be examined using the ``sys.getrefcount()`` function (notice that the
+count field can be examined using the `sys.getrefcount()` function (notice that the
 value returned by this function is always 1 more, as the function itself also holds a
 reference to the object when called):
 
@@ -39,7 +39,7 @@ cycles. For instance, consider this code:
 >>> del container
 ```
 
-In this example, ``container`` holds a reference to itself, so even when we remove
+In this example, `container` holds a reference to itself, so even when we remove
 our reference to it (the variable "container") the reference count never falls to 0
 because it still has its own internal reference. Therefore it would never be cleaned
 just by simple reference counting. For this reason some additional machinery
@@ -127,7 +127,7 @@ GC for the free-threaded build
 ------------------------------
 
 In the free-threaded build, Python objects contain a 1-byte field
-``ob_gc_bits`` that is used to track garbage collection related state. The
+`ob_gc_bits` that is used to track garbage collection related state. The
 field exists in all objects, including ones that do not support cyclic
 garbage collection. The field is used to identify objects that are tracked
 by the collector, ensure that finalizers are called only once per object,
@@ -146,14 +146,14 @@ and, during garbage collection, differentiate reachable vs. unreachable objects.
 | ... |
 ```
 
-Note that not all fields are to scale. ``pad`` is two bytes, ``ob_mutex`` and
-``ob_gc_bits`` are each one byte, and ``ob_ref_local`` is four bytes. The
-other fields, ``ob_tid``, ``ob_ref_shared``, and ``ob_type``, are all
+Note that not all fields are to scale. `pad` is two bytes, `ob_mutex` and
+`ob_gc_bits` are each one byte, and `ob_ref_local` is four bytes. The
+other fields, `ob_tid`, `ob_ref_shared`, and `ob_type`, are all
 pointer-sized (that is, eight bytes on a 64-bit platform).
 
 
-The garbage collector also temporarily repurposes the ``ob_tid`` (thread ID)
-and ``ob_ref_local`` (local reference count) fields for other purposes during
+The garbage collector also temporarily repurposes the `ob_tid` (thread ID)
+and `ob_ref_local` (local reference count) fields for other purposes during
 collections.
 
 
@@ -165,17 +165,17 @@ objects with GC support. These APIs can be found in the
 [Garbage Collector C API documentation](https://docs.python.org/3/c-api/gcsupport.html).
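
From Python, the objects visited by a type's `tp_traverse` can be inspected with
`gc.get_referents()`, and `gc.is_tracked()` shows whether the collector is
tracking an object:

```python
import gc

class Node:
    def __init__(self):
        self.ref = None

n = Node()
n.ref = n                   # create a self-referencing cycle
print(gc.is_tracked(n))     # True: instances of container types are tracked
print(gc.get_referents(n))  # the objects reported by the type's tp_traverse
```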
Apart from this object structure, the type object for objects supporting garbage
-collection must include the ``Py_TPFLAGS_HAVE_GC`` in its ``tp_flags`` slot and
-provide an implementation of the ``tp_traverse`` handler. Unless it can be proven
+collection must include the `Py_TPFLAGS_HAVE_GC` flag in its `tp_flags` slot and
+provide an implementation of the `tp_traverse` handler. Unless it can be proven
 that the objects cannot form reference cycles with only objects of its type or unless
-the type is immutable, a ``tp_clear`` implementation must also be provided.
+the type is immutable, a `tp_clear` implementation must also be provided.
 
 
 Identifying reference cycles
 ============================
 
 The algorithm that CPython uses to detect those reference cycles is
-implemented in the ``gc`` module. The garbage collector **only focuses**
+implemented in the `gc` module. The garbage collector **only focuses**
 on cleaning container objects (that is, objects that can contain a reference
 to one or more objects). These can be arrays, dictionaries, lists, custom
 class instances, classes in extension modules, etc. One could think that
@@ -195,7 +195,7 @@ the interpreter create cycles everywhere. Some notable examples:
 
 To correctly dispose of these objects once they become unreachable, they need to be
 identified first. To understand how the algorithm works, let’s take the case of a
 circular linked list which has one link referenced by a
-variable ``A``, and one self-referencing object which is completely
+variable `A`, and one self-referencing object which is completely
 unreachable:
 
 ```pycon
@@ -234,7 +234,7 @@ objects have a refcount larger than the number of incoming references from
 within the candidate set.
 
 Every object that supports garbage collection will have an extra reference
-count field initialized to the reference count (``gc_ref`` in the figures)
+count field initialized to the reference count (`gc_ref` in the figures)
 of that object when the algorithm starts. This is because the algorithm needs
 to modify the reference count to do the computations and in this way the
 interpreter will not modify the real reference count field.
 
@@ -243,43 +243,43 @@ interpreter will not modify the real reference count field.
 
 The GC then iterates over all containers in the first list and decrements by one the
 `gc_ref` field of any other object that container is referencing. Doing
-this makes use of the ``tp_traverse`` slot in the container class (implemented
+this makes use of the `tp_traverse` slot in the container class (implemented
 using the C API or inherited by a superclass) to know what objects are referenced by
 each container. After all the objects have been scanned, only the objects that have
-references from outside the “objects to scan” list will have ``gc_ref > 0``.
+references from outside the “objects to scan” list will have `gc_ref > 0`.
 
 ![gc-image2](images/python-cyclic-gc-2-new-page.png)
 
-Notice that having ``gc_ref == 0`` does not imply that the object is unreachable.
-This is because another object that is reachable from the outside (``gc_ref > 0``)
-can still have references to it. For instance, the ``link_2`` object in our example
-ended having ``gc_ref == 0`` but is referenced still by the ``link_1`` object that
+Notice that having `gc_ref == 0` does not imply that the object is unreachable.
+This is because another object that is reachable from the outside (`gc_ref > 0`)
+can still have references to it. 
For instance, the `link_2` object in our example
+ended up with `gc_ref == 0` but is still referenced by the `link_1` object, which
+is reachable from the outside. To obtain the set of objects that are really
 unreachable, the garbage collector re-scans the container objects using the
-``tp_traverse`` slot; this time with a different traverse function that marks objects with
-``gc_ref == 0`` as "tentatively unreachable" and then moves them to the
+`tp_traverse` slot; this time with a different traverse function that marks objects with
+`gc_ref == 0` as "tentatively unreachable" and then moves them to the
 tentatively unreachable list. The following image depicts the state of the lists in a
-moment when the GC processed the ``link_3`` and ``link_4`` objects but has not
-processed ``link_1`` and ``link_2`` yet.
+moment when the GC processed the `link_3` and `link_4` objects but has not
+processed `link_1` and `link_2` yet.
 
 ![gc-image3](images/python-cyclic-gc-3-new-page.png)
 
-Then the GC scans the next ``link_1`` object. Because it has ``gc_ref == 1``,
+Then the GC scans the next `link_1` object. Because it has `gc_ref == 1`,
 the GC does not do anything special because it knows it has to be reachable (and is
 already in what will become the reachable list):
 
 ![gc-image4](images/python-cyclic-gc-4-new-page.png)
 
-When the GC encounters an object which is reachable (``gc_ref > 0``), it traverses
-its references using the ``tp_traverse`` slot to find all the objects that are
+When the GC encounters an object which is reachable (`gc_ref > 0`), it traverses
+its references using the `tp_traverse` slot to find all the objects that are
 reachable from it, moving them to the end of the list of reachable objects (where
-they started originally) and setting its ``gc_ref`` field to 1. This is what happens
-to ``link_2`` and ``link_3`` below as they are reachable from ``link_1``. From the
-state in the previous image and after examining the objects referred to by ``link_1``
-the GC knows that ``link_3`` is reachable after all, so it is moved back to the
-original list and its ``gc_ref`` field is set to 1 so that if the GC visits it again,
+they started originally) and setting their `gc_ref` fields to 1. This is what happens
+to `link_2` and `link_3` below as they are reachable from `link_1`. From the
+state in the previous image and after examining the objects referred to by `link_1`
+the GC knows that `link_3` is reachable after all, so it is moved back to the
+original list and its `gc_ref` field is set to 1 so that if the GC visits it again,
 it will know that it's reachable. To avoid visiting an object twice, the GC marks all
-objects that have already been visited once (by unsetting the ``PREV_MASK_COLLECTING``
+objects that have already been visited once (by unsetting the `PREV_MASK_COLLECTING`
 flag) so that if an object that has already been processed is referenced by some other
 object, the GC does not process it twice.
 
@@ -295,7 +295,7 @@ list are really unreachable and can thus be garbage collected.
 
 Pragmatically, it's important to note that no recursion is required by any of this,
 and neither does it in any other way require additional memory proportional to the
 number of objects, number of pointers, or the lengths of pointer chains. Apart from
-``O(1)`` storage for internal C needs, the objects themselves contain all the storage
+`O(1)` storage for internal C needs, the objects themselves contain all the storage
 the GC algorithms require.
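
The whole computation can be modelled in a few lines of Python. This toy sketch
(plain dicts standing in for the `PyGC_Head` bookkeeping; it is not CPython's
implementation) reproduces the `link_1`/`link_2`/`link_3` example above:

```python
graph = {                      # each object -> objects its tp_traverse would visit
    "link_1": ["link_2"],
    "link_2": ["link_3"],
    "link_3": ["link_1"],
    "self_ref": ["self_ref"],  # the completely unreachable self-referencing object
}
external = {"link_1": 1}       # references from outside the set (the variable A)

# gc_ref starts as a copy of each real refcount: external plus internal references.
gc_ref = {obj: external.get(obj, 0) for obj in graph}
for obj in graph:
    for target in graph[obj]:
        gc_ref[target] += 1

# First pass: subtract one for every reference coming from inside the set.
for obj in graph:
    for target in graph[obj]:
        gc_ref[target] -= 1
# Now gc_ref > 0 only for objects referenced from outside the candidate set.

# Second pass: everything reachable from those roots is reachable as well.
reachable, stack = set(), [obj for obj in graph if gc_ref[obj] > 0]
while stack:
    obj = stack.pop()
    if obj not in reachable:
        reachable.add(obj)
        stack.extend(graph[obj])

print(sorted(reachable))               # ['link_1', 'link_2', 'link_3']
print(sorted(set(graph) - reachable))  # ['self_ref']
```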
Why moving unreachable objects is better @@ -331,7 +331,7 @@ with the objective of completely destroying these objects. Roughly, the process follows these steps in order: 1. Handle and clear weak references (if any). Weak references to unreachable objects - are set to ``None``. If the weak reference has an associated callback, the callback + are set to `None`. If the weak reference has an associated callback, the callback is enqueued to be called once the clearing of weak references is finished. We only invoke callbacks for weak references that are themselves reachable. If both the weak reference and the pointed-to object are unreachable we do not execute the callback. @@ -339,15 +339,15 @@ follows these steps in order: object and support for weak references predates support for object resurrection. Ignoring the weak reference's callback is fine because both the object and the weakref are going away, so it's legitimate to say the weak reference is going away first. -2. If an object has legacy finalizers (``tp_del`` slot) move it to the - ``gc.garbage`` list. -3. Call the finalizers (``tp_finalize`` slot) and mark the objects as already +2. If an object has legacy finalizers (`tp_del` slot) move it to the + `gc.garbage` list. +3. Call the finalizers (`tp_finalize` slot) and mark the objects as already finalized to avoid calling finalizers twice if the objects are resurrected or if other finalizers have removed the object first. 4. Deal with resurrected objects. If some objects have been resurrected, the GC finds the new subset of objects that are still unreachable by running the cycle detection algorithm again and continues with them. -5. Call the ``tp_clear`` slot of every object so all internal links are broken and +5. Call the `tp_clear` slot of every object so all internal links are broken and the reference counts fall to 0, triggering the destruction of all unreachable objects. @@ -376,9 +376,9 @@ generations. Every collection operates on the entire heap. In order to decide when to run, the collector keeps track of the number of object allocations and deallocations since the last collection. When the number of -allocations minus the number of deallocations exceeds ``threshold_0``, +allocations minus the number of deallocations exceeds `threshold_0`, collection starts. Initially only generation 0 is examined. If generation 0 has -been examined more than ``threshold_1`` times since generation 1 has been +been examined more than `threshold_1` times since generation 1 has been examined, then generation 1 is examined as well. With generation 2, things are a bit more complicated; see [Collecting the oldest generation](#Collecting-the-oldest-generation) for @@ -393,8 +393,8 @@ function: ``` The content of these generations can be examined using the -``gc.get_objects(generation=NUM)`` function and collections can be triggered -specifically in a generation by calling ``gc.collect(generation=NUM)``. +`gc.get_objects(generation=NUM)` function and collections can be triggered +specifically in a generation by calling `gc.collect(generation=NUM)`. ```pycon >>> import gc @@ -433,7 +433,7 @@ Collecting the oldest generation -------------------------------- In addition to the various configurable thresholds, the GC only triggers a full -collection of the oldest generation if the ratio ``long_lived_pending / long_lived_total`` +collection of the oldest generation if the ratio `long_lived_pending / long_lived_total` is above a given value (hardwired to 25%). 
The reason is that, while "non-full"
collections (that is, collections of the young and middle generations) will always
examine roughly the same number of objects (determined by the aforementioned
@@ -463,12 +463,12 @@ used for tags or to keep other information – most often as a bit field (each
 bit a separate tag) – as long as code that uses the pointer masks out these
 bits before accessing memory. For example, on a 32-bit architecture (for both
 addresses and word size), a word is 32 bits = 4 bytes, so word-aligned
-addresses are always a multiple of 4, hence end in ``00``, leaving the last 2 bits
+addresses are always a multiple of 4, hence end in `00`, leaving the last 2 bits
 available; while on a 64-bit architecture, a word is 64 bits = 8 bytes, so
-word-aligned addresses end in ``000``, leaving the last 3 bits available.
+word-aligned addresses end in `000`, leaving the last 3 bits available.
 
 The CPython GC makes use of two fat pointers that correspond to the extra fields
-of ``PyGC_Head`` discussed in the `Memory layout and object structure`_ section:
+of `PyGC_Head` discussed in the *Memory layout and object structure* section:
 
 > [!WARNING]
 > Because of the presence of extra information, "tagged" or "fat" pointers cannot be
 > dereferenced directly and the extra information must be stripped off before
 > obtaining the real memory address. Special care needs to be taken for
 > functions that directly manipulate the linked lists, since these functions
 > normally assume the pointers inside the lists are in a consistent state.
 
 
-- The ``_gc_prev`` field is normally used as the "previous" pointer to maintain the
+- The `_gc_prev` field is normally used as the "previous" pointer to maintain the
   doubly linked list but its lowest two bits are used to keep the flags
-  ``PREV_MASK_COLLECTING`` and ``_PyGC_PREV_MASK_FINALIZED``. Between collections,
-  the only flag that can be present is ``_PyGC_PREV_MASK_FINALIZED`` that indicates
-  if an object has been already finalized. During collections ``_gc_prev`` is
-  temporarily used for storing a copy of the reference count (``gc_ref``), in
+  `PREV_MASK_COLLECTING` and `_PyGC_PREV_MASK_FINALIZED`. Between collections,
+  the only flag that can be present is `_PyGC_PREV_MASK_FINALIZED`, which indicates
+  whether an object has already been finalized. During collections `_gc_prev` is
+  temporarily used for storing a copy of the reference count (`gc_ref`), in
   addition to two flags, and the GC linked list becomes a singly linked list until
-  ``_gc_prev`` is restored.
+  `_gc_prev` is restored.
 
-- The ``_gc_next`` field is used as the "next" pointer to maintain the doubly linked
+- The `_gc_next` field is used as the "next" pointer to maintain the doubly linked
   list but during collection its lowest bit is used to keep the
-  ``NEXT_MASK_UNREACHABLE`` flag that indicates if an object is tentatively
+  `NEXT_MASK_UNREACHABLE` flag, which indicates whether an object is tentatively
   unreachable during the cycle detection algorithm. This is a drawback to using only
   doubly linked lists to implement partitions: while most needed operations are
   constant-time, there is no efficient way to determine which partition an object is
   currently in. Instead, when that's needed, ad hoc tricks (like the
-  ``NEXT_MASK_UNREACHABLE`` flag) are employed.
+  `NEXT_MASK_UNREACHABLE` flag) are employed.
 
 Optimization: delay tracking containers
 =======================================
@@ -531,7 +531,7 @@ benefit from delayed tracking:
   full garbage collection (all generations), the collector will untrack any dictionaries
   whose contents are not tracked.
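
The untracking optimizations described above can be observed directly with
`gc.is_tracked()` (described next); the output below is from a recent default
build, and details may differ between versions and builds:

```pycon
>>> import gc
>>> gc.is_tracked({})          # dicts start out untracked...
False
>>> gc.is_tracked({"a": 1})    # ...and stay untracked while contents are atomic
False
>>> gc.is_tracked({"a": []})   # adding a container causes tracking
True
>>> t = tuple([1, 2])          # tuples are created tracked...
>>> gc.is_tracked(t)
True
>>> _ = gc.collect()           # ...and untracked during collection when possible
>>> gc.is_tracked(t)
False
```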
-The garbage collector module provides the Python function ``is_tracked(obj)``, which returns
+The garbage collector module provides the Python function `is_tracked(obj)`, which returns
 the current tracking status of the object. Subsequent garbage collections may change the
 tracking status of the object.
 
@@ -556,20 +556,20 @@ Differences between GC implementations
 This section summarizes the differences between the GC implementation in the
 default build and the implementation in the free-threaded build.
 
-The default build implementation makes extensive use of the ``PyGC_Head`` data
+The default build implementation makes extensive use of the `PyGC_Head` data
 structure, while the free-threaded build implementation does not use that
 data structure.
 
 - The default build implementation stores all tracked objects in a doubly
-  linked list using ``PyGC_Head``. The free-threaded build implementation
+  linked list using `PyGC_Head`. The free-threaded build implementation
   instead relies on the embedded mimalloc memory allocator to scan the heap
   for tracked objects.
-- The default build implementation uses ``PyGC_Head`` for the unreachable
+- The default build implementation uses `PyGC_Head` for the unreachable
   object list. The free-threaded build implementation repurposes the
-  ``ob_tid`` field to store a unreachable objects linked list.
-- The default build implementation stores flags in the ``_gc_prev`` field of
-  ``PyGC_Head``. The free-threaded build implementation stores these flags
-  in ``ob_gc_bits``.
+  `ob_tid` field to store a linked list of unreachable objects.
+- The default build implementation stores flags in the `_gc_prev` field of
+  `PyGC_Head`. The free-threaded build implementation stores these flags
+  in `ob_gc_bits`.
 
 
 The default build implementation relies on the
diff --git a/InternalDocs/parser.md b/InternalDocs/parser.md
index 11aaf11253646d..6398ba6cd2838f 100644
--- a/InternalDocs/parser.md
+++ b/InternalDocs/parser.md
@@ -9,12 +9,12 @@ Python's Parser is currently a
 [`PEG` (Parser Expression Grammar)](https://en.wikipedia.org/wiki/Parsing_expression_grammar)
 parser. It was introduced in
 [PEP 617: New PEG parser for CPython](https://peps.python.org/pep-0617/) to replace
-the original [``LL(1)``](https://en.wikipedia.org/wiki/LL_parser) parser.
+the original [`LL(1)`](https://en.wikipedia.org/wiki/LL_parser) parser.
 
 The code implementing the parser is generated from a grammar definition by a
 [parser generator](https://en.wikipedia.org/wiki/Compiler-compiler).
 Therefore, changes to the Python language are made by modifying the
-[grammar file](https://github.com/python/cpython/blob/main/Grammar/python.gram).
+[grammar file](../Grammar/python.gram).
 Developers rarely need to modify the generator itself.
 
 See the devguide's
 [Changing CPython's grammar](https://devguide.python.org/developer-workflow/grammar/#grammar)
@@ -33,9 +33,9 @@ is ordered. This means that when writing:
 
 rule: A | B | C
 ```
 
-a parser that implements a context-free-grammar (such as an ``LL(1)`` parser) will
+a parser that implements a context-free grammar (such as an `LL(1)` parser) will
 generate constructions that, given an input string, *deduce* which alternative
-(``A``, ``B`` or ``C``) must be expanded. On the other hand, a PEG parser will
+(`A`, `B` or `C`) must be expanded. On the other hand, a PEG parser will
 check each alternative, in the order in which they are specified, and select
 the first one that succeeds.
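
This ordered, commit-to-first-success choice is easy to model. The toy
combinators below (hypothetical helpers `lit`, `seq`, `alt` and `accepts`; an
illustration only, not pegen) return the new input position on success and
`None` on failure:

```python
def lit(s):
    """Match the literal string s at the current position."""
    def parse(text, pos):
        return pos + len(s) if text.startswith(s, pos) else None
    return parse

def seq(*parsers):
    """Match each sub-parser in turn; fail as soon as one element fails."""
    def parse(text, pos):
        for p in parsers:
            pos = p(text, pos)
            if pos is None:
                return None
        return pos
    return parse

def alt(*parsers):
    """PEG ordered choice: commit to the first alternative that succeeds."""
    def parse(text, pos):
        for p in parsers:
            result = p(text, pos)
            if result is not None:
                return result
        return None
    return parse

def accepts(rule, text):
    """A rule accepts a string when it consumes all of it."""
    return rule(text, 0) == len(text)
```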
 
@@ -67,21 +67,21 @@ time complexity with a technique called
 which not only loads the entire program in memory before parsing it but also
 allows the parser to backtrack arbitrarily. This is made efficient by memoizing
 the rules already matched for each position. The cost of the memoization cache
-is that the parser will naturally use more memory than a simple ``LL(1)`` parser,
+is that the parser will naturally use more memory than a simple `LL(1)` parser,
 which is normally table-based.
 
 
 Key ideas
 ---------
 
-- Alternatives are ordered ( ``A | B`` is not the same as ``B | A`` ).
+- Alternatives are ordered (`A | B` is not the same as `B | A`).
 - If a rule returns a failure, it doesn't mean that the parsing has failed,
   it just means "try something else".
 - By default PEG parsers run in exponential time, which can be optimized to linear by
   using memoization.
 - If parsing fails completely (no rule succeeds in parsing all the input text), the
   PEG parser doesn't have a concept of "where the
-  [``SyntaxError``](https://docs.python.org/3/library/exceptions.html#SyntaxError) is".
+  [`SyntaxError`](https://docs.python.org/3/library/exceptions.html#SyntaxError) is".
 
 
 > [!IMPORTANT]
@@ -111,16 +111,16 @@ the following two rules (in these examples, a token is an individual character):
 
 second_rule: ('aa' | 'a' ) 'a'
 ```
 
-In a regular EBNF grammar, both rules specify the language ``{aa, aaa}`` but
-in PEG, one of these two rules accepts the string ``aaa`` but not the string
-``aa``. The other does the opposite -- it accepts the string ``aa``
-but not the string ``aaa``. The rule ``('a'|'aa')'a'`` does
-not accept ``aaa`` because ``'a'|'aa'`` consumes the first ``a``, letting the
-final ``a`` in the rule consume the second, and leaving out the third ``a``.
+In a regular EBNF grammar, both rules specify the language `{aa, aaa}` but
+in PEG, one of these two rules accepts the string `aaa` but not the string
+`aa`. The other does the opposite -- it accepts the string `aa`
+but not the string `aaa`. The rule `('a'|'aa')'a'` does
+not accept `aaa` because `'a'|'aa'` consumes the first `a`, letting the
+final `a` in the rule consume the second, and leaving out the third `a`.
 As the rule has succeeded, no attempt is ever made to go back and let
-``'a'|'aa'`` try the second alternative. The expression ``('aa'|'a')'a'`` does
-not accept ``aa`` because ``'aa'|'a'`` accepts all of ``aa``, leaving nothing
-for the final ``a``. Again, the second alternative of ``'aa'|'a'`` is not
+`'a'|'aa'` try the second alternative. The expression `('aa'|'a')'a'` does
+not accept `aa` because `'aa'|'a'` accepts all of `aa`, leaving nothing
+for the final `a`. Again, the second alternative of `'aa'|'a'` is not
 tried.
 
 > [!CAUTION]
@@ -137,7 +137,7 @@ one is in almost all cases a mistake, for example:
 ```
 
 In this example, the second alternative will never be tried because the first one will
-succeed first (even if the input string has an ``'else' block`` that follows). To correctly
+succeed first (even if the input string has an `'else' block` that follows). To correctly
 write this rule you can simply alter the order:
 
 ```
@@ -146,7 +146,7 @@ write this rule you can simply alter the order:
 | 'if' expression 'then' block
 ```
 
-In this case, if the input string doesn't have an ``'else' block``, the first alternative
+In this case, if the input string doesn't have an `'else' block`, the first alternative
 will fail and the second will be attempted.
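
Returning to the `'aa'`/`'a'` example, the toy combinators sketched earlier
reproduce exactly the behavior described above:

```python
rule_1 = seq(alt(lit('a'), lit('aa')), lit('a'))   # ('a' | 'aa') 'a'
rule_2 = seq(alt(lit('aa'), lit('a')), lit('a'))   # ('aa' | 'a') 'a'

print(accepts(rule_1, "aa"))   # True:  'a', then the final 'a'
print(accepts(rule_1, "aaa"))  # False: the third 'a' is never consumed
print(accepts(rule_2, "aaa"))  # True:  'aa', then the final 'a'
print(accepts(rule_2, "aa"))   # False: 'aa' consumed everything; no backtracking
```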
Grammar Syntax @@ -166,8 +166,8 @@ the rule: rule_name[return_type]: expression ``` -If the return type is omitted, then a ``void *`` is returned in C and an -``Any`` in Python. +If the return type is omitted, then a `void *` is returned in C and an +`Any` in Python. Grammar expressions ------------------- @@ -214,7 +214,7 @@ Variables in the grammar ------------------------ A sub-expression can be named by preceding it with an identifier and an -``=`` sign. The name can then be used in the action (see below), like this: +`=` sign. The name can then be used in the action (see below), like this: ``` rule_name[return_type]: '(' a=some_other_rule ')' { a } @@ -387,9 +387,9 @@ returns a valid C-based Python AST: | NUMBER ``` -Here ``EXTRA`` is a macro that expands to ``start_lineno, start_col_offset, -end_lineno, end_col_offset, p->arena``, those being variables automatically -injected by the parser; ``p`` points to an object that holds on to all state +Here `EXTRA` is a macro that expands to `start_lineno, start_col_offset, +end_lineno, end_col_offset, p->arena`, those being variables automatically +injected by the parser; `p` points to an object that holds on to all state for the parser. A similar grammar written to target Python AST objects: @@ -422,50 +422,47 @@ Pegen Pegen is the parser generator used in CPython to produce the final PEG parser used by the interpreter. It is the program that can be used to read the python -grammar located in -[`Grammar/python.gram`](https://github.com/python/cpython/blob/main/Grammar/python.gram) -and produce the final C parser. It contains the following pieces: +grammar located in [`Grammar/python.gram`](../Grammar/python.gram) and produce +the final C parser. It contains the following pieces: - A parser generator that can read a grammar file and produce a PEG parser written in Python or C that can parse said grammar. The generator is located at - [`Tools/peg_generator/pegen`](https://github.com/python/cpython/blob/main/Tools/peg_generator/pegen). + [`Tools/peg_generator/pegen`](../Tools/peg_generator/pegen). - A PEG meta-grammar that automatically generates a Python parser which is used for the parser generator itself (this means that there are no manually-written parsers). The meta-grammar is located at - [`Tools/peg_generator/pegen/metagrammar.gram`](https://github.com/python/cpython/blob/main/Tools/peg_generator/pegen/metagrammar.gram). + [`Tools/peg_generator/pegen/metagrammar.gram`](../Tools/peg_generator/pegen/metagrammar.gram). - A generated parser (using the parser generator) that can directly produce C and Python AST objects. -The source code for Pegen lives at -[`Tools/peg_generator/pegen`](https://github.com/python/cpython/blob/main/Tools/peg_generator/pegen) +The source code for Pegen lives at [`Tools/peg_generator/pegen`](../Tools/peg_generator/pegen) but normally all typical commands to interact with the parser generator are executed from the main makefile. How to regenerate the parser ---------------------------- -Once you have made the changes to the grammar files, to regenerate the ``C`` +Once you have made the changes to the grammar files, to regenerate the `C` parser (the one used by the interpreter) just execute: ``` make regen-pegen ``` -using the ``Makefile`` in the main directory. If you are on Windows you can +using the `Makefile` in the main directory. 
If you are on Windows you can use the Visual Studio project files to regenerate the parser or to execute: ``` ./PCbuild/build.bat --regen ``` -The generated parser file is located at -[`Parser/parser.c`](https://github.com/python/cpython/blob/main/Parser/parser.c). +The generated parser file is located at [`Parser/parser.c`](../Parser/parser.c). How to regenerate the meta-parser --------------------------------- The meta-grammar (the grammar that describes the grammar for the grammar files themselves) is located at -[`Tools/peg_generator/pegen/metagrammar.gram`](https://github.com/python/cpython/blob/main/Tools/peg_generator/pegen/metagrammar.gram). +[`Tools/peg_generator/pegen/metagrammar.gram`](../Tools/peg_generator/pegen/metagrammar.gram). Although it is very unlikely that you will ever need to modify it, if you make any modifications to this file (in order to implement new Pegen features) you will need to regenerate the meta-parser (the parser that parses the grammar files). @@ -488,11 +485,11 @@ Grammatical elements and rules Pegen has some special grammatical elements and rules: -- Strings with single quotes (') (for example, ``'class'``) denote KEYWORDS. -- Strings with double quotes (") (for example, ``"match"``) denote SOFT KEYWORDS. -- Uppercase names (for example, ``NAME``) denote tokens in the - [`Grammar/Tokens`](https://github.com/python/cpython/blob/main/Grammar/Tokens) file. -- Rule names starting with ``invalid_`` are used for specialized syntax errors. +- Strings with single quotes (') (for example, `'class'`) denote KEYWORDS. +- Strings with double quotes (") (for example, `"match"`) denote SOFT KEYWORDS. +- Uppercase names (for example, `NAME`) denote tokens in the + [`Grammar/Tokens`](../Grammar/Tokens) file. +- Rule names starting with `invalid_` are used for specialized syntax errors. - These rules are NOT used in the first pass of the parser. - Only if the first pass fails to parse, a second pass including the invalid @@ -509,14 +506,13 @@ Tokenization It is common among PEG parser frameworks that the parser does both the parsing and the tokenization, but this does not happen in Pegen. The reason is that the Python language needs a custom tokenizer to handle things like indentation -boundaries, some special keywords like ``ASYNC`` and ``AWAIT`` (for +boundaries, some special keywords like `ASYNC` and `AWAIT` (for compatibility purposes), backtracking errors (such as unclosed parenthesis), dealing with encoding, interactive mode and much more. Some of these reasons are also there for historical purposes, and some others are useful even today. The list of tokens (all uppercase names in the grammar) that you can use can -be found in thei -[`Grammar/Tokens`](https://github.com/python/cpython/blob/main/Grammar/Tokens) +be found in the [`Grammar/Tokens`](../Grammar/Tokens) file. If you change this file to add new tokens, make sure to regenerate the files by executing: @@ -532,9 +528,7 @@ the tokens or to execute: ``` How tokens are generated and the rules governing this are completely up to the tokenizer -([`Parser/lexer`](https://github.com/python/cpython/blob/main/Parser/lexer) -and -[`Parser/tokenizer`](https://github.com/python/cpython/blob/main/Parser/tokenizer)); +([`Parser/lexer`](../Parser/lexer) and [`Parser/tokenizer`](../Parser/tokenizer)); the parser just receives tokens from it. Memoization @@ -548,7 +542,7 @@ both in memory and time. 
Although the memory cost is obvious (the parser needs memory for storing
previous results in the cache) the execution time cost comes from continuously
checking if the given rule has a cache hit or not. In many situations, just
parsing it again can be faster. Pegen **disables memoization
-by default** except for rules with the special marker ``memo`` after the rule
+by default** except for rules with the special marker `memo` after the rule
 name (and type, if present):
 
 ```
@@ -567,8 +561,7 @@ To determine whether a new rule needs memoization or not, benchmarking is requir
 (comparing execution times and memory usage of some considerably large files with
 and without memoization). There is a very simple instrumentation API available
 in the generated C parser code that allows measuring how much each rule uses
-memoization (check the
-[`Parser/pegen.c`](https://github.com/python/cpython/blob/main/Parser/pegen.c)
+memoization (check the [`Parser/pegen.c`](../Parser/pegen.c)
 file for more information) but it needs to be manually activated.
 
 Automatic variables
@@ -578,9 +571,9 @@ To make writing actions easier, Pegen injects some automatic variables in the
 namespace available when writing actions. In the C parser, some of these
 automatic variable names are:
 
-- ``p``: The parser structure.
-- ``EXTRA``: This is a macro that expands to
-  ``(_start_lineno, _start_col_offset, _end_lineno, _end_col_offset, p->arena)``,
+- `p`: The parser structure.
+- `EXTRA`: This is a macro that expands to
+  `(_start_lineno, _start_col_offset, _end_lineno, _end_col_offset, p->arena)`,
   which is normally used to create AST nodes as almost all constructors need these
   attributes to be provided. All of the location variables are taken from the
   location information of the current token.
 
 Hard and soft keywords
 ======================
 
 > [!NOTE]
 > In the grammar files, keywords are defined using **single quotes** (for example,
-> ``'class'``) while soft keywords are defined using **double quotes** (for example,
-> ``"match"``).
+> `'class'`) while soft keywords are defined using **double quotes** (for example,
+> `"match"`).
 
 There are two kinds of keywords allowed in pegen grammars: *hard* and *soft*
 keywords. The difference between hard and soft keywords is that hard keywords
 are always reserved words, even in positions where they make no sense
-(for example, ``x = class + 1``), while soft keywords only get a special
+(for example, `x = class + 1`), while soft keywords only get a special
 meaning in context. Trying to use a hard keyword as a variable will always
 fail:
 
@@ -621,7 +614,7 @@ one where they are defined as keywords:
 >>> foo(match="Yeah!")
 ```
 
-The ``match`` and ``case`` keywords are soft keywords, so that they are
+The `match` and `case` keywords are soft keywords, so that they are
 recognized as keywords at the beginning of a match statement or case block
 respectively, but are allowed to be used in other places as variable or
 argument names.
 
@@ -662,7 +655,7 @@ is, and it will unwind the stack and report the exception. This means that if a
 [rule action](#grammar-actions) raises an exception, all parsing will
 stop at that exact point. This is done to correctly propagate any
 exception set by calling Python's C API functions. This also includes
-[``SyntaxError``](https://docs.python.org/3/library/exceptions.html#SyntaxError)
+[`SyntaxError`](https://docs.python.org/3/library/exceptions.html#SyntaxError)
 exceptions and it is the main mechanism the parser uses to report custom syntax
 error messages.
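
In practice, such a custom message (produced by the `invalid_` rules described
in the next sections) looks like this; the exact wording and caret formatting
vary between Python versions:

```pycon
>>> print "hello"
  File "<stdin>", line 1
    print "hello"
    ^^^^^^^^^^^^^
SyntaxError: Missing parentheses in call to 'print'. Did you mean print(...)?
```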
 
@@ -684,10 +677,10 @@ grammar.
 
 To report generic syntax errors, pegen uses a common heuristic in PEG parsers:
 the location of *generic* syntax errors is reported to be the furthest token that
 was attempted to be matched but failed. This is only done if parsing has failed
-(the parser returns ``NULL`` in C or ``None`` in Python) but no exception has
+(the parser returns `NULL` in C or `None` in Python) but no exception has
 been raised.
 
-As the Python grammar was primordially written as an ``LL(1)`` grammar, this heuristic
+As the Python grammar was originally written as an `LL(1)` grammar, this heuristic
 has an extremely high success rate, but some PEG features, such as lookaheads,
 can impact this.
 
@@ -699,19 +692,19 @@ can impact this.
 
 To generate more precise syntax errors, custom rules are used. This is a common practice
 also in context free grammars: the parser will try to accept some construct that is known
 to be incorrect just to report a specific syntax error
-for that construct. In pegen grammars, these rules start with the ``invalid_``
+for that construct. In pegen grammars, these rules start with the `invalid_`
 prefix. This is because trying to match these rules normally has a performance
 impact on parsing (and can also affect the 'correct' grammar itself in some tricky
 cases, depending on the ordering of the rules) so the generated parser
 acts in two phases:
 
 1. The first phase will try to parse the input stream without taking into
-   account rules that start with the ``invalid_`` prefix. If the parsing
+   account rules that start with the `invalid_` prefix. If the parsing
    succeeds it will return the generated AST and the second phase will be
    skipped.
 
 2. If the first phase failed, a second parsing attempt is done including the
-   rules that start with an ``invalid_`` prefix. By design this attempt
+   rules that start with an `invalid_` prefix. By design this attempt
    **cannot succeed** and is only executed to give to the invalid rules a
    chance to detect specific situations where custom, more precise, syntax
    errors can be raised. This also allows trading a bit of performance for
@@ -723,15 +716,15 @@ acts in two phases:
 
 > When defining invalid rules:
 >
 > - Make sure all custom invalid rules raise
->   [``SyntaxError``](https://docs.python.org/3/library/exceptions.html#SyntaxError)
+>   [`SyntaxError`](https://docs.python.org/3/library/exceptions.html#SyntaxError)
 >   exceptions (or a subclass of it).
-> - Make sure **all** invalid rules start with the ``invalid_`` prefix to not
+> - Make sure **all** invalid rules start with the `invalid_` prefix to not
 >   impact performance of parsing correct Python code.
 > - Make sure the parser doesn't behave differently for regular rules when you introduce invalid rules
 >   (see the [how PEG parsers work](#how-peg-parsers-work) section for more information).
 
 You can find a collection of macros to raise specialized syntax errors in the
-[`Parser/pegen.h`](https://github.com/python/cpython/blob/main/Parser/pegen.h)
+[`Parser/pegen.h`](../Parser/pegen.h)
 header file. These macros also allow reporting ranges for the custom
 errors, which will be highlighted in the tracebacks that will be
 displayed when the error is reported.
 
@@ -746,35 +739,33 @@ displayed when the error is reported.
 
 $ 42
 ```
 
-should trigger the syntax error in the ``$`` character. If your rule is not correctly defined this
+should trigger the syntax error at the `$` character. If your rule is not correctly defined this
 won't happen.
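
For instance, the expected behavior looks like this in the REPL, with the caret
marking the point of failure (exact formatting varies by version):

```pycon
>>> $ 42
  File "<stdin>", line 1
    $ 42
    ^
SyntaxError: invalid syntax
```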
As another example, suppose that you try to define a rule to match Python 2-style
-``print`` statements in order to create a better error message and you define it as:
+`print` statements in order to create a better error message and you define it as:

```
invalid_print: "print" expression
```

-This will **seem** to work because the parser will correctly parse ``print(something)``, since it is valid
-code and the second phase will never execute, but if you try to parse ``print(something) $ 3``, the first pass
-of the parser will fail (because of the ``$``) and, in the second phase, the rule will match
-``print(something)`` as ``print`` followed by the variable ``something`` between parentheses, and the error
-will be reported there instead of at the ``$`` character.
+This will **seem** to work because the parser will correctly parse `print(something)`, since it is valid
+code and the second phase will never execute, but if you try to parse `print(something) $ 3`, the first pass
+of the parser will fail (because of the `$`) and, in the second phase, the rule will match
+`print(something)` as `print` followed by the variable `something` between parentheses, and the error
+will be reported there instead of at the `$` character.

Generating AST objects
----------------------

The output of the C parser used by CPython, which is generated from the
-[grammar file](https://github.com/python/cpython/blob/main/Grammar/python.gram),
-is a Python AST object (using C structures). This means that the actions in the
-grammar file generate AST objects when they succeed. Constructing these objects
-can be quite cumbersome (see the [AST compiler section](compiler.md#abstract-syntax-trees-ast)
+[grammar file](../Grammar/python.gram), is a Python AST object (using C
+structures). This means that the actions in the grammar file generate AST
+objects when they succeed. Constructing these objects can be quite cumbersome
+(see the [AST compiler section](compiler.md#abstract-syntax-trees-ast)
for more information on how these objects are constructed and how they are used
by the compiler), so special helper functions are used. These functions are
-declared in the
-[`Parser/pegen.h`](https://github.com/python/cpython/blob/main/Parser/pegen.h)
-header file and defined in the
-[`Parser/action_helpers.c`](https://github.com/python/cpython/blob/main/Parser/action_helpers.c)
-file. The helpers include functions that join AST sequences, get specific elements
+declared in the [`Parser/pegen.h`](../Parser/pegen.h) header file and defined
+in the [`Parser/action_helpers.c`](../Parser/action_helpers.c) file. The
+helpers include functions that join AST sequences, get specific elements
from them, or perform extra processing on the generated tree.


@@ -788,11 +779,9 @@ from them, or perform extra processing on the generated tree.

As a general rule, if an action spans multiple lines or requires something more
complicated than a single expression of C code, it is normally better to create a
-custom helper in
-[`Parser/action_helpers.c`](https://github.com/python/cpython/blob/main/Parser/action_helpers.c)
-and expose it in the
-[`Parser/pegen.h`](https://github.com/python/cpython/blob/main/Parser/pegen.h)
-header file so that it can be used from the grammar.
+custom helper in [`Parser/action_helpers.c`](../Parser/action_helpers.c)
+and expose it in the [`Parser/pegen.h`](../Parser/pegen.h) header file so that
+it can be used from the grammar.
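To get a feel for why these helpers pay off, here is the Python-level equivalent of what an action must build, with the location attributes that `EXTRA` supplies in C filled in by hand (an illustrative sketch, not code from the repository):

```python
import ast

# Every node needs complete location information before it can be compiled;
# the C actions get all of this from the EXTRA macro.
one = ast.Constant(1, lineno=1, col_offset=0, end_lineno=1, end_col_offset=1)
two = ast.Constant(2, lineno=1, col_offset=4, end_lineno=1, end_col_offset=5)
add = ast.BinOp(one, ast.Add(), two,
                lineno=1, col_offset=0, end_lineno=1, end_col_offset=5)
print(eval(compile(ast.Expression(add), "<ast>", "eval")))  # 3
```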
When parsing succeeds, the parser **must** return a **valid** AST object. @@ -801,16 +790,15 @@ Testing There are three files that contain tests for the grammar and the parser: -- [test_grammar.py](https://github.com/python/cpython/blob/main/Lib/test/test_grammar.py) -- [test_syntax.py](https://github.com/python/cpython/blob/main/Lib/test/test_syntax.py) -- [test_exceptions.py](https://github.com/python/cpython/blob/main/Lib/test/test_exceptions.py) +- [test_grammar.py](../Lib/test/test_grammar.py) +- [test_syntax.py](../Lib/test/test_syntax.py) +- [test_exceptions.py](../Lib/test/test_exceptions.py) -Check the contents of these files to know which is the best place for new tests, depending -on the nature of the new feature you are adding. +Check the contents of these files to know which is the best place for new +tests, depending on the nature of the new feature you are adding. Tests for the parser generator itself can be found in the -[test_peg_generator](https://github.com/python/cpython/blob/main/Lib/test_peg_generator) -directory. +[test_peg_generator](../Lib/test_peg_generator) directory. Debugging generated parsers @@ -825,33 +813,32 @@ correctly compile and execute Python anymore. This makes it a bit challenging to debug when something goes wrong, especially when experimenting. For this reason it is a good idea to experiment first by generating a Python -parser. To do this, you can go to the -[Tools/peg_generator](https://github.com/python/cpython/blob/main/Tools/peg_generator) +parser. To do this, you can go to the [Tools/peg_generator](../Tools/peg_generator) directory on the CPython repository and manually call the parser generator by executing: ``` $ python -m pegen python ``` -This will generate a file called ``parse.py`` in the same directory that you +This will generate a file called `parse.py` in the same directory that you can use to parse some input: ``` $ python parse.py file_with_source_code_to_test.py ``` -As the generated ``parse.py`` file is just Python code, you can modify it +As the generated `parse.py` file is just Python code, you can modify it and add breakpoints to debug or better understand some complex situations. Verbose mode ------------ -When Python is compiled in debug mode (by adding ``--with-pydebug`` when -running the configure step in Linux or by adding ``-d`` when calling the -[PCbuild/build.bat](https://github.com/python/cpython/blob/main/PCbuild/build.bat)), -it is possible to activate a **very** verbose mode in the generated parser. This -is very useful to debug the generated parser and to understand how it works, but it +When Python is compiled in debug mode (by adding `--with-pydebug` when +running the configure step in Linux or by adding `-d` when calling the +[PCbuild/build.bat](../PCbuild/build.bat)), it is possible to activate a +**very** verbose mode in the generated parser. This is very useful to +debug the generated parser and to understand how it works, but it can be a bit hard to understand at first. > [!NOTE] @@ -859,13 +846,13 @@ can be a bit hard to understand at first. > interactive mode as it can be much harder to understand, because interactive > mode involves some special steps compared to regular parsing. 
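If you regenerate and re-run the Python parser often, the two commands shown above can be scripted. A minimal sketch, assuming it is run from the root of a CPython checkout (the helper name and paths are illustrative, not part of the repository):

```python
import subprocess
import sys
from pathlib import Path

PEG_GENERATOR = Path("Tools/peg_generator")  # relative to the checkout root

def regenerate_and_parse(source_file: str) -> None:
    # Regenerate parse.py from the Python grammar (same command as above).
    subprocess.run([sys.executable, "-m", "pegen", "python"],
                   cwd=PEG_GENERATOR, check=True)
    # Run the freshly generated parser on the given input.
    subprocess.run([sys.executable, "parse.py", source_file],
                   cwd=PEG_GENERATOR, check=True)

regenerate_and_parse("file_with_source_code_to_test.py")
```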
-To activate verbose mode you can add the ``-d`` flag when executing Python:
+To activate verbose mode you can add the `-d` flag when executing Python:

```
$ python -d file_to_test.py
```

-This will print **a lot** of output to ``stderr`` so it is probably better to dump
+This will print **a lot** of output to `stderr` so it is probably better to dump
it to a file for further analysis. The output consists of trace lines with the
following structure:

```
('>'|'-'|'+'|'!') <rule_name>[<token_location>]: <alternative> ...
```

-Every line is indented by a different amount (``<indentation>``) depending on how
+Every line is indented by a different amount (`<indentation>`) depending on how
deep the call stack is. The next character marks the type of the trace:

-- ``>`` indicates that a rule is going to be attempted to be parsed.
-- ``-`` indicates that a rule has failed to be parsed.
-- ``+`` indicates that a rule has been parsed correctly.
-- ``!`` indicates that an exception or an error has been detected and the parser is unwinding.
+- `>` indicates that a rule is going to be attempted to be parsed.
+- `-` indicates that a rule has failed to be parsed.
+- `+` indicates that a rule has been parsed correctly.
+- `!` indicates that an exception or an error has been detected and the parser is unwinding.

-The ``<token_location>`` part indicates the current index in the token array,
-the ``<rule_name>`` part indicates what rule is being parsed and
-the ``<alternative>`` part indicates what alternative within that rule
+The `<token_location>` part indicates the current index in the token array,
+the `<rule_name>` part indicates what rule is being parsed and
+the `<alternative>` part indicates what alternative within that rule
is being attempted.


@@ -891,4 +878,5 @@ is being attempted.
> **Document history**
>
> Pablo Galindo Salgado - Original author
+>
> Irit Katriel and Jacob Coffee - Convert to Markdown

From 03f9264ecef4b1df5e71586327a04ec3b9331cbe Mon Sep 17 00:00:00 2001
From: Arjun Singh <98927961+xylocone@users.noreply.github.com>
Date: Tue, 22 Oct 2024 08:18:16 +0530
Subject: [PATCH 21/36] fix grammar in comment in dictobject.c (#125822)

---
 Objects/dictobject.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index c4e11a3e9c0bc7..3134f6141dc9be 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -34,7 +34,7 @@ As of Python 3.6, this is compact and ordered. Basic idea is described here:
dk_indices is actual hashtable. It holds index in entries, or DKIX_EMPTY(-1)
or DKIX_DUMMY(-2).
-Size of indices is dk_size. Type of each index in indices is vary on dk_size:
+Size of indices is dk_size. Type of each index in indices varies with dk_size:

 * int8 for dk_size <= 128
 * int16 for 256 <= dk_size <= 2**15

From 4efe64aa56e7a9a96b94c0ae0201db8d402a5f53 Mon Sep 17 00:00:00 2001
From: Mikhail Efimov
Date: Tue, 22 Oct 2024 11:41:30 +0300
Subject: [PATCH 22/36] gh-125811: Remove DeprecationWarnings in test_peg_generator (#125812)

---
 Lib/test/test_peg_generator/test_pegen.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Lib/test/test_peg_generator/test_pegen.py b/Lib/test/test_peg_generator/test_pegen.py
index 86db767b99a228..54c9dce2d0c90d 100644
--- a/Lib/test/test_peg_generator/test_pegen.py
+++ b/Lib/test/test_peg_generator/test_pegen.py
@@ -484,7 +484,7 @@ def test_left_recursive(self) -> None:
     def test_python_expr(self) -> None:
         grammar = """
-            start: expr NEWLINE? $ { ast.Expression(expr, lineno=1, col_offset=0) }
+            start: expr NEWLINE? 
$ { ast.Expression(expr) } expr: ( expr '+' term { ast.BinOp(expr, ast.Add(), term, lineno=expr.lineno, col_offset=expr.col_offset, end_lineno=term.end_lineno, end_col_offset=term.end_col_offset) } | expr '-' term { ast.BinOp(expr, ast.Sub(), term, lineno=expr.lineno, col_offset=expr.col_offset, end_lineno=term.end_lineno, end_col_offset=term.end_col_offset) } | term { term } @@ -893,7 +893,7 @@ def test_unreachable_implicit3(self) -> None: def test_locations_in_alt_action_and_group(self) -> None: grammar = """ - start: t=term NEWLINE? $ { ast.Expression(t, LOCATIONS) } + start: t=term NEWLINE? $ { ast.Expression(t) } term: | l=term '*' r=factor { ast.BinOp(l, ast.Mult(), r, LOCATIONS) } | l=term '/' r=factor { ast.BinOp(l, ast.Div(), r, LOCATIONS) } From c1bdbe84c8ab29b68bb109328e02af9464f104b3 Mon Sep 17 00:00:00 2001 From: Mikhail Efimov Date: Tue, 22 Oct 2024 11:42:56 +0300 Subject: [PATCH 23/36] gh-124889: Rework Python generator cache (#125816) --- Tools/peg_generator/pegen/python_generator.py | 75 ++++++++++++------- 1 file changed, 48 insertions(+), 27 deletions(-) diff --git a/Tools/peg_generator/pegen/python_generator.py b/Tools/peg_generator/pegen/python_generator.py index 588d3d3f6ef8f8..7057135a9061f6 100644 --- a/Tools/peg_generator/pegen/python_generator.py +++ b/Tools/peg_generator/pegen/python_generator.py @@ -1,6 +1,6 @@ import os.path import token -from typing import IO, Any, Dict, Optional, Sequence, Set, Text, Tuple +from typing import IO, Any, Callable, Dict, Optional, Sequence, Set, Text, Tuple from pegen import grammar from pegen.grammar import ( @@ -93,7 +93,7 @@ def visit_Forced(self, node: Forced) -> bool: class PythonCallMakerVisitor(GrammarVisitor): def __init__(self, parser_generator: ParserGenerator): self.gen = parser_generator - self.cache: Dict[Any, Any] = {} + self.cache: Dict[str, Tuple[str, str]] = {} def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]: name = node.value @@ -110,16 +110,6 @@ def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]: def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]: return "literal", f"self.expect({node.value})" - def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]: - if node in self.cache: - return self.cache[node] - if len(node.alts) == 1 and len(node.alts[0].items) == 1: - self.cache[node] = self.visit(node.alts[0].items[0]) - else: - name = self.gen.artificial_rule_from_rhs(node) - self.cache[node] = name, f"self.{name}()" - return self.cache[node] - def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]: name, call = self.visit(node.item) if node.name: @@ -151,26 +141,57 @@ def visit_Opt(self, node: Opt) -> Tuple[str, str]: else: return "opt", f"{call}," + def _generate_artificial_rule_call( + self, + node: Any, + prefix: str, + call_by_name_func: Callable[[str], str], + rule_generation_func: Callable[[], str], + ) -> Tuple[str, str]: + node_str = f"{node}" + key = f"{prefix}_{node_str}" + if key in self.cache: + return self.cache[key] + + name = rule_generation_func() + call = call_by_name_func(name) + self.cache[key] = name, call + return self.cache[key] + + def visit_Rhs(self, node: Rhs) -> Tuple[str, str]: + if len(node.alts) == 1 and len(node.alts[0].items) == 1: + return self.visit(node.alts[0].items[0]) + + return self._generate_artificial_rule_call( + node, + "rhs", + lambda name: f"self.{name}()", + lambda: self.gen.artificial_rule_from_rhs(node), + ) + def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]: - if node in self.cache: - 
return self.cache[node] - name = self.gen.artificial_rule_from_repeat(node.node, False) - self.cache[node] = name, f"self.{name}()," # Also a trailing comma! - return self.cache[node] + return self._generate_artificial_rule_call( + node, + "repeat0", + lambda name: f"self.{name}(),", # Also a trailing comma! + lambda: self.gen.artificial_rule_from_repeat(node.node, is_repeat1=False), + ) def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]: - if node in self.cache: - return self.cache[node] - name = self.gen.artificial_rule_from_repeat(node.node, True) - self.cache[node] = name, f"self.{name}()" # But no trailing comma here! - return self.cache[node] + return self._generate_artificial_rule_call( + node, + "repeat1", + lambda name: f"self.{name}()", # But no trailing comma here! + lambda: self.gen.artificial_rule_from_repeat(node.node, is_repeat1=True), + ) def visit_Gather(self, node: Gather) -> Tuple[str, str]: - if node in self.cache: - return self.cache[node] - name = self.gen.artificial_rule_from_gather(node) - self.cache[node] = name, f"self.{name}()" # No trailing comma here either! - return self.cache[node] + return self._generate_artificial_rule_call( + node, + "gather", + lambda name: f"self.{name}()", # No trailing comma here either! + lambda: self.gen.artificial_rule_from_gather(node), + ) def visit_Group(self, node: Group) -> Tuple[Optional[str], str]: return self.visit(node.rhs) From 57e3c59bb64fc2f8b2845a7e03ab0abb029ccd02 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 22 Oct 2024 10:11:29 +0100 Subject: [PATCH 24/36] GH-125521: Remove `if (true)` from generated output to reduce C compiler warnings (GH-125700) --- Lib/test/test_generated_cases.py | 27 +++++++++ Python/generated_cases.c.h | 68 +++++++++++----------- Tools/cases_generator/analyzer.py | 2 +- Tools/cases_generator/generators_common.py | 18 ++++-- 4 files changed, 75 insertions(+), 40 deletions(-) diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index cd3718b80612bd..95813e1e32c7af 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -1270,6 +1270,33 @@ def test_push_then_error(self): """ self.run_cases_test(input, output) + def test_error_if_true(self): + + input = """ + inst(OP1, ( --)) { + ERROR_IF(true, here); + } + inst(OP2, ( --)) { + ERROR_IF(1, there); + } + """ + output = """ + TARGET(OP1) { + frame->instr_ptr = next_instr; + next_instr += 1; + INSTRUCTION_STATS(OP1); + goto here; + } + + TARGET(OP2) { + frame->instr_ptr = next_instr; + next_instr += 1; + INSTRUCTION_STATS(OP2); + goto there; + } + """ + self.run_cases_test(input, output) + def test_scalar_array_inconsistency(self): input = """ diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 388031af87a79f..efbf2fba8c3106 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -689,7 +689,7 @@ for (int _i = oparg*2; --_i >= 0;) { PyStackRef_CLOSE(values[_i]); } - if (true) { + { stack_pointer += -oparg*2; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -731,7 +731,7 @@ for (int _i = oparg; --_i >= 0;) { PyStackRef_CLOSE(values[_i]); } - if (true) { + { stack_pointer += -oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -748,7 +748,7 @@ } if (err != 0) { Py_DECREF(set_o); - if (true) { + { stack_pointer += -oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -803,7 +803,7 @@ for (int _i = oparg; --_i >= 0;) { PyStackRef_CLOSE(pieces[_i]); } - if (true) { + { stack_pointer += -oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ 
-945,7 +945,7 @@ for (int i = 0; i < total_args; i++) { PyStackRef_CLOSE(args[i]); } - if (true) { + { stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -1343,7 +1343,7 @@ for (int _i = oparg; --_i >= 0;) { PyStackRef_CLOSE(args[_i]); } - if (true) { + { stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -1422,7 +1422,7 @@ for (int _i = oparg; --_i >= 0;) { PyStackRef_CLOSE(args[_i]); } - if (true) { + { stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -1509,7 +1509,7 @@ for (int _i = oparg; --_i >= 0;) { PyStackRef_CLOSE(args[_i]); } - if (true) { + { stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -1971,7 +1971,7 @@ PyStackRef_CLOSE(args[_i]); } PyStackRef_CLOSE(kwnames); - if (true) { + { stack_pointer += -3 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -2174,7 +2174,7 @@ PyStackRef_CLOSE(args[_i]); } PyStackRef_CLOSE(kwnames); - if (true) { + { stack_pointer += -3 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -2431,7 +2431,7 @@ for (int _i = oparg; --_i >= 0;) { PyStackRef_CLOSE(args[_i]); } - if (true) { + { stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -2516,7 +2516,7 @@ for (int _i = oparg; --_i >= 0;) { PyStackRef_CLOSE(args[_i]); } - if (true) { + { stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -2749,7 +2749,7 @@ for (int _i = oparg; --_i >= 0;) { PyStackRef_CLOSE(args[_i]); } - if (true) { + { stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -3103,7 +3103,7 @@ if (err < 0) { PyStackRef_CLOSE(exc_value_st); PyStackRef_CLOSE(match_type_st); - if (true) goto pop_2_error; + goto pop_2_error; } PyObject *match_o = NULL; PyObject *rest_o = NULL; @@ -3149,7 +3149,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); if (err < 0) { PyStackRef_CLOSE(right); - if (true) goto pop_1_error; + goto pop_1_error; } _PyFrame_SetStackPointer(frame, stack_pointer); int res = PyErr_GivenExceptionMatches(left_o, right_o); @@ -3583,7 +3583,7 @@ PyTuple_GetItem(_PyFrame_GetCode(frame)->co_localsplusnames, oparg) ); stack_pointer = _PyFrame_GetStackPointer(frame); - if (1) goto error; + goto error; } SETLOCAL(oparg, PyStackRef_NULL); DISPATCH(); @@ -3682,7 +3682,7 @@ _PyEval_FormatKwargsError(tstate, callable_o, update_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(update); - if (true) goto pop_1_error; + goto pop_1_error; } PyStackRef_CLOSE(update); stack_pointer += -1; @@ -3715,7 +3715,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); } PyStackRef_CLOSE(update); - if (true) goto pop_1_error; + goto pop_1_error; } PyStackRef_CLOSE(update); stack_pointer += -1; @@ -4173,7 +4173,7 @@ type->tp_name); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(obj); - if (true) goto pop_1_error; + goto pop_1_error; } _PyFrame_SetStackPointer(frame, stack_pointer); iter_o = (*getter)(obj_o); @@ -4191,7 +4191,7 @@ Py_TYPE(iter_o)->tp_name); stack_pointer = _PyFrame_GetStackPointer(frame); Py_DECREF(iter_o); - if (true) goto error; + goto error; } iter = PyStackRef_FromPyObjectSteal(iter_o); stack_pointer[-1] = iter; @@ -4458,7 +4458,7 @@ for (int i = 0; i < total_args; i++) { PyStackRef_CLOSE(args[i]); } - if (true) { + { stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -5210,7 +5210,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); } PyStackRef_CLOSE(iterable_st); - if (true) goto pop_1_error; + goto pop_1_error; } assert(Py_IsNone(none_val)); 
PyStackRef_CLOSE(iterable_st); @@ -5866,7 +5866,7 @@ _PyErr_SetString(tstate, PyExc_NameError, "__build_class__ not found"); stack_pointer = _PyFrame_GetStackPointer(frame); - if (true) goto error; + goto error; } bc = PyStackRef_FromPyObjectSteal(bc_o); stack_pointer[0] = bc; @@ -5920,7 +5920,7 @@ _PyFrame_SetStackPointer(frame, stack_pointer); _PyEval_FormatExcUnbound(tstate, _PyFrame_GetCode(frame), oparg); stack_pointer = _PyFrame_GetStackPointer(frame); - if (true) goto error; + goto error; } value = PyStackRef_FromPyObjectSteal(value_o); stack_pointer[0] = value; @@ -5969,7 +5969,7 @@ PyTuple_GetItem(_PyFrame_GetCode(frame)->co_localsplusnames, oparg) ); stack_pointer = _PyFrame_GetStackPointer(frame); - if (1) goto error; + goto error; } value = PyStackRef_DUP(value_s); stack_pointer[0] = value; @@ -6088,7 +6088,7 @@ tstate, PyExc_NameError, NAME_ERROR_MSG, name); stack_pointer = _PyFrame_GetStackPointer(frame); - if (true) goto error; + goto error; } } } @@ -6237,7 +6237,7 @@ _PyErr_SetString(tstate, PyExc_SystemError, "no locals found"); stack_pointer = _PyFrame_GetStackPointer(frame); - if (true) goto error; + goto error; } locals = PyStackRef_FromPyObjectNew(l); stack_pointer[0] = locals; @@ -6288,7 +6288,7 @@ Py_TYPE(owner_o)->tp_name); stack_pointer = _PyFrame_GetStackPointer(frame); } - if (true) goto error; + goto error; } attr = PyStackRef_FromPyObjectSteal(attr_o); self_or_null = self_or_null_o == NULL ? @@ -6348,7 +6348,7 @@ PyStackRef_CLOSE(global_super_st); PyStackRef_CLOSE(class_st); PyStackRef_CLOSE(self_st); - if (true) goto pop_3_error; + goto pop_3_error; } } // we make no attempt to optimize here; specializations should @@ -6466,7 +6466,7 @@ PyStackRef_CLOSE(class_st); if (attr_o == NULL) { PyStackRef_CLOSE(self_st); - if (true) goto pop_3_error; + goto pop_3_error; } if (method_found) { self_or_null = self_st; // transfer ownership @@ -6838,7 +6838,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); goto exception_unwind; } - if (true) goto error; + goto error; } TARGET(RERAISE) { @@ -7130,7 +7130,7 @@ } else { PyStackRef_CLOSE(v); - if (true) goto pop_1_error; + goto pop_1_error; } } PyStackRef_CLOSE(v); @@ -7202,7 +7202,7 @@ _PyErr_Format(tstate, PyExc_SystemError, "no locals found when setting up annotations"); stack_pointer = _PyFrame_GetStackPointer(frame); - if (true) goto error; + goto error; } /* check if __annotations__ in locals()... 
*/ _PyFrame_SetStackPointer(frame, stack_pointer); @@ -7559,7 +7559,7 @@ "no locals found when storing %R", name); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(v); - if (true) goto pop_1_error; + goto pop_1_error; } if (PyDict_CheckExact(ns)) { _PyFrame_SetStackPointer(frame, stack_pointer); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 381ad3a4e2082c..f41a8d161099df 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -744,7 +744,7 @@ def always_exits(op: parser.InstDef) -> bool: if tkn.text == "DEOPT_IF" or tkn.text == "ERROR_IF": next(tkn_iter) # '(' t = next(tkn_iter) - if t.text == "true": + if t.text in ("true", "1"): return True return False diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 7e032c21d2485c..3b158f5ac4eb48 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -165,16 +165,24 @@ def error_if( storage: Storage, inst: Instruction | None, ) -> bool: - self.out.emit_at("if ", tkn) lparen = next(tkn_iter) - self.emit(lparen) assert lparen.kind == "LPAREN" first_tkn = tkn_iter.peek() - emit_to(self.out, tkn_iter, "COMMA") + unconditional = always_true(first_tkn) + if unconditional: + next(tkn_iter) + comma = next(tkn_iter) + if comma.kind != "COMMA": + raise analysis_error(f"Expected comma, got '{comma.text}'", comma) + self.out.start_line() + else: + self.out.emit_at("if ", tkn) + self.emit(lparen) + emit_to(self.out, tkn_iter, "COMMA") + self.out.emit(") ") label = next(tkn_iter).text next(tkn_iter) # RPAREN next(tkn_iter) # Semi colon - self.out.emit(") ") storage.clear_inputs("at ERROR_IF") c_offset = storage.stack.peek_offset() try: @@ -196,7 +204,7 @@ def error_if( self.out.emit(label) self.out.emit(";\n") self.out.emit("}\n") - return not always_true(first_tkn) + return not unconditional def error_no_pop( self, From 759a54d28ffe7eac8c23917f5d3dfad8309856be Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 22 Oct 2024 13:57:25 +0300 Subject: [PATCH 25/36] gh-125355: Rewrite parse_intermixed_args() in argparse (GH-125356) * The parser no longer changes temporarily during parsing. * Default values are not processed twice. * Required mutually exclusive groups containing positional arguments are now supported. * The missing arguments report now includes the names of all required optional and positional arguments. * Unknown options can be intermixed with positional arguments in parse_known_intermixed_args(). 
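For context, the behaviour being reworked here (example adapted from the argparse documentation; the snippet is an illustration, not part of the patch):

```python
import argparse

parser = argparse.ArgumentParser(prog='PROG')
parser.add_argument('--foo')
parser.add_argument('cmd')
parser.add_argument('rest', nargs='*', type=int)

# Optionals may be interleaved with the 'rest' positionals.
args = parser.parse_intermixed_args('doit 1 --foo bar 2 3'.split())
print(args)  # Namespace(cmd='doit', foo='bar', rest=[1, 2, 3])
```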
--- Lib/argparse.py | 99 +++++++------------ Lib/test/test_argparse.py | 56 +++++++---- ...-10-22-13-28-00.gh-issue-125355.zssHm_.rst | 7 ++ 3 files changed, 80 insertions(+), 82 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-22-13-28-00.gh-issue-125355.zssHm_.rst diff --git a/Lib/argparse.py b/Lib/argparse.py index 49271a146c7282..024622bec17c3b 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -1924,6 +1924,9 @@ def parse_args(self, args=None, namespace=None): return args def parse_known_args(self, args=None, namespace=None): + return self._parse_known_args2(args, namespace, intermixed=False) + + def _parse_known_args2(self, args, namespace, intermixed): if args is None: # args default to the system args args = _sys.argv[1:] @@ -1950,18 +1953,18 @@ def parse_known_args(self, args=None, namespace=None): # parse the arguments and exit if there are any errors if self.exit_on_error: try: - namespace, args = self._parse_known_args(args, namespace) + namespace, args = self._parse_known_args(args, namespace, intermixed) except ArgumentError as err: self.error(str(err)) else: - namespace, args = self._parse_known_args(args, namespace) + namespace, args = self._parse_known_args(args, namespace, intermixed) if hasattr(namespace, _UNRECOGNIZED_ARGS_ATTR): args.extend(getattr(namespace, _UNRECOGNIZED_ARGS_ATTR)) delattr(namespace, _UNRECOGNIZED_ARGS_ATTR) return namespace, args - def _parse_known_args(self, arg_strings, namespace): + def _parse_known_args(self, arg_strings, namespace, intermixed): # replace arg strings that are file references if self.fromfile_prefix_chars is not None: arg_strings = self._read_args_from_files(arg_strings) @@ -2052,6 +2055,7 @@ def consume_optional(start_index): # if we found no optional action, skip it if action is None: extras.append(arg_strings[start_index]) + extras_pattern.append('O') return start_index + 1 # if there is an explicit argument, try to match the @@ -2087,6 +2091,7 @@ def consume_optional(start_index): sep = '' else: extras.append(char + explicit_arg) + extras_pattern.append('O') stop = start_index + 1 break # if the action expect exactly one argument, we've @@ -2165,6 +2170,7 @@ def consume_positionals(start_index): # consume Positionals and Optionals alternately, until we have # passed the last option string extras = [] + extras_pattern = [] start_index = 0 if option_string_indices: max_option_string_index = max(option_string_indices) @@ -2178,7 +2184,7 @@ def consume_positionals(start_index): if next_option_string_index in option_string_indices: break next_option_string_index += 1 - if start_index != next_option_string_index: + if not intermixed and start_index != next_option_string_index: positionals_end_index = consume_positionals(start_index) # only try to parse the next optional if we didn't consume @@ -2194,16 +2200,35 @@ def consume_positionals(start_index): if start_index not in option_string_indices: strings = arg_strings[start_index:next_option_string_index] extras.extend(strings) + extras_pattern.extend(arg_strings_pattern[start_index:next_option_string_index]) start_index = next_option_string_index # consume the next optional and any arguments for it start_index = consume_optional(start_index) - # consume any positionals following the last Optional - stop_index = consume_positionals(start_index) + if not intermixed: + # consume any positionals following the last Optional + stop_index = consume_positionals(start_index) - # if we didn't consume all the argument strings, there were extras - 
extras.extend(arg_strings[stop_index:]) + # if we didn't consume all the argument strings, there were extras + extras.extend(arg_strings[stop_index:]) + else: + extras.extend(arg_strings[start_index:]) + extras_pattern.extend(arg_strings_pattern[start_index:]) + extras_pattern = ''.join(extras_pattern) + assert len(extras_pattern) == len(extras) + # consume all positionals + arg_strings = [s for s, c in zip(extras, extras_pattern) if c != 'O'] + arg_strings_pattern = extras_pattern.replace('O', '') + stop_index = consume_positionals(0) + # leave unknown optionals and non-consumed positionals in extras + for i, c in enumerate(extras_pattern): + if not stop_index: + break + if c != 'O': + stop_index -= 1 + extras[i] = None + extras = [s for s in extras if s is not None] # make sure all required actions were present and also convert # action defaults which were not given as arguments @@ -2469,10 +2494,6 @@ def parse_known_intermixed_args(self, args=None, namespace=None): # are then parsed. If the parser definition is incompatible with the # intermixed assumptions (e.g. use of REMAINDER, subparsers) a # TypeError is raised. - # - # positionals are 'deactivated' by setting nargs and default to - # SUPPRESS. This blocks the addition of that positional to the - # namespace positionals = self._get_positional_actions() a = [action for action in positionals @@ -2481,59 +2502,7 @@ def parse_known_intermixed_args(self, args=None, namespace=None): raise TypeError('parse_intermixed_args: positional arg' ' with nargs=%s'%a[0].nargs) - if [action.dest for group in self._mutually_exclusive_groups - for action in group._group_actions if action in positionals]: - raise TypeError('parse_intermixed_args: positional in' - ' mutuallyExclusiveGroup') - - try: - save_usage = self.usage - try: - if self.usage is None: - # capture the full usage for use in error messages - self.usage = self.format_usage()[7:] - for action in positionals: - # deactivate positionals - action.save_nargs = action.nargs - # action.nargs = 0 - action.nargs = SUPPRESS - action.save_default = action.default - action.default = SUPPRESS - namespace, remaining_args = self.parse_known_args(args, - namespace) - for action in positionals: - # remove the empty positional values from namespace - if (hasattr(namespace, action.dest) - and getattr(namespace, action.dest)==[]): - from warnings import warn - warn('Do not expect %s in %s' % (action.dest, namespace)) - delattr(namespace, action.dest) - finally: - # restore nargs and usage before exiting - for action in positionals: - action.nargs = action.save_nargs - action.default = action.save_default - optionals = self._get_optional_actions() - try: - # parse positionals. optionals aren't normally required, but - # they could be, so make sure they aren't. 
- for action in optionals: - action.save_required = action.required - action.required = False - for group in self._mutually_exclusive_groups: - group.save_required = group.required - group.required = False - namespace, extras = self.parse_known_args(remaining_args, - namespace) - finally: - # restore parser values before exiting - for action in optionals: - action.required = action.save_required - for group in self._mutually_exclusive_groups: - group.required = group.save_required - finally: - self.usage = save_usage - return namespace, extras + return self._parse_known_args2(args, namespace, intermixed=True) # ======================== # Value conversion methods diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 4fa669718abc50..4bd7a935b9b757 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -6412,12 +6412,23 @@ def test_basic(self): # cannot parse the '1,2,3' self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[1]), args) self.assertEqual(["2", "3"], extras) + args, extras = parser.parse_known_intermixed_args(argv) + self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[1, 2, 3]), args) + self.assertEqual([], extras) + # unknown optionals go into extras + argv = 'cmd --foo x --error 1 2 --bar y 3'.split() + args, extras = parser.parse_known_intermixed_args(argv) + self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[1, 2, 3]), args) + self.assertEqual(['--error'], extras) argv = 'cmd --foo x 1 --error 2 --bar y 3'.split() args, extras = parser.parse_known_intermixed_args(argv) - # unknown optionals go into extras - self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[1]), args) - self.assertEqual(['--error', '2', '3'], extras) + self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[1, 2, 3]), args) + self.assertEqual(['--error'], extras) + argv = 'cmd --foo x 1 2 --error --bar y 3'.split() + args, extras = parser.parse_known_intermixed_args(argv) + self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[1, 2, 3]), args) + self.assertEqual(['--error'], extras) # restores attributes that were temporarily changed self.assertIsNone(parser.usage) @@ -6436,37 +6447,48 @@ def test_remainder(self): parser.parse_intermixed_args(argv) self.assertRegex(str(cm.exception), r'\.\.\.') - def test_exclusive(self): - # mutually exclusive group; intermixed works fine - parser = ErrorRaisingArgumentParser(prog='PROG') + def test_required_exclusive(self): + # required mutually exclusive group; intermixed works fine + parser = argparse.ArgumentParser(prog='PROG', exit_on_error=False) group = parser.add_mutually_exclusive_group(required=True) group.add_argument('--foo', action='store_true', help='FOO') group.add_argument('--spam', help='SPAM') parser.add_argument('badger', nargs='*', default='X', help='BADGER') + args = parser.parse_intermixed_args('--foo 1 2'.split()) + self.assertEqual(NS(badger=['1', '2'], foo=True, spam=None), args) args = parser.parse_intermixed_args('1 --foo 2'.split()) self.assertEqual(NS(badger=['1', '2'], foo=True, spam=None), args) - self.assertRaises(ArgumentParserError, parser.parse_intermixed_args, '1 2'.split()) + self.assertRaisesRegex(argparse.ArgumentError, + 'one of the arguments --foo --spam is required', + parser.parse_intermixed_args, '1 2'.split()) self.assertEqual(group.required, True) - def test_exclusive_incompatible(self): - # mutually exclusive group including positional - fail - parser = ErrorRaisingArgumentParser(prog='PROG') + def test_required_exclusive_with_positional(self): + # required mutually exclusive 
group with positional argument + parser = argparse.ArgumentParser(prog='PROG', exit_on_error=False) group = parser.add_mutually_exclusive_group(required=True) group.add_argument('--foo', action='store_true', help='FOO') group.add_argument('--spam', help='SPAM') group.add_argument('badger', nargs='*', default='X', help='BADGER') - self.assertRaises(TypeError, parser.parse_intermixed_args, []) + args = parser.parse_intermixed_args(['--foo']) + self.assertEqual(NS(foo=True, spam=None, badger='X'), args) + args = parser.parse_intermixed_args(['a', 'b']) + self.assertEqual(NS(foo=False, spam=None, badger=['a', 'b']), args) + self.assertRaisesRegex(argparse.ArgumentError, + 'one of the arguments --foo --spam badger is required', + parser.parse_intermixed_args, []) + self.assertRaisesRegex(argparse.ArgumentError, + 'argument badger: not allowed with argument --foo', + parser.parse_intermixed_args, ['--foo', 'a', 'b']) + self.assertRaisesRegex(argparse.ArgumentError, + 'argument badger: not allowed with argument --foo', + parser.parse_intermixed_args, ['a', '--foo', 'b']) self.assertEqual(group.required, True) def test_invalid_args(self): parser = ErrorRaisingArgumentParser(prog='PROG') self.assertRaises(ArgumentParserError, parser.parse_intermixed_args, ['a']) - parser = ErrorRaisingArgumentParser(prog='PROG') - parser.add_argument('--foo', nargs="*") - parser.add_argument('foo') - with self.assertWarns(UserWarning): - parser.parse_intermixed_args(['hello', '--foo']) class TestIntermixedMessageContentError(TestCase): # case where Intermixed gives different error message @@ -6485,7 +6507,7 @@ def test_missing_argument_name_in_message(self): with self.assertRaises(ArgumentParserError) as cm: parser.parse_intermixed_args([]) msg = str(cm.exception) - self.assertNotRegex(msg, 'req_pos') + self.assertRegex(msg, 'req_pos') self.assertRegex(msg, 'req_opt') # ========================== diff --git a/Misc/NEWS.d/next/Library/2024-10-22-13-28-00.gh-issue-125355.zssHm_.rst b/Misc/NEWS.d/next/Library/2024-10-22-13-28-00.gh-issue-125355.zssHm_.rst new file mode 100644 index 00000000000000..fd67f697641d92 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-22-13-28-00.gh-issue-125355.zssHm_.rst @@ -0,0 +1,7 @@ +Fix several bugs in :meth:`argparse.ArgumentParser.parse_intermixed_args`. + +* The parser no longer changes temporarily during parsing. +* Default values are not processed twice. +* Required mutually exclusive groups containing positional arguments are now supported. +* The missing arguments report now includes the names of all required optional and positional arguments. +* Unknown options can be intermixed with positional arguments in parse_known_intermixed_args(). From 91ddde4af0c3031c84a967bcf59f6fb4f8a48c0d Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Tue, 22 Oct 2024 14:07:09 +0100 Subject: [PATCH 26/36] Doc: Show object descriptions in the table of contents (#125757) --- Doc/conf.py | 3 ++- Doc/tools/extensions/pyspecific.py | 1 + Doc/tools/static/sidebar-wrap.css | 6 ++++++ 3 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 Doc/tools/static/sidebar-wrap.css diff --git a/Doc/conf.py b/Doc/conf.py index db8fb9a9a68c6b..7ee3c91581345d 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -94,7 +94,8 @@ # Create table of contents entries for domain objects (e.g. functions, classes, # attributes, etc.). Default is True. 
-toc_object_entries = False +toc_object_entries = True +toc_object_entries_show_parents = 'hide' # Ignore any .rst files in the includes/ directory; # they're embedded in pages but not rendered individually. diff --git a/Doc/tools/extensions/pyspecific.py b/Doc/tools/extensions/pyspecific.py index bcb8a421e32d09..f4df7ec0839339 100644 --- a/Doc/tools/extensions/pyspecific.py +++ b/Doc/tools/extensions/pyspecific.py @@ -434,5 +434,6 @@ def setup(app): app.add_directive_to_domain('py', 'awaitablemethod', PyAwaitableMethod) app.add_directive_to_domain('py', 'abstractmethod', PyAbstractMethod) app.add_directive('miscnews', MiscNews) + app.add_css_file('sidebar-wrap.css') app.connect('env-check-consistency', patch_pairindextypes) return {'version': '1.0', 'parallel_read_safe': True} diff --git a/Doc/tools/static/sidebar-wrap.css b/Doc/tools/static/sidebar-wrap.css new file mode 100644 index 00000000000000..0a80f516f28349 --- /dev/null +++ b/Doc/tools/static/sidebar-wrap.css @@ -0,0 +1,6 @@ +div.sphinxsidebarwrapper { + overflow-x: scroll; +} +div.sphinxsidebarwrapper li code { + overflow-wrap: normal; +} From 079875e39589eb0628b5883f7ffa387e7476ec06 Mon Sep 17 00:00:00 2001 From: Mikhail Efimov Date: Tue, 22 Oct 2024 19:00:25 +0300 Subject: [PATCH 27/36] gh-125038: Fix crash after genexpr.gi_frame.f_locals manipulations (#125178) --- Lib/test/test_dis.py | 1 + Lib/test/test_generators.py | 73 +++++++++++++++++++ ...-10-09-13-53-50.gh-issue-125038.ffSLCz.rst | 2 + Python/codegen.c | 1 + 4 files changed, 77 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-09-13-53-50.gh-issue-125038.ffSLCz.rst diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 1ee0fbe98914be..1f9c04cdbc926c 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -810,6 +810,7 @@ def foo(x): POP_TOP L1: RESUME 0 LOAD_FAST 0 (.0) + GET_ITER L2: FOR_ITER 10 (to L3) STORE_FAST 1 (z) LOAD_DEREF 2 (x) diff --git a/Lib/test/test_generators.py b/Lib/test/test_generators.py index 03a31ec6a05726..bf2cb1160723b0 100644 --- a/Lib/test/test_generators.py +++ b/Lib/test/test_generators.py @@ -268,6 +268,79 @@ def loop(): #This should not raise loop() + +class ModifyUnderlyingIterableTest(unittest.TestCase): + iterables = [ + range(0), + range(20), + [1, 2, 3], + (2,), + {13, 48, 211}, + frozenset((15, 8, 6)), + {1: 2, 3: 4}, + ] + + non_iterables = [ + None, + 42, + 3.0, + 2j, + ] + + def genexpr(self): + return (x for x in range(10)) + + def genfunc(self): + def gen(it): + for x in it: + yield x + return gen(range(10)) + + def process_tests(self, get_generator): + for obj in self.iterables: + g_obj = get_generator(obj) + with self.subTest(g_obj=g_obj, obj=obj): + self.assertListEqual(list(g_obj), list(obj)) + + g_iter = get_generator(iter(obj)) + with self.subTest(g_iter=g_iter, obj=obj): + self.assertListEqual(list(g_iter), list(obj)) + + err_regex = "'.*' object is not iterable" + for obj in self.non_iterables: + g_obj = get_generator(obj) + with self.subTest(g_obj=g_obj): + self.assertRaisesRegex(TypeError, err_regex, list, g_obj) + + def test_modify_f_locals(self): + def modify_f_locals(g, local, obj): + g.gi_frame.f_locals[local] = obj + return g + + def get_generator_genexpr(obj): + return modify_f_locals(self.genexpr(), '.0', obj) + + def get_generator_genfunc(obj): + return modify_f_locals(self.genfunc(), 'it', obj) + + self.process_tests(get_generator_genexpr) + self.process_tests(get_generator_genfunc) + + def test_new_gen_from_gi_code(self): + def new_gen_from_gi_code(g, obj): + 
generator_func = types.FunctionType(g.gi_code, {}) + return generator_func(obj) + + def get_generator_genexpr(obj): + return new_gen_from_gi_code(self.genexpr(), obj) + + def get_generator_genfunc(obj): + return new_gen_from_gi_code(self.genfunc(), obj) + + self.process_tests(get_generator_genexpr) + self.process_tests(get_generator_genfunc) + + class ExceptionTest(unittest.TestCase): # Tests for the issue #23353: check that the currently handled exception # is correctly saved/restored in PyEval_EvalFrameEx(). diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-09-13-53-50.gh-issue-125038.ffSLCz.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-09-13-53-50.gh-issue-125038.ffSLCz.rst new file mode 100644 index 00000000000000..15de48ec0e4450 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-09-13-53-50.gh-issue-125038.ffSLCz.rst @@ -0,0 +1,2 @@ +Fix crash when iterating over a generator expression after direct changes on ``gi_frame.f_locals``. +Patch by Mikhail Efimov. diff --git a/Python/codegen.c b/Python/codegen.c index 689d2b5124e9d3..bfacc6f0c55593 100644 --- a/Python/codegen.c +++ b/Python/codegen.c @@ -4164,6 +4164,7 @@ codegen_sync_comprehension_generator(compiler *c, location loc, if (IS_JUMP_TARGET_LABEL(start)) { depth++; + ADDOP(c, LOC(gen->iter), GET_ITER); USE_LABEL(c, start); ADDOP_JUMP(c, LOC(gen->iter), FOR_ITER, anchor); } From aaed91cabcedc16c089c4b1c9abb1114659a83d3 Mon Sep 17 00:00:00 2001 From: Ethan Furman Date: Tue, 22 Oct 2024 11:04:00 -0700 Subject: [PATCH 28/36] gh-125710: [Enum] fix hashable<->nonhashable comparisons for member values (GH-125735) --- Lib/enum.py | 26 ++++++++++++++----- Lib/test/test_enum.py | 7 +++++ ...-10-19-13-37-37.gh-issue-125710.FyFAAr.rst | 1 + 3 files changed, 28 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-19-13-37-37.gh-issue-125710.FyFAAr.rst diff --git a/Lib/enum.py b/Lib/enum.py index 17d72738792982..4f9912229603a6 100644 --- a/Lib/enum.py +++ b/Lib/enum.py @@ -327,6 +327,8 @@ def __set_name__(self, enum_class, member_name): # to the map, and by-value lookups for this value will be # linear. enum_class._value2member_map_.setdefault(value, enum_member) + if value not in enum_class._hashable_values_: + enum_class._hashable_values_.append(value) except TypeError: # keep track of the value in a list so containment checks are quick enum_class._unhashable_values_.append(value) @@ -538,7 +540,8 @@ def __new__(metacls, cls, bases, classdict, *, boundary=None, _simple=False, **k classdict['_member_names_'] = [] classdict['_member_map_'] = {} classdict['_value2member_map_'] = {} - classdict['_unhashable_values_'] = [] + classdict['_hashable_values_'] = [] # for comparing with non-hashable types + classdict['_unhashable_values_'] = [] # e.g. 
frozenset() with set() classdict['_unhashable_values_map_'] = {} classdict['_member_type_'] = member_type # now set the __repr__ for the value @@ -748,7 +751,10 @@ def __contains__(cls, value): try: return value in cls._value2member_map_ except TypeError: - return value in cls._unhashable_values_ + return ( + value in cls._unhashable_values_ # both structures are lists + or value in cls._hashable_values_ + ) def __delattr__(cls, attr): # nicer error message when someone tries to delete an attribute @@ -1166,8 +1172,11 @@ def __new__(cls, value): pass except TypeError: # not there, now do long search -- O(n) behavior - for name, values in cls._unhashable_values_map_.items(): - if value in values: + for name, unhashable_values in cls._unhashable_values_map_.items(): + if value in unhashable_values: + return cls[name] + for name, member in cls._member_map_.items(): + if value == member._value_: return cls[name] # still not found -- verify that members exist, in-case somebody got here mistakenly # (such as via super when trying to override __new__) @@ -1233,6 +1242,7 @@ def _add_value_alias_(self, value): # to the map, and by-value lookups for this value will be # linear. cls._value2member_map_.setdefault(value, self) + cls._hashable_values_.append(value) except TypeError: # keep track of the value in a list so containment checks are quick cls._unhashable_values_.append(value) @@ -1763,6 +1773,7 @@ def convert_class(cls): body['_member_names_'] = member_names = [] body['_member_map_'] = member_map = {} body['_value2member_map_'] = value2member_map = {} + body['_hashable_values_'] = hashable_values = [] body['_unhashable_values_'] = unhashable_values = [] body['_unhashable_values_map_'] = {} body['_member_type_'] = member_type = etype._member_type_ @@ -1826,7 +1837,7 @@ def convert_class(cls): contained = value2member_map.get(member._value_) except TypeError: contained = None - if member._value_ in unhashable_values: + if member._value_ in unhashable_values or member.value in hashable_values: for m in enum_class: if m._value_ == member._value_: contained = m @@ -1846,6 +1857,7 @@ def convert_class(cls): else: enum_class._add_member_(name, member) value2member_map[value] = member + hashable_values.append(value) if _is_single_bit(value): # not a multi-bit alias, record in _member_names_ and _flag_mask_ member_names.append(name) @@ -1882,7 +1894,7 @@ def convert_class(cls): contained = value2member_map.get(member._value_) except TypeError: contained = None - if member._value_ in unhashable_values: + if member._value_ in unhashable_values or member._value_ in hashable_values: for m in enum_class: if m._value_ == member._value_: contained = m @@ -1908,6 +1920,8 @@ def convert_class(cls): # to the map, and by-value lookups for this value will be # linear. enum_class._value2member_map_.setdefault(value, member) + if value not in hashable_values: + hashable_values.append(value) except TypeError: # keep track of the value in a list so containment checks are quick enum_class._unhashable_values_.append(value) diff --git a/Lib/test/test_enum.py b/Lib/test/test_enum.py index 5b4a8070526fcf..7184769bfd6fc3 100644 --- a/Lib/test/test_enum.py +++ b/Lib/test/test_enum.py @@ -3460,6 +3460,13 @@ def test_empty_names(self): self.assertRaisesRegex(TypeError, '.int. object is not iterable', Enum, 'bad_enum', names=0) self.assertRaisesRegex(TypeError, '.int. 
object is not iterable', Enum, 'bad_enum', 0, type=int) + def test_nonhashable_matches_hashable(self): # issue 125710 + class Directions(Enum): + DOWN_ONLY = frozenset({"sc"}) + UP_ONLY = frozenset({"cs"}) + UNRESTRICTED = frozenset({"sc", "cs"}) + self.assertIs(Directions({"sc"}), Directions.DOWN_ONLY) + class TestOrder(unittest.TestCase): "test usage of the `_order_` attribute" diff --git a/Misc/NEWS.d/next/Library/2024-10-19-13-37-37.gh-issue-125710.FyFAAr.rst b/Misc/NEWS.d/next/Library/2024-10-19-13-37-37.gh-issue-125710.FyFAAr.rst new file mode 100644 index 00000000000000..8d5220e9889c3a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-19-13-37-37.gh-issue-125710.FyFAAr.rst @@ -0,0 +1 @@ +[Enum] fix hashable<->nonhashable comparisons for member values From 34653bba644aa5481613f398153757d7357e39ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mario=20=C5=A0a=C5=A1ko?= Date: Tue, 22 Oct 2024 22:42:22 +0200 Subject: [PATCH 29/36] gh-125259: Fix error notes removal in enum initialization (GH-125647) --- Lib/enum.py | 16 +++++----------- Lib/test/test_enum.py | 19 +++++++++++++++++++ ...-10-17-16-10-29.gh-issue-125259.oMew0c.rst | 1 + 3 files changed, 25 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-17-16-10-29.gh-issue-125259.oMew0c.rst diff --git a/Lib/enum.py b/Lib/enum.py index 4f9912229603a6..27be3fb83b2afb 100644 --- a/Lib/enum.py +++ b/Lib/enum.py @@ -557,22 +557,16 @@ def __new__(metacls, cls, bases, classdict, *, boundary=None, _simple=False, **k classdict['_all_bits_'] = 0 classdict['_inverted_'] = None try: - exc = None classdict['_%s__in_progress' % cls] = True enum_class = super().__new__(metacls, cls, bases, classdict, **kwds) classdict['_%s__in_progress' % cls] = False delattr(enum_class, '_%s__in_progress' % cls) except Exception as e: - # since 3.12 the line "Error calling __set_name__ on '_proto_member' instance ..." - # is tacked on to the error instead of raising a RuntimeError - # recreate the exception to discard - exc = type(e)(str(e)) - exc.__cause__ = e.__cause__ - exc.__context__ = e.__context__ - tb = e.__traceback__ - if exc is not None: - raise exc.with_traceback(tb) - # + # since 3.12 the note "Error calling __set_name__ on '_proto_member' instance ..." 
+ # is tacked on to the error instead of raising a RuntimeError, so discard it + if hasattr(e, '__notes__'): + del e.__notes__ + raise # update classdict with any changes made by __init_subclass__ classdict.update(enum_class.__dict__) # diff --git a/Lib/test/test_enum.py b/Lib/test/test_enum.py index 7184769bfd6fc3..b9e13fb8c3585e 100644 --- a/Lib/test/test_enum.py +++ b/Lib/test/test_enum.py @@ -1888,6 +1888,25 @@ def test_wrong_inheritance_order(self): class Wrong(Enum, str): NotHere = 'error before this point' + def test_raise_custom_error_on_creation(self): + class InvalidRgbColorError(ValueError): + def __init__(self, r, g, b): + self.r = r + self.g = g + self.b = b + super().__init__(f'({r}, {g}, {b}) is not a valid RGB color') + + with self.assertRaises(InvalidRgbColorError): + class RgbColor(Enum): + RED = (255, 0, 0) + GREEN = (0, 255, 0) + BLUE = (0, 0, 255) + INVALID = (256, 0, 0) + + def __init__(self, r, g, b): + if not all(0 <= val <= 255 for val in (r, g, b)): + raise InvalidRgbColorError(r, g, b) + def test_intenum_transitivity(self): class number(IntEnum): one = 1 diff --git a/Misc/NEWS.d/next/Library/2024-10-17-16-10-29.gh-issue-125259.oMew0c.rst b/Misc/NEWS.d/next/Library/2024-10-17-16-10-29.gh-issue-125259.oMew0c.rst new file mode 100644 index 00000000000000..4fa6330abea512 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-17-16-10-29.gh-issue-125259.oMew0c.rst @@ -0,0 +1 @@ +Fix the notes removal logic for errors thrown in enum initialization. From c75ff2ef8eb71d91b1f92db9c2bc7ff18c582ab1 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 23 Oct 2024 00:41:33 -0400 Subject: [PATCH 30/36] gh-80958: unittest: discovery support for namespace packages as start directory (#123820) --- Doc/library/unittest.rst | 35 +++++------ Doc/whatsnew/3.14.rst | 9 +++ .../namespace_test_pkg/bar/__init__.py | 0 .../namespace_test_pkg/bar/test_bar.py | 5 ++ .../namespace_test_pkg/noop/no2/__init__.py | 0 .../namespace_test_pkg/noop/no2/test_no2.py | 5 ++ .../namespace_test_pkg/noop/test_noop.py | 5 ++ .../namespace_test_pkg/test_foo.py | 5 ++ Lib/test/test_unittest/test_discovery.py | 54 ++++++++++++++++- Lib/unittest/loader.py | 59 ++++++++++++++----- Makefile.pre.in | 4 ++ ...4-09-07-13-57-49.gh-issue-80958.fVYnqV.rst | 1 + 12 files changed, 145 insertions(+), 37 deletions(-) create mode 100644 Lib/test/test_unittest/namespace_test_pkg/bar/__init__.py create mode 100644 Lib/test/test_unittest/namespace_test_pkg/bar/test_bar.py create mode 100644 Lib/test/test_unittest/namespace_test_pkg/noop/no2/__init__.py create mode 100644 Lib/test/test_unittest/namespace_test_pkg/noop/no2/test_no2.py create mode 100644 Lib/test/test_unittest/namespace_test_pkg/noop/test_noop.py create mode 100644 Lib/test/test_unittest/namespace_test_pkg/test_foo.py create mode 100644 Misc/NEWS.d/next/Library/2024-09-07-13-57-49.gh-issue-80958.fVYnqV.rst diff --git a/Doc/library/unittest.rst b/Doc/library/unittest.rst index c49aba69b12126..38bad9405597dd 100644 --- a/Doc/library/unittest.rst +++ b/Doc/library/unittest.rst @@ -340,28 +340,21 @@ Test modules and packages can customize test loading and discovery by through the `load_tests protocol`_. .. versionchanged:: 3.4 - Test discovery supports :term:`namespace packages ` - for the start directory. Note that you need to specify the top level - directory too (e.g. - ``python -m unittest discover -s root/namespace -t root``). + Test discovery supports :term:`namespace packages `. .. 
versionchanged:: 3.11 - :mod:`unittest` dropped the :term:`namespace packages ` - support in Python 3.11. It has been broken since Python 3.7. Start directory and - subdirectories containing tests must be regular package that have - ``__init__.py`` file. + Test discovery dropped the :term:`namespace packages ` + support. It has been broken since Python 3.7. + Start directory and its subdirectories containing tests must be regular + package that have ``__init__.py`` file. - Directories containing start directory still can be a namespace package. - In this case, you need to specify start directory as dotted package name, - and target directory explicitly. For example:: + If the start directory is the dotted name of the package, the ancestor packages + can be namespace packages. - # proj/ <-- current directory - # namespace/ - # mypkg/ - # __init__.py - # test_mypkg.py - - python -m unittest discover -s namespace.mypkg -t . +.. versionchanged:: 3.14 + Test discovery supports :term:`namespace package` as start directory again. + To avoid scanning directories unrelated to Python, + tests are not searched in subdirectories that do not contain ``__init__.py``. .. _organizing-tests: @@ -1915,10 +1908,8 @@ Loading and running tests Modules that raise :exc:`SkipTest` on import are recorded as skips, not errors. - .. versionchanged:: 3.4 *start_dir* can be a :term:`namespace packages `. - .. versionchanged:: 3.4 Paths are sorted before being imported so that execution order is the same even if the underlying file system's ordering is not dependent on file name. @@ -1930,11 +1921,13 @@ Loading and running tests .. versionchanged:: 3.11 *start_dir* can not be a :term:`namespace packages `. - It has been broken since Python 3.7 and Python 3.11 officially remove it. + It has been broken since Python 3.7, and Python 3.11 officially removes it. .. versionchanged:: 3.13 *top_level_dir* is only stored for the duration of *discover* call. + .. versionchanged:: 3.14 + *start_dir* can once again be a :term:`namespace package`. The following attributes of a :class:`TestLoader` can be configured either by subclassing or assignment on an instance: diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index d52faa614db94e..1dd6c19018934b 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -421,6 +421,15 @@ unicodedata * The Unicode database has been updated to Unicode 16.0.0. + +unittest +-------- + +* unittest discovery supports :term:`namespace package` as start + directory again. It was removed in Python 3.11. + (Contributed by Jacob Walls in :gh:`80958`.) + + .. Add improved modules above alphabetically, not here at the end. 
Optimizations diff --git a/Lib/test/test_unittest/namespace_test_pkg/bar/__init__.py b/Lib/test/test_unittest/namespace_test_pkg/bar/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/Lib/test/test_unittest/namespace_test_pkg/bar/test_bar.py b/Lib/test/test_unittest/namespace_test_pkg/bar/test_bar.py new file mode 100644 index 00000000000000..05b184d9eba685 --- /dev/null +++ b/Lib/test/test_unittest/namespace_test_pkg/bar/test_bar.py @@ -0,0 +1,5 @@ +import unittest + +class PassingTest(unittest.TestCase): + def test_true(self): + self.assertTrue(True) diff --git a/Lib/test/test_unittest/namespace_test_pkg/noop/no2/__init__.py b/Lib/test/test_unittest/namespace_test_pkg/noop/no2/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/Lib/test/test_unittest/namespace_test_pkg/noop/no2/test_no2.py b/Lib/test/test_unittest/namespace_test_pkg/noop/no2/test_no2.py new file mode 100644 index 00000000000000..05b184d9eba685 --- /dev/null +++ b/Lib/test/test_unittest/namespace_test_pkg/noop/no2/test_no2.py @@ -0,0 +1,5 @@ +import unittest + +class PassingTest(unittest.TestCase): + def test_true(self): + self.assertTrue(True) diff --git a/Lib/test/test_unittest/namespace_test_pkg/noop/test_noop.py b/Lib/test/test_unittest/namespace_test_pkg/noop/test_noop.py new file mode 100644 index 00000000000000..05b184d9eba685 --- /dev/null +++ b/Lib/test/test_unittest/namespace_test_pkg/noop/test_noop.py @@ -0,0 +1,5 @@ +import unittest + +class PassingTest(unittest.TestCase): + def test_true(self): + self.assertTrue(True) diff --git a/Lib/test/test_unittest/namespace_test_pkg/test_foo.py b/Lib/test/test_unittest/namespace_test_pkg/test_foo.py new file mode 100644 index 00000000000000..05b184d9eba685 --- /dev/null +++ b/Lib/test/test_unittest/namespace_test_pkg/test_foo.py @@ -0,0 +1,5 @@ +import unittest + +class PassingTest(unittest.TestCase): + def test_true(self): + self.assertTrue(True) diff --git a/Lib/test/test_unittest/test_discovery.py b/Lib/test/test_unittest/test_discovery.py index a44b18406c08be..38c9779daaf87d 100644 --- a/Lib/test/test_unittest/test_discovery.py +++ b/Lib/test/test_unittest/test_discovery.py @@ -4,12 +4,14 @@ import sys import types import pickle +from importlib._bootstrap_external import NamespaceLoader from test import support from test.support import import_helper import unittest import unittest.mock import test.test_unittest +from test.test_importlib import util as test_util class TestableTestProgram(unittest.TestProgram): @@ -395,7 +397,7 @@ def restore_isdir(): self.addCleanup(restore_isdir) _find_tests_args = [] - def _find_tests(start_dir, pattern): + def _find_tests(start_dir, pattern, namespace=None): _find_tests_args.append((start_dir, pattern)) return ['tests'] loader._find_tests = _find_tests @@ -815,7 +817,7 @@ def test_discovery_from_dotted_path(self): expectedPath = os.path.abspath(os.path.dirname(test.test_unittest.__file__)) self.wasRun = False - def _find_tests(start_dir, pattern): + def _find_tests(start_dir, pattern, namespace=None): self.wasRun = True self.assertEqual(start_dir, expectedPath) return tests @@ -848,6 +850,54 @@ def restore(): 'Can not use builtin modules ' 'as dotted module names') + def test_discovery_from_dotted_namespace_packages(self): + loader = unittest.TestLoader() + + package = types.ModuleType('package') + package.__name__ = "tests" + package.__path__ = ['/a', '/b'] + package.__file__ = None + package.__spec__ = types.SimpleNamespace( + name=package.__name__, + 
loader=NamespaceLoader(package.__name__, package.__path__, None), + submodule_search_locations=['/a', '/b'] + ) + + def _import(packagename, *args, **kwargs): + sys.modules[packagename] = package + return package + + _find_tests_args = [] + def _find_tests(start_dir, pattern, namespace=None): + _find_tests_args.append((start_dir, pattern)) + return ['%s/tests' % start_dir] + + loader._find_tests = _find_tests + loader.suiteClass = list + + with unittest.mock.patch('builtins.__import__', _import): + # Since loader.discover() can modify sys.path, restore it when done. + with import_helper.DirsOnSysPath(): + # Make sure to remove 'package' from sys.modules when done. + with test_util.uncache('package'): + suite = loader.discover('package') + + self.assertEqual(suite, ['/a/tests', '/b/tests']) + + def test_discovery_start_dir_is_namespace(self): + """Subdirectory discovery not affected if start_dir is a namespace pkg.""" + loader = unittest.TestLoader() + with ( + import_helper.DirsOnSysPath(os.path.join(os.path.dirname(__file__))), + test_util.uncache('namespace_test_pkg') + ): + suite = loader.discover('namespace_test_pkg') + self.assertEqual( + {list(suite)[0]._tests[0].__module__ for suite in suite._tests if list(suite)}, + # files under namespace_test_pkg.noop not discovered. + {'namespace_test_pkg.test_foo', 'namespace_test_pkg.bar.test_bar'}, + ) + def test_discovery_failed_discovery(self): from test.test_importlib import util diff --git a/Lib/unittest/loader.py b/Lib/unittest/loader.py index 22797b83a68bc8..a52950dad224ee 100644 --- a/Lib/unittest/loader.py +++ b/Lib/unittest/loader.py @@ -274,6 +274,8 @@ def discover(self, start_dir, pattern='test*.py', top_level_dir=None): self._top_level_dir = top_level_dir is_not_importable = False + is_namespace = False + tests = [] if os.path.isdir(os.path.abspath(start_dir)): start_dir = os.path.abspath(start_dir) if start_dir != top_level_dir: @@ -286,12 +288,25 @@ def discover(self, start_dir, pattern='test*.py', top_level_dir=None): is_not_importable = True else: the_module = sys.modules[start_dir] - top_part = start_dir.split('.')[0] - try: - start_dir = os.path.abspath( - os.path.dirname((the_module.__file__))) - except AttributeError: - if the_module.__name__ in sys.builtin_module_names: + if not hasattr(the_module, "__file__") or the_module.__file__ is None: + # look for namespace packages + try: + spec = the_module.__spec__ + except AttributeError: + spec = None + + if spec and spec.submodule_search_locations is not None: + is_namespace = True + + for path in the_module.__path__: + if (not set_implicit_top and + not path.startswith(top_level_dir)): + continue + self._top_level_dir = \ + (path.split(the_module.__name__ + .replace(".", os.path.sep))[0]) + tests.extend(self._find_tests(path, pattern, namespace=True)) + elif the_module.__name__ in sys.builtin_module_names: # builtin module raise TypeError('Can not use builtin modules ' 'as dotted module names') from None @@ -300,14 +315,27 @@ def discover(self, start_dir, pattern='test*.py', top_level_dir=None): f"don't know how to discover from {the_module!r}" ) from None + else: + top_part = start_dir.split('.')[0] + start_dir = os.path.abspath(os.path.dirname((the_module.__file__))) + if set_implicit_top: - self._top_level_dir = self._get_directory_containing_module(top_part) + if not is_namespace: + if sys.modules[top_part].__file__ is None: + self._top_level_dir = os.path.dirname(the_module.__file__) + if self._top_level_dir not in sys.path: + sys.path.insert(0, self._top_level_dir) + 
else: + self._top_level_dir = \ + self._get_directory_containing_module(top_part) sys.path.remove(top_level_dir) if is_not_importable: raise ImportError('Start directory is not importable: %r' % start_dir) - tests = list(self._find_tests(start_dir, pattern)) + if not is_namespace: + tests = list(self._find_tests(start_dir, pattern)) + self._top_level_dir = original_top_level_dir return self.suiteClass(tests) @@ -343,7 +371,7 @@ def _match_path(self, path, full_path, pattern): # override this method to use alternative matching strategy return fnmatch(path, pattern) - def _find_tests(self, start_dir, pattern): + def _find_tests(self, start_dir, pattern, namespace=False): """Used by discovery. Yields test suites it loads.""" # Handle the __init__ in this package name = self._get_name_from_path(start_dir) @@ -352,7 +380,8 @@ def _find_tests(self, start_dir, pattern): if name != '.' and name not in self._loading_packages: # name is in self._loading_packages while we have called into # loadTestsFromModule with name. - tests, should_recurse = self._find_test_path(start_dir, pattern) + tests, should_recurse = self._find_test_path( + start_dir, pattern, namespace) if tests is not None: yield tests if not should_recurse: @@ -363,7 +392,8 @@ def _find_tests(self, start_dir, pattern): paths = sorted(os.listdir(start_dir)) for path in paths: full_path = os.path.join(start_dir, path) - tests, should_recurse = self._find_test_path(full_path, pattern) + tests, should_recurse = self._find_test_path( + full_path, pattern, False) if tests is not None: yield tests if should_recurse: @@ -371,11 +401,11 @@ def _find_tests(self, start_dir, pattern): name = self._get_name_from_path(full_path) self._loading_packages.add(name) try: - yield from self._find_tests(full_path, pattern) + yield from self._find_tests(full_path, pattern, False) finally: self._loading_packages.discard(name) - def _find_test_path(self, full_path, pattern): + def _find_test_path(self, full_path, pattern, namespace=False): """Used by discovery. Loads tests from a single file, or a directories' __init__.py when @@ -419,7 +449,8 @@ def _find_test_path(self, full_path, pattern): msg % (mod_name, module_dir, expected_dir)) return self.loadTestsFromModule(module, pattern=pattern), False elif os.path.isdir(full_path): - if not os.path.isfile(os.path.join(full_path, '__init__.py')): + if (not namespace and + not os.path.isfile(os.path.join(full_path, '__init__.py'))): return None, False load_tests = None diff --git a/Makefile.pre.in b/Makefile.pre.in index fb6f22d57397db..d6f75a931a3db2 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2534,6 +2534,10 @@ TESTSUBDIRS= idlelib/idle_test \ test/test_tools \ test/test_ttk \ test/test_unittest \ + test/test_unittest/namespace_test_pkg \ + test/test_unittest/namespace_test_pkg/bar \ + test/test_unittest/namespace_test_pkg/noop \ + test/test_unittest/namespace_test_pkg/noop/no2 \ test/test_unittest/testmock \ test/test_warnings \ test/test_warnings/data \ diff --git a/Misc/NEWS.d/next/Library/2024-09-07-13-57-49.gh-issue-80958.fVYnqV.rst b/Misc/NEWS.d/next/Library/2024-09-07-13-57-49.gh-issue-80958.fVYnqV.rst new file mode 100644 index 00000000000000..f0edd7b1ac6e8b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-09-07-13-57-49.gh-issue-80958.fVYnqV.rst @@ -0,0 +1 @@ +unittest discovery supports PEP 420 namespace packages as start directory again. 
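A sketch of the behavior the patch above restores (the layout and the name ``namespace_pkg`` are hypothetical, chosen to mirror the ``namespace_test_pkg`` fixture added by the patch, and are not themselves part of it): with the parent directory on ``sys.path``, discovery can start from a PEP 420 namespace package again, while subdirectories without ``__init__.py`` are skipped::

    # Hypothetical layout, parent of namespace_pkg/ on sys.path:
    #   namespace_pkg/          (no __init__.py -- PEP 420 namespace package)
    #       test_foo.py         (discovered)
    #       noop/               (no __init__.py -- not scanned in 3.14)
    #       bar/
    #           __init__.py
    #           test_bar.py     (discovered)
    import unittest

    loader = unittest.TestLoader()
    # start_dir may once again name a namespace package (Python 3.14+).
    suite = loader.discover('namespace_pkg')
    unittest.TextTestRunner(verbosity=2).run(suite)
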
From 834ba5aaf21ac7fd123534dae8f9e478ee526aaa Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 23 Oct 2024 10:50:29 +0300 Subject: [PATCH 31/36] gh-58032: Deprecate the argparse.FileType type converter (GH-124664) --- .../pending-removal-in-future.rst | 21 +++---- Doc/library/argparse.rst | 25 +++++--- Doc/whatsnew/3.14.rst | 6 ++ Lib/argparse.py | 18 ++++-- Lib/test/test_argparse.py | 57 ++++++++++++------- ...4-09-27-13-10-17.gh-issue-58032.0aNAQ0.rst | 1 + 6 files changed, 83 insertions(+), 45 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-09-27-13-10-17.gh-issue-58032.0aNAQ0.rst diff --git a/Doc/deprecations/pending-removal-in-future.rst b/Doc/deprecations/pending-removal-in-future.rst index d77fc86eab0ed6..5a4502ac08a5f0 100644 --- a/Doc/deprecations/pending-removal-in-future.rst +++ b/Doc/deprecations/pending-removal-in-future.rst @@ -4,16 +4,6 @@ Pending removal in future versions The following APIs will be removed in the future, although there is currently no date scheduled for their removal. -* :mod:`argparse`: - - * Nesting argument groups and nesting mutually exclusive - groups are deprecated. - * Passing the undocumented keyword argument *prefix_chars* to - :meth:`~argparse.ArgumentParser.add_argument_group` is now - deprecated. - -* :mod:`array`'s ``'u'`` format code (:gh:`57281`) - * :mod:`builtins`: * ``bool(NotImplemented)``. @@ -43,6 +33,17 @@ although there is currently no date scheduled for their removal. as a single positional argument. (Contributed by Serhiy Storchaka in :gh:`109218`.) +* :mod:`argparse`: + + * Nesting argument groups and nesting mutually exclusive + groups are deprecated. + * Passing the undocumented keyword argument *prefix_chars* to + :meth:`~argparse.ArgumentParser.add_argument_group` is now + deprecated. + * The :class:`argparse.FileType` type converter is deprecated. + +* :mod:`array`'s ``'u'`` format code (:gh:`57281`) + * :mod:`calendar`: ``calendar.January`` and ``calendar.February`` constants are deprecated and replaced by :data:`calendar.JANUARY` and :data:`calendar.FEBRUARY`. diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index ef0db3e9789c98..65663d43f50a9d 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -865,16 +865,14 @@ See also :ref:`specifying-ambiguous-arguments`. The supported values are: output files:: >>> parser = argparse.ArgumentParser() - >>> parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), - ... default=sys.stdin) - >>> parser.add_argument('outfile', nargs='?', type=argparse.FileType('w'), - ... default=sys.stdout) + >>> parser.add_argument('infile', nargs='?') + >>> parser.add_argument('outfile', nargs='?') >>> parser.parse_args(['input.txt', 'output.txt']) - Namespace(infile=<_io.TextIOWrapper name='input.txt' encoding='UTF-8'>, - outfile=<_io.TextIOWrapper name='output.txt' encoding='UTF-8'>) + Namespace(infile='input.txt', outfile='output.txt') + >>> parser.parse_args(['input.txt']) + Namespace(infile='input.txt', outfile=None) >>> parser.parse_args([]) - Namespace(infile=<_io.TextIOWrapper name='<stdin>' encoding='UTF-8'>, - outfile=<_io.TextIOWrapper name='<stdout>' encoding='UTF-8'>) + Namespace(infile=None, outfile=None) .. 
index:: single: * (asterisk); in argparse module @@ -1033,7 +1031,6 @@ Common built-in types and functions can be used as type converters: parser.add_argument('distance', type=float) parser.add_argument('street', type=ascii) parser.add_argument('code_point', type=ord) - parser.add_argument('dest_file', type=argparse.FileType('w', encoding='latin-1')) parser.add_argument('datapath', type=pathlib.Path) User defined functions can be used as well: @@ -1827,9 +1824,19 @@ FileType objects >>> parser.parse_args(['-']) Namespace(infile=<_io.TextIOWrapper name='<stdin>' encoding='UTF-8'>) + .. note:: + + If one argument uses *FileType* and then a subsequent argument fails, + an error is reported but the file is not automatically closed. + This can also clobber the output files. + In this case, it would be better to wait until after the parser has + run and then use the :keyword:`with`-statement to manage the files. + .. versionchanged:: 3.4 Added the *encodings* and *errors* parameters. + .. deprecated:: 3.14 + Argument groups ^^^^^^^^^^^^^^^ diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 1dd6c19018934b..b389e6da4c0ac3 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -464,6 +464,12 @@ Deprecated as a single positional argument. (Contributed by Serhiy Storchaka in :gh:`109218`.) +* :mod:`argparse`: + Deprecated the :class:`argparse.FileType` type converter. + Anything with resource management should be done downstream after the + arguments are parsed. + (Contributed by Serhiy Storchaka in :gh:`58032`.) + * :mod:`multiprocessing` and :mod:`concurrent.futures`: The default start method (see :ref:`multiprocessing-start-methods`) changed away from *fork* to *forkserver* on platforms where it was not already diff --git a/Lib/argparse.py b/Lib/argparse.py index 024622bec17c3b..9746173984c6ca 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -18,11 +18,12 @@ 'integers', metavar='int', nargs='+', type=int, help='an integer to be summed') parser.add_argument( - '--log', default=sys.stdout, type=argparse.FileType('w'), + '--log', help='the file where the sum should be written') args = parser.parse_args() - args.log.write('%s' % sum(args.integers)) - args.log.close() + with (open(args.log, 'w') if args.log is not None + else contextlib.nullcontext(sys.stdout)) as log: + log.write('%s' % sum(args.integers)) The module contains the following public classes: @@ -39,7 +40,8 @@ - FileType -- A factory for defining types of files to be created. As the example above shows, instances of FileType are typically passed as - the type= argument of add_argument() calls. + the type= argument of add_argument() calls. Deprecated since + Python 3.14. - Action -- The base class for parser actions. Typically actions are selected by passing strings like 'store_true' or 'append_const' to @@ -1252,7 +1254,7 @@ def __call__(self, parser, namespace, values, option_string=None): # ============== class FileType(object): - """Factory for creating file object types + """Deprecated factory for creating file object types Instances of FileType are typically passed as type= arguments to the ArgumentParser add_argument() method. @@ -1269,6 +1271,12 @@ class FileType(object): """ def __init__(self, mode='r', bufsize=-1, encoding=None, errors=None): + import warnings + warnings.warn( + "FileType is deprecated. 
Simply open files after parsing arguments.", + category=PendingDeprecationWarning, + stacklevel=2 + ) self._mode = mode self._bufsize = bufsize self._encoding = encoding diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 4bd7a935b9b757..ed1c5c34e526aa 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -1773,27 +1773,43 @@ def convert_arg_line_to_args(self, arg_line): # Type conversion tests # ===================== +def FileType(*args, **kwargs): + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'FileType is deprecated', + PendingDeprecationWarning, __name__) + return argparse.FileType(*args, **kwargs) + + +class TestFileTypeDeprecation(TestCase): + + def test(self): + with self.assertWarns(PendingDeprecationWarning) as cm: + argparse.FileType() + self.assertIn('FileType is deprecated', str(cm.warning)) + self.assertEqual(cm.filename, __file__) + + class TestFileTypeRepr(TestCase): def test_r(self): - type = argparse.FileType('r') + type = FileType('r') self.assertEqual("FileType('r')", repr(type)) def test_wb_1(self): - type = argparse.FileType('wb', 1) + type = FileType('wb', 1) self.assertEqual("FileType('wb', 1)", repr(type)) def test_r_latin(self): - type = argparse.FileType('r', encoding='latin_1') + type = FileType('r', encoding='latin_1') self.assertEqual("FileType('r', encoding='latin_1')", repr(type)) def test_w_big5_ignore(self): - type = argparse.FileType('w', encoding='big5', errors='ignore') + type = FileType('w', encoding='big5', errors='ignore') self.assertEqual("FileType('w', encoding='big5', errors='ignore')", repr(type)) def test_r_1_replace(self): - type = argparse.FileType('r', 1, errors='replace') + type = FileType('r', 1, errors='replace') self.assertEqual("FileType('r', 1, errors='replace')", repr(type)) @@ -1847,7 +1863,6 @@ def __eq__(self, other): text = text.decode('ascii') return self.name == other.name == text - class TestFileTypeR(TempDirMixin, ParserTestCase): """Test the FileType option/argument type for reading files""" @@ -1860,8 +1875,8 @@ def setUp(self): self.create_readonly_file('readonly') argument_signatures = [ - Sig('-x', type=argparse.FileType()), - Sig('spam', type=argparse.FileType('r')), + Sig('-x', type=FileType()), + Sig('spam', type=FileType('r')), ] failures = ['-x', '', 'non-existent-file.txt'] successes = [ @@ -1881,7 +1896,7 @@ def setUp(self): file.close() argument_signatures = [ - Sig('-c', type=argparse.FileType('r'), default='no-file.txt'), + Sig('-c', type=FileType('r'), default='no-file.txt'), ] # should provoke no such file error failures = [''] @@ -1900,8 +1915,8 @@ def setUp(self): file.write(file_name) argument_signatures = [ - Sig('-x', type=argparse.FileType('rb')), - Sig('spam', type=argparse.FileType('rb')), + Sig('-x', type=FileType('rb')), + Sig('spam', type=FileType('rb')), ] failures = ['-x', ''] successes = [ @@ -1939,8 +1954,8 @@ def setUp(self): self.create_writable_file('writable') argument_signatures = [ - Sig('-x', type=argparse.FileType('w')), - Sig('spam', type=argparse.FileType('w')), + Sig('-x', type=FileType('w')), + Sig('spam', type=FileType('w')), ] failures = ['-x', '', 'readonly'] successes = [ @@ -1962,8 +1977,8 @@ def setUp(self): self.create_writable_file('writable') argument_signatures = [ - Sig('-x', type=argparse.FileType('x')), - Sig('spam', type=argparse.FileType('x')), + Sig('-x', type=FileType('x')), + Sig('spam', type=FileType('x')), ] failures = ['-x', '', 'readonly', 'writable'] successes = [ @@ -1977,8 +1992,8 @@ class 
TestFileTypeWB(TempDirMixin, ParserTestCase): """Test the FileType option/argument type for writing binary files""" argument_signatures = [ - Sig('-x', type=argparse.FileType('wb')), - Sig('spam', type=argparse.FileType('wb')), + Sig('-x', type=FileType('wb')), + Sig('spam', type=FileType('wb')), ] failures = ['-x', ''] successes = [ @@ -1994,8 +2009,8 @@ class TestFileTypeXB(TestFileTypeX): "Test the FileType option/argument type for writing new binary files only" argument_signatures = [ - Sig('-x', type=argparse.FileType('xb')), - Sig('spam', type=argparse.FileType('xb')), + Sig('-x', type=FileType('xb')), + Sig('spam', type=FileType('xb')), ] successes = [ ('-x foo bar', NS(x=WFile('foo'), spam=WFile('bar'))), @@ -2007,7 +2022,7 @@ class TestFileTypeOpenArgs(TestCase): """Test that open (the builtin) is correctly called""" def test_open_args(self): - FT = argparse.FileType + FT = FileType cases = [ (FT('rb'), ('rb', -1, None, None)), (FT('w', 1), ('w', 1, None, None)), @@ -2022,7 +2037,7 @@ def test_open_args(self): def test_invalid_file_type(self): with self.assertRaises(ValueError): - argparse.FileType('b')('-test') + FileType('b')('-test') class TestFileTypeMissingInitialization(TestCase): diff --git a/Misc/NEWS.d/next/Library/2024-09-27-13-10-17.gh-issue-58032.0aNAQ0.rst b/Misc/NEWS.d/next/Library/2024-09-27-13-10-17.gh-issue-58032.0aNAQ0.rst new file mode 100644 index 00000000000000..278512b22a8d3f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-09-27-13-10-17.gh-issue-58032.0aNAQ0.rst @@ -0,0 +1 @@ +Deprecate the :class:`argparse.FileType` type converter. From de0d5c6e2e12f24ade1ccc457afaf5fb2c650c64 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Wed, 23 Oct 2024 14:48:39 +0100 Subject: [PATCH 32/36] gh-119786: move 'changing grammar' checklist from devguide to InternalDocs (#125874) --- InternalDocs/README.md | 2 + InternalDocs/changing_grammar.md | 63 ++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 InternalDocs/changing_grammar.md diff --git a/InternalDocs/README.md b/InternalDocs/README.md index 48c893bde2a631..2ef6e653ac19d4 100644 --- a/InternalDocs/README.md +++ b/InternalDocs/README.md @@ -19,6 +19,8 @@ Compiling Python Source Code - [Compiler Design](compiler.md) +- [Changing Python's Grammar](changing_grammar.md) + Runtime Objects --- diff --git a/InternalDocs/changing_grammar.md b/InternalDocs/changing_grammar.md new file mode 100644 index 00000000000000..1a5eebdc1418dc --- /dev/null +++ b/InternalDocs/changing_grammar.md @@ -0,0 +1,63 @@ +# Changing CPython's grammar + +There's more to changing Python's grammar than editing +[`Grammar/python.gram`](../Grammar/python.gram). +Below is a checklist of things that may need to change. + +> [!NOTE] +> +> Many of these changes require re-generating some of the derived +> files. If things mysteriously don't work, it may help to run +> ``make clean``. + +## Checklist + +* [`Grammar/python.gram`](../Grammar/python.gram): The grammar definition, + with actions that build AST nodes. + After changing it, run ``make regen-pegen`` (or ``build.bat --regen`` on Windows), + to regenerate [`Parser/parser.c`](../Parser/parser.c). + (This runs Python's parser generator, [`Tools/peg_generator`](../Tools/peg_generator)). + +* [`Grammar/Tokens`](../Grammar/Tokens) is a place for adding new token types. 
After + changing it, run ``make regen-token`` to regenerate + [`Include/internal/pycore_token.h`](../Include/internal/pycore_token.h), + [`Parser/token.c`](../Parser/token.c), [`Lib/token.py`](../Lib/token.py) + and [`Doc/library/token-list.inc`](../Doc/library/token-list.inc). + If you change both ``python.gram`` and ``Tokens``, run ``make regen-token`` + before ``make regen-pegen``. + On Windows, ``build.bat --regen`` will regenerate both at the same time. + +* [`Parser/Python.asdl`](../Parser/Python.asdl) may need changes to match the grammar. + Then run ``make regen-ast`` to regenerate + [`Include/internal/pycore_ast.h`](../Include/internal/pycore_ast.h) and + [`Python/Python-ast.c`](../Python/Python-ast.c). + +* [`Parser/lexer/`](../Parser/lexer/) contains the tokenization code. + This is where you would add a new type of comment or string literal, for example. + +* [`Python/ast.c`](../Python/ast.c) will need changes to validate AST objects + involved with the grammar change. + +* [`Python/ast_unparse.c`](../Python/ast_unparse.c) will need changes to unparse + AST involved with the grammar change ("unparsing" is used to turn annotations + into strings per [PEP 563](https://peps.python.org/pep-0563/)). + +* The [`compiler`](compiler.md) may need to change when there are changes + to the `AST`. + +* ``_Unparser`` in the [`Lib/ast.py`](../Lib/ast.py) file may need changes + to accommodate any modifications in the AST nodes. + +* [`Doc/library/ast.rst`](../Doc/library/ast.rst) may need to be updated + to reflect changes to AST nodes. + +* Add some usage of your new syntax to ``test_grammar.py``. + +* Certain changes may require tweaks to the library module + [`pyclbr`](https://docs.python.org/3/library/pyclbr.html#module-pyclbr). + +* [`Lib/tokenize.py`](../Lib/tokenize.py) needs changes to match changes + to the tokenizer. + +* Documentation must be written! Specifically, one or more of the pages in + [`Doc/reference/`](../Doc/reference/) will need to be updated. From 6f26d496d3c894970ee18a125e9100791ebc2b36 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 23 Oct 2024 10:10:06 -0600 Subject: [PATCH 33/36] gh-125286: Share the Main Refchain With Legacy Interpreters (gh-125709) They used to be shared, before 3.12. Returning to sharing them resolves a failure on Py_TRACE_REFS builds. Co-authored-by: Petr Viktorin --- Doc/library/sys.rst | 29 +++++++++++ Doc/using/configure.rst | 2 +- Doc/whatsnew/3.14.rst | 9 ++++ Objects/object.c | 92 ++++++++++++++++++++--------------------- Objects/unicodeobject.c | 8 ---- Python/pylifecycle.c | 14 +++++++ Python/pystate.c | 6 +-- 7 files changed, 99 insertions(+), 61 deletions(-) diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 20a06a1ecd1a4c..37f1719db607de 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -920,6 +920,35 @@ always available. It is not guaranteed to exist in all implementations of Python. +.. function:: getobjects(limit[, type]) + + This function only exists if CPython was built using the + specialized configure option :option:`--with-trace-refs`. + It is intended only for debugging garbage-collection issues. + + Return a list of up to *limit* dynamically allocated Python objects. + If *type* is given, only objects of that exact type (not subtypes) + are included. + + Objects from the list are not safe to use. 
+ Specifically, the result will include objects from all interpreters that + share their object allocator state (that is, ones created with + :c:member:`PyInterpreterConfig.use_main_obmalloc` set to 1 + or using :c:func:`Py_NewInterpreter`, and the + :ref:`main interpreter `). + Mixing objects from different interpreters may lead to crashes + or other unexpected behavior. + + .. impl-detail:: + + This function should be used for specialized purposes only. + It is not guaranteed to exist in all implementations of Python. + + .. versionchanged:: next + + The result may include objects from other interpreters. + + .. function:: getprofile() .. index:: diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index 10cdf2376229ff..0e7b1be5b4bc2e 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -702,7 +702,7 @@ Debug options Effects: * Define the ``Py_TRACE_REFS`` macro. - * Add :func:`!sys.getobjects` function. + * Add :func:`sys.getobjects` function. * Add :envvar:`PYTHONDUMPREFS` environment variable. The :envvar:`PYTHONDUMPREFS` environment variable can be used to dump diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index b389e6da4c0ac3..64f3d18e7fc6a4 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -416,6 +416,15 @@ symtable (Contributed by Bénédikt Tran in :gh:`120029`.) + +sys +--- + +* The previously undocumented special function :func:`sys.getobjects`, + which only exists in specialized builds of Python, may now return objects + from other interpreters than the one it's called in. + + unicodedata ----------- diff --git a/Objects/object.c b/Objects/object.c index 1a15b70d3dc63f..7cc74a8dc0d8eb 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -171,6 +171,48 @@ _PyDebug_PrintTotalRefs(void) { #define REFCHAIN(interp) interp->object_state.refchain #define REFCHAIN_VALUE ((void*)(uintptr_t)1) +static inline int +has_own_refchain(PyInterpreterState *interp) +{ + if (interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) { + return (_Py_IsMainInterpreter(interp) + || _PyInterpreterState_Main() == NULL); + } + return 1; +} + +static int +refchain_init(PyInterpreterState *interp) +{ + if (!has_own_refchain(interp)) { + // Legacy subinterpreters share a refchain with the main interpreter. + REFCHAIN(interp) = REFCHAIN(_PyInterpreterState_Main()); + return 0; + } + _Py_hashtable_allocator_t alloc = { + // Don't use default PyMem_Malloc() and PyMem_Free() which + // require the caller to hold the GIL. + .malloc = PyMem_RawMalloc, + .free = PyMem_RawFree, + }; + REFCHAIN(interp) = _Py_hashtable_new_full( + _Py_hashtable_hash_ptr, _Py_hashtable_compare_direct, + NULL, NULL, &alloc); + if (REFCHAIN(interp) == NULL) { + return -1; + } + return 0; +} + +static void +refchain_fini(PyInterpreterState *interp) +{ + if (has_own_refchain(interp) && REFCHAIN(interp) != NULL) { + _Py_hashtable_destroy(REFCHAIN(interp)); + } + REFCHAIN(interp) = NULL; +} + bool _PyRefchain_IsTraced(PyInterpreterState *interp, PyObject *obj) { @@ -2191,16 +2233,7 @@ PyStatus _PyObject_InitState(PyInterpreterState *interp) { #ifdef Py_TRACE_REFS - _Py_hashtable_allocator_t alloc = { - // Don't use default PyMem_Malloc() and PyMem_Free() which - // require the caller to hold the GIL. 
- .malloc = PyMem_RawMalloc, - .free = PyMem_RawFree, - }; - REFCHAIN(interp) = _Py_hashtable_new_full( - _Py_hashtable_hash_ptr, _Py_hashtable_compare_direct, - NULL, NULL, &alloc); - if (REFCHAIN(interp) == NULL) { + if (refchain_init(interp) < 0) { return _PyStatus_NO_MEMORY(); } #endif @@ -2211,8 +2244,7 @@ void _PyObject_FiniState(PyInterpreterState *interp) { #ifdef Py_TRACE_REFS - _Py_hashtable_destroy(REFCHAIN(interp)); - REFCHAIN(interp) = NULL; + refchain_fini(interp); #endif } @@ -2501,42 +2533,6 @@ _Py_ResurrectReference(PyObject *op) #ifdef Py_TRACE_REFS -/* Make sure the ref is associated with the right interpreter. - * This only needs special attention for heap-allocated objects - * that have been immortalized, and only when the object might - * outlive the interpreter where it was created. That means the - * object was necessarily created using a global allocator - * (i.e. from the main interpreter). Thus in that specific case - * we move the object over to the main interpreter's refchain. - * - * This was added for the sake of the immortal interned strings, - * where legacy subinterpreters share the main interpreter's - * interned dict (and allocator), and therefore the strings can - * outlive the subinterpreter. - * - * It may make sense to fold this into _Py_SetImmortalUntracked(), - * but that requires further investigation. In the meantime, it is - * up to the caller to know if this is needed. There should be - * very few cases. - */ -void -_Py_NormalizeImmortalReference(PyObject *op) -{ - assert(_Py_IsImmortal(op)); - PyInterpreterState *interp = _PyInterpreterState_GET(); - if (!_PyRefchain_IsTraced(interp, op)) { - return; - } - PyInterpreterState *main_interp = _PyInterpreterState_Main(); - if (interp != main_interp - && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) - { - assert(!_PyRefchain_IsTraced(main_interp, op)); - _PyRefchain_Remove(interp, op); - _PyRefchain_Trace(main_interp, op); - } -} - void _Py_ForgetReference(PyObject *op) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b94a74c2c688a9..9cd9781e412524 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -15444,10 +15444,6 @@ _PyUnicode_InternStatic(PyInterpreterState *interp, PyObject **p) assert(*p); } -#ifdef Py_TRACE_REFS -extern void _Py_NormalizeImmortalReference(PyObject *); -#endif - static void immortalize_interned(PyObject *s) { @@ -15463,10 +15459,6 @@ immortalize_interned(PyObject *s) #endif _PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL; _Py_SetImmortal(s); -#ifdef Py_TRACE_REFS - /* Make sure the ref is associated with the right interpreter. */ - _Py_NormalizeImmortalReference(s); -#endif } static /* non-null */ PyObject* diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index b8f424854ecb86..8f38fbedae9842 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -674,6 +674,13 @@ pycore_create_interpreter(_PyRuntimeState *runtime, return status; } + // This could be done in init_interpreter() (in pystate.c) if it + // didn't depend on interp->feature_flags being set already. + status = _PyObject_InitState(interp); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + // initialize the interp->obmalloc state. This must be done after // the settings are loaded (so that feature_flags are set) but before // any calls are made to obmalloc functions. 
@@ -2297,6 +2304,13 @@ new_interpreter(PyThreadState **tstate_p, goto error; } + // This could be done in init_interpreter() (in pystate.c) if it + // didn't depend on interp->feature_flags being set already. + status = _PyObject_InitState(interp); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + // initialize the interp->obmalloc state. This must be done after // the settings are loaded (so that feature_flags are set) but before // any calls are made to obmalloc functions. diff --git a/Python/pystate.c b/Python/pystate.c index 7df872cd6d7d8a..36b31f3b9e4200 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -629,10 +629,8 @@ init_interpreter(PyInterpreterState *interp, assert(next != NULL || (interp == runtime->interpreters.main)); interp->next = next; - PyStatus status = _PyObject_InitState(interp); - if (_PyStatus_EXCEPTION(status)) { - return status; - } + // We would call _PyObject_InitState() at this point + // if interp->feature_flags were already set. _PyEval_InitState(interp); _PyGC_InitState(&interp->gc); From 9c01db40aa5edbd75ce50342c08f7ed018ee7864 Mon Sep 17 00:00:00 2001 From: Wulian <1055917385@qq.com> Date: Thu, 24 Oct 2024 04:29:32 +0800 Subject: [PATCH 34/36] gh-125665: Update turtledemo docstrings with correct file names (#125691) Co-authored-by: Wulian Co-authored-by: Terry Jan Reedy --- Doc/library/turtle.rst | 3 --- Lib/turtledemo/bytedesign.py | 4 +--- Lib/turtledemo/chaos.py | 8 +++----- Lib/turtledemo/clock.py | 9 ++------- Lib/turtledemo/colormixer.py | 3 +-- Lib/turtledemo/forest.py | 15 ++++++--------- Lib/turtledemo/fractalcurves.py | 4 +--- Lib/turtledemo/lindenmayer.py | 4 +--- Lib/turtledemo/minimal_hanoi.py | 7 +------ Lib/turtledemo/nim.py | 4 +--- Lib/turtledemo/paint.py | 15 +++++----------- Lib/turtledemo/peace.py | 4 +--- Lib/turtledemo/penrose.py | 6 ++---- Lib/turtledemo/planet_and_moon.py | 4 +--- Lib/turtledemo/rosette.py | 4 +--- Lib/turtledemo/round_dance.py | 7 +------ Lib/turtledemo/sorting_animate.py | 7 +------ Lib/turtledemo/tree.py | 4 +--- Lib/turtledemo/two_canvases.py | 2 +- Lib/turtledemo/yinyang.py | 4 +--- 20 files changed, 32 insertions(+), 86 deletions(-) diff --git a/Doc/library/turtle.rst b/Doc/library/turtle.rst index efa4b6f8f1d3f9..8eb4f8271fcfae 100644 --- a/Doc/library/turtle.rst +++ b/Doc/library/turtle.rst @@ -2778,9 +2778,6 @@ Changes since Python 3.0 :func:`Screen.numinput <turtle.numinput>`. These pop up input dialogs and return strings and numbers respectively. -- Two example scripts :file:`tdemo_nim.py` and :file:`tdemo_round_dance.py` - have been added to the :file:`Lib/turtledemo` directory. - .. doctest:: :skipif: _tkinter is None diff --git a/Lib/turtledemo/bytedesign.py b/Lib/turtledemo/bytedesign.py index 476cdaabfceab1..a5d76a6b6ff295 100644 --- a/Lib/turtledemo/bytedesign.py +++ b/Lib/turtledemo/bytedesign.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_bytedesign.py +"""turtledemo/bytedesign.py An example adapted from the example-suite of PythonCard's turtle graphics. diff --git a/Lib/turtledemo/chaos.py b/Lib/turtledemo/chaos.py index 6a45d0d807ef0b..b25f0fa42c901d 100644 --- a/Lib/turtledemo/chaos.py +++ b/Lib/turtledemo/chaos.py @@ -1,9 +1,7 @@ -# File: tdemo_chaos.py -# Author: Gregor Lingl -# Date: 2009-06-24 -# A demonstration of chaos +"""turtledemo/chaos.py +A demonstration of chaos. 
+""" from turtle import * N = 80 diff --git a/Lib/turtledemo/clock.py b/Lib/turtledemo/clock.py index 8a630e29b8da50..8b639066c4f440 100644 --- a/Lib/turtledemo/clock.py +++ b/Lib/turtledemo/clock.py @@ -1,12 +1,7 @@ -""" turtle-example-suite: - - turtledemo/clock.py +"""turtledemo/clock.py Enhanced clock-program, showing date -and time - ------------------------------------ - Press STOP to exit the program! - ------------------------------------ +and time. """ from turtle import * from datetime import datetime diff --git a/Lib/turtledemo/colormixer.py b/Lib/turtledemo/colormixer.py index 448db83361a649..f66012c8154317 100644 --- a/Lib/turtledemo/colormixer.py +++ b/Lib/turtledemo/colormixer.py @@ -1,5 +1,4 @@ -# colormixer - +"""turtledemo/colormixer.py""" from turtle import Screen, Turtle, mainloop class ColorTurtle(Turtle): diff --git a/Lib/turtledemo/forest.py b/Lib/turtledemo/forest.py index cac553223828db..e1fa85a577ffce 100644 --- a/Lib/turtledemo/forest.py +++ b/Lib/turtledemo/forest.py @@ -1,14 +1,11 @@ -""" turtlegraphics-example-suite: +"""turtledemo/forest.py - tdemo_forest.py +Displays a 'forest' of 3 breadth-first trees, +similar to the one in tree.py. +For further details, see tree.py. -Displays a 'forest' of 3 breadth-first-trees -similar to the one in tree. -For further remarks see tree.py - -This example is a 'breadth-first'-rewrite of -a Logo program written by Erich Neuwirth. See -http://homepage.univie.ac.at/erich.neuwirth/ +This example is a breadth-first rewrite of +a Logo program by Erich Neuwirth. """ from turtle import Turtle, colormode, tracer, mainloop from random import randrange diff --git a/Lib/turtledemo/fractalcurves.py b/Lib/turtledemo/fractalcurves.py index fda193e06fedee..2d0a506a4f5b9f 100644 --- a/Lib/turtledemo/fractalcurves.py +++ b/Lib/turtledemo/fractalcurves.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_fractalCurves.py +"""turtledemo/fractalcurves.py This program draws two fractal-curve-designs: (1) A hilbert curve (in a box) diff --git a/Lib/turtledemo/lindenmayer.py b/Lib/turtledemo/lindenmayer.py index 7c7a84796c3c28..eb309afb9381b1 100644 --- a/Lib/turtledemo/lindenmayer.py +++ b/Lib/turtledemo/lindenmayer.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - xtx_lindenmayer_indian.py +"""turtledemo/lindenmayer.py Each morning women in Tamil Nadu, in southern India, place designs, created by using rice diff --git a/Lib/turtledemo/minimal_hanoi.py b/Lib/turtledemo/minimal_hanoi.py index 08d8b630fec3b4..e44330eaaf7f18 100644 --- a/Lib/turtledemo/minimal_hanoi.py +++ b/Lib/turtledemo/minimal_hanoi.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_minimal_hanoi.py +"""turtledemo/minimal_hanoi.py A minimal 'Towers of Hanoi' animation: A tower of 6 discs is transferred from the @@ -12,9 +10,6 @@ Discs are turtles with shape "square", but stretched to rectangles by shapesize() - --------------------------------------- - To exit press STOP button - --------------------------------------- """ from turtle import * diff --git a/Lib/turtledemo/nim.py b/Lib/turtledemo/nim.py index 9ae6cc5c01b903..f87c479714d662 100644 --- a/Lib/turtledemo/nim.py +++ b/Lib/turtledemo/nim.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_nim.py +"""turtledemo/nim.py Play nim against the computer. The player who takes the last stick is the winner. 
diff --git a/Lib/turtledemo/paint.py b/Lib/turtledemo/paint.py index 6e63d004454589..780300fb2da9d1 100644 --- a/Lib/turtledemo/paint.py +++ b/Lib/turtledemo/paint.py @@ -1,12 +1,9 @@ -""" turtle-example-suite: +"""turtledemo/paint.py - tdemo_paint.py - -A simple event-driven paint program - -- left mouse button moves turtle -- middle mouse button changes color -- right mouse button toggles between pen up +A simple event-driven paint program. +- Left mouse button moves turtle. +- Middle mouse button changes color. +- Right mouse button toggles between pen up (no line drawn when the turtle moves) and pen down (line is drawn). If pen up follows at least two pen-down moves, the polygon that @@ -14,8 +11,6 @@ Play around by clicking into the canvas using all three mouse buttons. ------------------------------------------- Play around by clicking into the canvas using all three mouse buttons. - ------------------------------------------- - To exit press STOP button ------------------------------------------- """ from turtle import * diff --git a/Lib/turtledemo/peace.py b/Lib/turtledemo/peace.py index fd6abe390ef198..d86c94a48a2472 100644 --- a/Lib/turtledemo/peace.py +++ b/Lib/turtledemo/peace.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_peace.py +"""turtledemo/peace.py A simple drawing suitable as a beginner's programming example. Aside from the diff --git a/Lib/turtledemo/penrose.py b/Lib/turtledemo/penrose.py index ac12c899d3844e..ceaefedac24a67 100644 --- a/Lib/turtledemo/penrose.py +++ b/Lib/turtledemo/penrose.py @@ -1,6 +1,4 @@ -""" xturtle-example-suite: - - xtx_kites_and_darts.py +"""turtledemo/penrose.py Constructs two aperiodic penrose-tilings, consisting of kites and darts, by the method @@ -11,7 +9,7 @@ consisting of five darts. For more information see: - http://en.wikipedia.org/wiki/Penrose_tiling + https://en.wikipedia.org/wiki/Penrose_tiling ------------------------------------------- """ from turtle import * diff --git a/Lib/turtledemo/planet_and_moon.py b/Lib/turtledemo/planet_and_moon.py index c0e2c5b79e173e..571afcf922103f 100644 --- a/Lib/turtledemo/planet_and_moon.py +++ b/Lib/turtledemo/planet_and_moon.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_planets_and_moon.py +"""turtledemo/planet_and_moon.py Gravitational system simulation using the approximation method from Feynman-lectures, diff --git a/Lib/turtledemo/rosette.py b/Lib/turtledemo/rosette.py index 47d0f00e9da9d1..48897a620f9d8b 100644 --- a/Lib/turtledemo/rosette.py +++ b/Lib/turtledemo/rosette.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_wikipedia3.py +"""turtledemo/rosette.py This example is inspired by the Wikipedia article on turtle diff --git a/Lib/turtledemo/round_dance.py b/Lib/turtledemo/round_dance.py index 10383614c6e974..9da6389b213207 100644 --- a/Lib/turtledemo/round_dance.py +++ b/Lib/turtledemo/round_dance.py @@ -1,9 +1,4 @@ -""" turtle-example-suite: - - tdemo_round_dance.py - -(Needs version 1.1 of the turtle module that -comes with Python 3.1) +"""turtledemo/round_dance.py Dancing turtles have a compound shape consisting of a series of triangles of diff --git a/Lib/turtledemo/sorting_animate.py b/Lib/turtledemo/sorting_animate.py index ef4946db38250e..e0a2877cd5d621 100644 --- a/Lib/turtledemo/sorting_animate.py +++ b/Lib/turtledemo/sorting_animate.py @@ -1,6 +1,4 @@ -""" - - sorting_animation.py +"""turtledemo/sorting_animate.py A minimal sorting algorithm animation: Sorts a shelf of 10 blocks using insertion @@ -10,9 +8,6 @@ Blocks are turtles with shape "square", but stretched to rectangles by shapesize() - 
--------------------------------------- - To exit press space button - --------------------------------------- """ from turtle import * import random diff --git a/Lib/turtledemo/tree.py b/Lib/turtledemo/tree.py index 12729e23688a48..6ad8fcc854a155 100644 --- a/Lib/turtledemo/tree.py +++ b/Lib/turtledemo/tree.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_tree.py +"""turtledemo/tree.py Displays a 'breadth-first-tree' - in contrast to the classical Logo tree drawing programs, diff --git a/Lib/turtledemo/two_canvases.py b/Lib/turtledemo/two_canvases.py index f3602585ab0592..2c8020378edf1b 100644 --- a/Lib/turtledemo/two_canvases.py +++ b/Lib/turtledemo/two_canvases.py @@ -1,4 +1,4 @@ -"""turtledemo.two_canvases +"""turtledemo/two_canvases.py Use TurtleScreen and RawTurtle to draw on two distinct canvases in a separate window. The diff --git a/Lib/turtledemo/yinyang.py b/Lib/turtledemo/yinyang.py index 791060d17e6b6a..6e92d4bf739194 100644 --- a/Lib/turtledemo/yinyang.py +++ b/Lib/turtledemo/yinyang.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_yinyang.py +"""turtledemo/yinyang.py Another drawing suitable as a beginner's programming example. From 13c9fa3d64e0653d696daad716703ef05fd5002b Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 23 Oct 2024 23:37:06 +0200 Subject: [PATCH 35/36] gh-121938: ctypes: Skip test of _pack_-ed struct with c_int64 on x86 (GH-125877) The current auto-generated tests don't cover this; it's instead tested manually. --- Lib/test/test_ctypes/test_generated_structs.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Lib/test/test_ctypes/test_generated_structs.py b/Lib/test/test_ctypes/test_generated_structs.py index cbd73c4e911e4e..d61754d6d49e70 100644 --- a/Lib/test/test_ctypes/test_generated_structs.py +++ b/Lib/test/test_ctypes/test_generated_structs.py @@ -135,6 +135,18 @@ class Packed3(Structure): @register() class Packed4(Structure): + def _maybe_skip(): + # `_pack_` enables MSVC-style packing, but keeps platform-specific + # alignments. + # The C code we generate for GCC/clang currently uses + # `__attribute__((ms_struct))`, which activates MSVC layout *and* + # alignments, that is, sizeof(basic type) == alignment(basic type). + # On a Pentium, int64 is 32-bit aligned, so the two won't match. + # The expected behavior is instead tested in + # StructureTestCase.test_packed, over in test_structures.py. 
+ if sizeof(c_int64) != alignment(c_int64): + raise unittest.SkipTest('cannot test on this platform') + _fields_ = [('a', c_int8), ('b', c_int64)] _pack_ = 8 @@ -436,6 +448,8 @@ def test_generated_data(self): """ for name, cls in TESTCASES.items(): with self.subTest(name=name): + if _maybe_skip := getattr(cls, '_maybe_skip', None): + _maybe_skip() expected = iter(_ctypes_test.get_generated_test_data(name)) expected_name = next(expected) if expected_name is None: From 8f2c0f7a03b71485b5635cb47c000e4e8ace8800 Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Wed, 23 Oct 2024 15:04:30 -0700 Subject: [PATCH 36/36] gh-125884: Support breakpoint on functions with annotations (#125892) --- Lib/pdb.py | 7 ++-- Lib/test/test_pdb.py | 36 +++++++++++++++++++ ...-10-23-17-45-40.gh-issue-125884.41E_PD.rst | 1 + 3 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-23-17-45-40.gh-issue-125884.41E_PD.rst diff --git a/Lib/pdb.py b/Lib/pdb.py index 832213abbb98e6..3c0cbb525e28ef 100644 --- a/Lib/pdb.py +++ b/Lib/pdb.py @@ -118,7 +118,7 @@ def find_first_executable_line(code): return code.co_firstlineno def find_function(funcname, filename): - cre = re.compile(r'def\s+%s\s*[(]' % re.escape(funcname)) + cre = re.compile(r'def\s+%s(\s*\[.+\])?\s*[(]' % re.escape(funcname)) try: fp = tokenize.open(filename) except OSError: @@ -138,9 +138,12 @@ def find_function(funcname, filename): if funcdef: try: - funccode = compile(funcdef, filename, 'exec').co_consts[0] + code = compile(funcdef, filename, 'exec') except SyntaxError: continue + # We should always be able to find the code object here + funccode = next(c for c in code.co_consts if + isinstance(c, CodeType) and c.co_name == funcname) lineno_offset = find_first_executable_line(funccode) return funcname, filename, funcstart + lineno_offset - 1 return None diff --git a/Lib/test/test_pdb.py b/Lib/test/test_pdb.py index 1ea93ed037005d..e5f9848319021a 100644 --- a/Lib/test/test_pdb.py +++ b/Lib/test/test_pdb.py @@ -363,6 +363,42 @@ def test_pdb_breakpoint_commands(): 4 """ +def test_pdb_breakpoint_on_annotated_function_def(): + """Test breakpoints on function definitions with annotation. + + >>> def foo[T](): + ... return 0 + + >>> def bar() -> int: + ... return 0 + + >>> def foobar[T]() -> int: + ... return 0 + + >>> reset_Breakpoint() + + >>> def test_function(): + ... import pdb; pdb.Pdb(nosigint=True, readrc=False).set_trace() + ... pass + + >>> with PdbTestInput([ # doctest: +NORMALIZE_WHITESPACE + ... 'break foo', + ... 'break bar', + ... 'break foobar', + ... 'continue', + ... ]): + ... test_function() + > (2)test_function() + -> import pdb; pdb.Pdb(nosigint=True, readrc=False).set_trace() + (Pdb) break foo + Breakpoint 1 at :2 + (Pdb) break bar + Breakpoint 2 at :2 + (Pdb) break foobar + Breakpoint 3 at :2 + (Pdb) continue + """ + def test_pdb_commands(): """Test the commands command of pdb. diff --git a/Misc/NEWS.d/next/Library/2024-10-23-17-45-40.gh-issue-125884.41E_PD.rst b/Misc/NEWS.d/next/Library/2024-10-23-17-45-40.gh-issue-125884.41E_PD.rst new file mode 100644 index 00000000000000..684b1f282b143e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-23-17-45-40.gh-issue-125884.41E_PD.rst @@ -0,0 +1 @@ +Fixed the bug for :mod:`pdb` where it can't set breakpoints on functions with certain annotations.
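The regex change in ``find_function()`` above is small but easy to misread, so here is a quick sketch of what it accepts (not part of the patch; the three sample lines are illustrative)::

    import re

    funcname = 'foo'
    # The optional group (\s*\[.+\])? is what the patch adds: it lets the
    # pattern skip a PEP 695 type-parameter list between the name and '('.
    cre = re.compile(r'def\s+%s(\s*\[.+\])?\s*[(]' % re.escape(funcname))

    for line in (
        'def foo():',            # plain definition
        'def foo[T]():',         # PEP 695 type parameters
        'def foo[T]() -> int:',  # type parameters plus return annotation
    ):
        assert cre.match(line), line
    print('all three definition forms match')

A definition with only a return annotation (``def foo() -> int:``) already matched before the change, since the annotation follows the opening parenthesis; the added group is needed only for the bracketed type-parameter list.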