diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index f745b09796753b..6f00eca8de05af 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -31,7 +31,7 @@ extern "C" { #include "pycore_list.h" // struct _Py_list_state #include "pycore_mimalloc.h" // struct _mimalloc_interp_state #include "pycore_object_state.h" // struct _py_object_state -#include "pycore_optimizer.h" // _PyOptimizerObject +#include "pycore_optimizer.h" // _PyExecutorObject #include "pycore_obmalloc.h" // struct _obmalloc_state #include "pycore_qsbr.h" // struct _qsbr_state #include "pycore_stackref.h" // Py_STACKREF_DEBUG @@ -262,7 +262,7 @@ struct _is { struct ast_state ast; struct types_state types; struct callable_cache callable_cache; - _PyOptimizerObject *optimizer; + bool jit; _PyExecutorObject *executor_list_head; size_t trace_run_counter; _rare_events rare_events; diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index bad5e515a99565..98dfead35f7c31 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -271,8 +271,12 @@ int _PyOpcode_num_popped(int opcode, int oparg) { return 0; case JUMP_BACKWARD: return 0; + case JUMP_BACKWARD_JIT: + return 0; case JUMP_BACKWARD_NO_INTERRUPT: return 0; + case JUMP_BACKWARD_NO_JIT: + return 0; case JUMP_FORWARD: return 0; case JUMP_IF_FALSE: @@ -742,8 +746,12 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { return 0; case JUMP_BACKWARD: return 0; + case JUMP_BACKWARD_JIT: + return 0; case JUMP_BACKWARD_NO_INTERRUPT: return 0; + case JUMP_BACKWARD_NO_JIT: + return 0; case JUMP_FORWARD: return 0; case JUMP_IF_FALSE: @@ -1467,10 +1475,18 @@ int _PyOpcode_max_stack_effect(int opcode, int oparg, int *effect) { *effect = 0; return 0; } + case JUMP_BACKWARD_JIT: { + *effect = 0; + return 0; + } case JUMP_BACKWARD_NO_INTERRUPT: { *effect = 0; return 0; } + case JUMP_BACKWARD_NO_JIT: { + *effect = 0; + return 0; + } case JUMP_FORWARD: { *effect = 0; return 0; @@ -2110,7 +2126,9 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [INTERPRETER_EXIT] = { true, INSTR_FMT_IX, 0 }, [IS_OP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [JUMP_BACKWARD] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [JUMP_BACKWARD_JIT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [JUMP_BACKWARD_NO_INTERRUPT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG }, + [JUMP_BACKWARD_NO_JIT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [JUMP_FORWARD] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [LIST_APPEND] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, [LIST_EXTEND] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -2539,7 +2557,9 @@ const char *_PyOpcode_OpName[266] = { [IS_OP] = "IS_OP", [JUMP] = "JUMP", [JUMP_BACKWARD] = "JUMP_BACKWARD", + [JUMP_BACKWARD_JIT] = "JUMP_BACKWARD_JIT", [JUMP_BACKWARD_NO_INTERRUPT] = "JUMP_BACKWARD_NO_INTERRUPT", + [JUMP_BACKWARD_NO_JIT] = "JUMP_BACKWARD_NO_JIT", [JUMP_FORWARD] = "JUMP_FORWARD", [JUMP_IF_FALSE] = "JUMP_IF_FALSE", [JUMP_IF_TRUE] = "JUMP_IF_TRUE", @@ -2800,7 +2820,9 @@ const uint8_t _PyOpcode_Deopt[256] = { [INTERPRETER_EXIT] = INTERPRETER_EXIT, [IS_OP] = IS_OP, [JUMP_BACKWARD] = JUMP_BACKWARD, + [JUMP_BACKWARD_JIT] = JUMP_BACKWARD, [JUMP_BACKWARD_NO_INTERRUPT] = JUMP_BACKWARD_NO_INTERRUPT, + [JUMP_BACKWARD_NO_JIT] = JUMP_BACKWARD, [JUMP_FORWARD] = JUMP_FORWARD, [LIST_APPEND] = LIST_APPEND, [LIST_EXTEND] = LIST_EXTEND, @@ -2939,8 +2961,6 @@ const uint8_t _PyOpcode_Deopt[256] = { case 146: \ case 147: \ case 148: \ - case 230: \ - case 231: \ case 232: \ case 233: \ case 234: \ diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 03ce4d4491acd7..e806e306d2d57f 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -83,23 +83,6 @@ typedef struct _PyExecutorObject { _PyExitData exits[1]; } _PyExecutorObject; -typedef struct _PyOptimizerObject _PyOptimizerObject; - -/* Should return > 0 if a new executor is created. O if no executor is produced and < 0 if an error occurred. */ -typedef int (*_Py_optimize_func)( - _PyOptimizerObject* self, struct _PyInterpreterFrame *frame, - _Py_CODEUNIT *instr, _PyExecutorObject **exec_ptr, - int curr_stackentries, bool progress_needed); - -struct _PyOptimizerObject { - PyObject_HEAD - _Py_optimize_func optimize; - /* Data needed by the optimizer goes here, but is opaque to the VM */ -}; - -/** Test support **/ -_PyOptimizerObject *_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject* optimizer); - // Export for '_opcode' shared extension (JIT compiler). PyAPI_FUNC(_PyExecutorObject*) _Py_GetExecutor(PyCodeObject *code, int offset); @@ -110,12 +93,6 @@ void _Py_BloomFilter_Init(_PyBloomFilter *); void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj); PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj); -// For testing -// Export for '_testinternalcapi' shared extension. -PyAPI_FUNC(_PyOptimizerObject *) _Py_GetOptimizer(void); -PyAPI_FUNC(int) _Py_SetTier2Optimizer(_PyOptimizerObject* optimizer); -PyAPI_FUNC(PyObject *) _PyOptimizer_NewUOpOptimizer(void); - #define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3 #define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6 @@ -144,9 +121,7 @@ int _Py_uop_analyze_and_optimize(struct _PyInterpreterFrame *frame, _PyUOpInstruction *trace, int trace_len, int curr_stackentries, _PyBloomFilter *dependencies); -extern PyTypeObject _PyDefaultOptimizer_Type; extern PyTypeObject _PyUOpExecutor_Type; -extern PyTypeObject _PyUOpOptimizer_Type; #define UOP_FORMAT_TARGET 0 diff --git a/Include/opcode_ids.h b/Include/opcode_ids.h index c3b58825bfc938..4a9fc15dcd2880 100644 --- a/Include/opcode_ids.h +++ b/Include/opcode_ids.h @@ -174,41 +174,43 @@ extern "C" { #define FOR_ITER_LIST 192 #define FOR_ITER_RANGE 193 #define FOR_ITER_TUPLE 194 -#define LOAD_ATTR_CLASS 195 -#define LOAD_ATTR_CLASS_WITH_METACLASS_CHECK 196 -#define LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN 197 -#define LOAD_ATTR_INSTANCE_VALUE 198 -#define LOAD_ATTR_METHOD_LAZY_DICT 199 -#define LOAD_ATTR_METHOD_NO_DICT 200 -#define LOAD_ATTR_METHOD_WITH_VALUES 201 -#define LOAD_ATTR_MODULE 202 -#define LOAD_ATTR_NONDESCRIPTOR_NO_DICT 203 -#define LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 204 -#define LOAD_ATTR_PROPERTY 205 -#define LOAD_ATTR_SLOT 206 -#define LOAD_ATTR_WITH_HINT 207 -#define LOAD_CONST_IMMORTAL 208 -#define LOAD_CONST_MORTAL 209 -#define LOAD_GLOBAL_BUILTIN 210 -#define LOAD_GLOBAL_MODULE 211 -#define LOAD_SUPER_ATTR_ATTR 212 -#define LOAD_SUPER_ATTR_METHOD 213 -#define RESUME_CHECK 214 -#define SEND_GEN 215 -#define STORE_ATTR_INSTANCE_VALUE 216 -#define STORE_ATTR_SLOT 217 -#define STORE_ATTR_WITH_HINT 218 -#define STORE_SUBSCR_DICT 219 -#define STORE_SUBSCR_LIST_INT 220 -#define TO_BOOL_ALWAYS_TRUE 221 -#define TO_BOOL_BOOL 222 -#define TO_BOOL_INT 223 -#define TO_BOOL_LIST 224 -#define TO_BOOL_NONE 225 -#define TO_BOOL_STR 226 -#define UNPACK_SEQUENCE_LIST 227 -#define UNPACK_SEQUENCE_TUPLE 228 -#define UNPACK_SEQUENCE_TWO_TUPLE 229 +#define JUMP_BACKWARD_JIT 195 +#define JUMP_BACKWARD_NO_JIT 196 +#define LOAD_ATTR_CLASS 197 +#define LOAD_ATTR_CLASS_WITH_METACLASS_CHECK 198 +#define LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN 199 +#define LOAD_ATTR_INSTANCE_VALUE 200 +#define LOAD_ATTR_METHOD_LAZY_DICT 201 +#define LOAD_ATTR_METHOD_NO_DICT 202 +#define LOAD_ATTR_METHOD_WITH_VALUES 203 +#define LOAD_ATTR_MODULE 204 +#define LOAD_ATTR_NONDESCRIPTOR_NO_DICT 205 +#define LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 206 +#define LOAD_ATTR_PROPERTY 207 +#define LOAD_ATTR_SLOT 208 +#define LOAD_ATTR_WITH_HINT 209 +#define LOAD_CONST_IMMORTAL 210 +#define LOAD_CONST_MORTAL 211 +#define LOAD_GLOBAL_BUILTIN 212 +#define LOAD_GLOBAL_MODULE 213 +#define LOAD_SUPER_ATTR_ATTR 214 +#define LOAD_SUPER_ATTR_METHOD 215 +#define RESUME_CHECK 216 +#define SEND_GEN 217 +#define STORE_ATTR_INSTANCE_VALUE 218 +#define STORE_ATTR_SLOT 219 +#define STORE_ATTR_WITH_HINT 220 +#define STORE_SUBSCR_DICT 221 +#define STORE_SUBSCR_LIST_INT 222 +#define TO_BOOL_ALWAYS_TRUE 223 +#define TO_BOOL_BOOL 224 +#define TO_BOOL_INT 225 +#define TO_BOOL_LIST 226 +#define TO_BOOL_NONE 227 +#define TO_BOOL_STR 228 +#define UNPACK_SEQUENCE_LIST 229 +#define UNPACK_SEQUENCE_TUPLE 230 +#define UNPACK_SEQUENCE_TWO_TUPLE 231 #define INSTRUMENTED_END_FOR 235 #define INSTRUMENTED_POP_ITER 236 #define INSTRUMENTED_END_SEND 237 diff --git a/InternalDocs/jit.md b/InternalDocs/jit.md index 1e9f385d5f87fa..2c204f39792d6a 100644 --- a/InternalDocs/jit.md +++ b/InternalDocs/jit.md @@ -38,12 +38,8 @@ executor in `co_executors`. ## The micro-op optimizer -The optimizer that `_PyOptimizer_Optimize()` runs is configurable via the -`_Py_SetTier2Optimizer()` function (this is used in test via -`_testinternalcapi.set_optimizer()`.) - The micro-op (abbreviated `uop` to approximate `μop`) optimizer is defined in -[`Python/optimizer.c`](../Python/optimizer.c) as the type `_PyUOpOptimizer_Type`. +[`Python/optimizer.c`](../Python/optimizer.c) as `_PyOptimizer_Optimize`. It translates an instruction trace into a sequence of micro-ops by replacing each bytecode by an equivalent sequence of micro-ops (see `_PyOpcode_macro_expansion` in diff --git a/Lib/_opcode_metadata.py b/Lib/_opcode_metadata.py index 459f7411296bcd..12c41374592185 100644 --- a/Lib/_opcode_metadata.py +++ b/Lib/_opcode_metadata.py @@ -85,6 +85,10 @@ "CONTAINS_OP_SET", "CONTAINS_OP_DICT", ], + "JUMP_BACKWARD": [ + "JUMP_BACKWARD_NO_JIT", + "JUMP_BACKWARD_JIT", + ], "FOR_ITER": [ "FOR_ITER_LIST", "FOR_ITER_TUPLE", @@ -167,41 +171,43 @@ 'FOR_ITER_LIST': 192, 'FOR_ITER_RANGE': 193, 'FOR_ITER_TUPLE': 194, - 'LOAD_ATTR_CLASS': 195, - 'LOAD_ATTR_CLASS_WITH_METACLASS_CHECK': 196, - 'LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN': 197, - 'LOAD_ATTR_INSTANCE_VALUE': 198, - 'LOAD_ATTR_METHOD_LAZY_DICT': 199, - 'LOAD_ATTR_METHOD_NO_DICT': 200, - 'LOAD_ATTR_METHOD_WITH_VALUES': 201, - 'LOAD_ATTR_MODULE': 202, - 'LOAD_ATTR_NONDESCRIPTOR_NO_DICT': 203, - 'LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES': 204, - 'LOAD_ATTR_PROPERTY': 205, - 'LOAD_ATTR_SLOT': 206, - 'LOAD_ATTR_WITH_HINT': 207, - 'LOAD_CONST_IMMORTAL': 208, - 'LOAD_CONST_MORTAL': 209, - 'LOAD_GLOBAL_BUILTIN': 210, - 'LOAD_GLOBAL_MODULE': 211, - 'LOAD_SUPER_ATTR_ATTR': 212, - 'LOAD_SUPER_ATTR_METHOD': 213, - 'RESUME_CHECK': 214, - 'SEND_GEN': 215, - 'STORE_ATTR_INSTANCE_VALUE': 216, - 'STORE_ATTR_SLOT': 217, - 'STORE_ATTR_WITH_HINT': 218, - 'STORE_SUBSCR_DICT': 219, - 'STORE_SUBSCR_LIST_INT': 220, - 'TO_BOOL_ALWAYS_TRUE': 221, - 'TO_BOOL_BOOL': 222, - 'TO_BOOL_INT': 223, - 'TO_BOOL_LIST': 224, - 'TO_BOOL_NONE': 225, - 'TO_BOOL_STR': 226, - 'UNPACK_SEQUENCE_LIST': 227, - 'UNPACK_SEQUENCE_TUPLE': 228, - 'UNPACK_SEQUENCE_TWO_TUPLE': 229, + 'JUMP_BACKWARD_JIT': 195, + 'JUMP_BACKWARD_NO_JIT': 196, + 'LOAD_ATTR_CLASS': 197, + 'LOAD_ATTR_CLASS_WITH_METACLASS_CHECK': 198, + 'LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN': 199, + 'LOAD_ATTR_INSTANCE_VALUE': 200, + 'LOAD_ATTR_METHOD_LAZY_DICT': 201, + 'LOAD_ATTR_METHOD_NO_DICT': 202, + 'LOAD_ATTR_METHOD_WITH_VALUES': 203, + 'LOAD_ATTR_MODULE': 204, + 'LOAD_ATTR_NONDESCRIPTOR_NO_DICT': 205, + 'LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES': 206, + 'LOAD_ATTR_PROPERTY': 207, + 'LOAD_ATTR_SLOT': 208, + 'LOAD_ATTR_WITH_HINT': 209, + 'LOAD_CONST_IMMORTAL': 210, + 'LOAD_CONST_MORTAL': 211, + 'LOAD_GLOBAL_BUILTIN': 212, + 'LOAD_GLOBAL_MODULE': 213, + 'LOAD_SUPER_ATTR_ATTR': 214, + 'LOAD_SUPER_ATTR_METHOD': 215, + 'RESUME_CHECK': 216, + 'SEND_GEN': 217, + 'STORE_ATTR_INSTANCE_VALUE': 218, + 'STORE_ATTR_SLOT': 219, + 'STORE_ATTR_WITH_HINT': 220, + 'STORE_SUBSCR_DICT': 221, + 'STORE_SUBSCR_LIST_INT': 222, + 'TO_BOOL_ALWAYS_TRUE': 223, + 'TO_BOOL_BOOL': 224, + 'TO_BOOL_INT': 225, + 'TO_BOOL_LIST': 226, + 'TO_BOOL_NONE': 227, + 'TO_BOOL_STR': 228, + 'UNPACK_SEQUENCE_LIST': 229, + 'UNPACK_SEQUENCE_TUPLE': 230, + 'UNPACK_SEQUENCE_TWO_TUPLE': 231, } opmap = { diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 89f2a6b916bfc2..6436753f998a16 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -58,7 +58,8 @@ "LOOPBACK_TIMEOUT", "INTERNET_TIMEOUT", "SHORT_TIMEOUT", "LONG_TIMEOUT", "Py_DEBUG", "exceeds_recursion_limit", "get_c_recursion_limit", "skip_on_s390x", - "without_optimizer", + "requires_jit_enabled", + "requires_jit_disabled", "force_not_colorized", "force_not_colorized_test_class", "make_clean_env", @@ -2620,21 +2621,13 @@ def exceeds_recursion_limit(): Py_TRACE_REFS = hasattr(sys, 'getobjects') -# Decorator to disable optimizer while a function run -def without_optimizer(func): - try: - from _testinternalcapi import get_optimizer, set_optimizer - except ImportError: - return func - @functools.wraps(func) - def wrapper(*args, **kwargs): - save_opt = get_optimizer() - try: - set_optimizer(None) - return func(*args, **kwargs) - finally: - set_optimizer(save_opt) - return wrapper +try: + from _testinternalcapi import jit_enabled +except ImportError: + requires_jit_enabled = requires_jit_disabled = unittest.skip("requires _testinternalcapi") +else: + requires_jit_enabled = unittest.skipUnless(jit_enabled(), "requires JIT enabled") + requires_jit_disabled = unittest.skipIf(jit_enabled(), "requires JIT disabled") _BASE_COPY_SRC_DIR_IGNORED_NAMES = frozenset({ diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 114e7cdfd0cd9c..1087b38c225085 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -306,7 +306,7 @@ def test_getitem_with_error(self): CURRENT_THREAD_REGEX + r' File .*, line 6 in \n' r'\n' - r'Extension modules: _testcapi \(total: 1\)\n') + r'Extension modules: _testcapi, _testinternalcapi \(total: 2\)\n') else: # Python built with NDEBUG macro defined: # test _Py_CheckFunctionResult() instead. diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 6a2f7726222f9b..d3aea37e094e61 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -9,21 +9,12 @@ import _opcode from test.support import (script_helper, requires_specialization, - import_helper, Py_GIL_DISABLED) + import_helper, Py_GIL_DISABLED, requires_jit_enabled) _testinternalcapi = import_helper.import_module("_testinternalcapi") from _testinternalcapi import TIER2_THRESHOLD -@contextlib.contextmanager -def temporary_optimizer(opt): - old_opt = _testinternalcapi.get_optimizer() - _testinternalcapi.set_optimizer(opt) - try: - yield - finally: - _testinternalcapi.set_optimizer(old_opt) - @contextlib.contextmanager def clear_executors(func): @@ -57,8 +48,7 @@ def get_opnames(ex): @requires_specialization @unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds") -@unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"), - "Requires optimizer infrastructure") +@requires_jit_enabled class TestExecutorInvalidation(unittest.TestCase): def test_invalidate_object(self): @@ -75,10 +65,8 @@ def f{n}(): funcs = [ ns[f'f{n}'] for n in range(5)] objects = [object() for _ in range(5)] - opt = _testinternalcapi.new_uop_optimizer() - with temporary_optimizer(opt): - for f in funcs: - f() + for f in funcs: + f() executors = [get_first_executor(f) for f in funcs] # Set things up so each executor depends on the objects # with an equal or lower index. @@ -106,9 +94,7 @@ def f(): pass """), ns, ns) f = ns['f'] - opt = _testinternalcapi.new_uop_optimizer() - with temporary_optimizer(opt): - f() + f() exe = get_first_executor(f) self.assertIsNotNone(exe) self.assertTrue(exe.is_valid()) @@ -119,9 +105,7 @@ def test_sys__clear_internal_caches(self): def f(): for _ in range(TIER2_THRESHOLD): pass - opt = _testinternalcapi.new_uop_optimizer() - with temporary_optimizer(opt): - f() + f() exe = get_first_executor(f) self.assertIsNotNone(exe) self.assertTrue(exe.is_valid()) @@ -133,8 +117,7 @@ def f(): @requires_specialization @unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds") -@unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"), - "Requires optimizer infrastructure") +@requires_jit_enabled @unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.") class TestUops(unittest.TestCase): @@ -144,9 +127,7 @@ def testfunc(x): while i < x: i += 1 - opt = _testinternalcapi.new_uop_optimizer() - with temporary_optimizer(opt): - testfunc(TIER2_THRESHOLD) + testfunc(TIER2_THRESHOLD) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) @@ -192,11 +173,9 @@ def many_vars(): """), ns, ns) many_vars = ns["many_vars"] - opt = _testinternalcapi.new_uop_optimizer() - with temporary_optimizer(opt): - ex = get_first_executor(many_vars) - self.assertIsNone(ex) - many_vars() + ex = get_first_executor(many_vars) + self.assertIsNone(ex) + many_vars() ex = get_first_executor(many_vars) self.assertIsNotNone(ex) @@ -215,10 +194,7 @@ def testfunc(x): while i < x: i += 1 - opt = _testinternalcapi.new_uop_optimizer() - - with temporary_optimizer(opt): - testfunc(TIER2_THRESHOLD) + testfunc(TIER2_THRESHOLD) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) @@ -231,9 +207,7 @@ def testfunc(n): while i < n: i += 1 - opt = _testinternalcapi.new_uop_optimizer() - with temporary_optimizer(opt): - testfunc(TIER2_THRESHOLD) + testfunc(TIER2_THRESHOLD) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) @@ -246,9 +220,7 @@ def testfunc(a): if x is None: x = 0 - opt = _testinternalcapi.new_uop_optimizer() - with temporary_optimizer(opt): - testfunc(range(TIER2_THRESHOLD)) + testfunc(range(TIER2_THRESHOLD)) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) @@ -263,9 +235,7 @@ def testfunc(a): if x is not None: x = 0 - opt = _testinternalcapi.new_uop_optimizer() - with temporary_optimizer(opt): - testfunc(range(TIER2_THRESHOLD)) + testfunc(range(TIER2_THRESHOLD)) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) @@ -279,9 +249,7 @@ def testfunc(n): while not i >= n: i += 1 - opt = _testinternalcapi.new_uop_optimizer() - with temporary_optimizer(opt): - testfunc(TIER2_THRESHOLD) + testfunc(TIER2_THRESHOLD) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) @@ -294,9 +262,7 @@ def testfunc(n): while i < n: i += 1 - opt = _testinternalcapi.new_uop_optimizer() - with temporary_optimizer(opt): - testfunc(TIER2_THRESHOLD) + testfunc(TIER2_THRESHOLD) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) @@ -314,9 +280,7 @@ def testfunc(n): a += 1 return a - opt = _testinternalcapi.new_uop_optimizer() - with temporary_optimizer(opt): - testfunc(TIER2_THRESHOLD) + testfunc(TIER2_THRESHOLD) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) @@ -332,10 +296,8 @@ def testfunc(n): total += i return total - opt = _testinternalcapi.new_uop_optimizer() - with temporary_optimizer(opt): - total = testfunc(TIER2_THRESHOLD) - self.assertEqual(total, sum(range(TIER2_THRESHOLD))) + total = testfunc(TIER2_THRESHOLD) + self.assertEqual(total, sum(range(TIER2_THRESHOLD))) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) @@ -353,11 +315,9 @@ def testfunc(a): total += i return total - opt = _testinternalcapi.new_uop_optimizer() - with temporary_optimizer(opt): - a = list(range(TIER2_THRESHOLD)) - total = testfunc(a) - self.assertEqual(total, sum(a)) + a = list(range(TIER2_THRESHOLD)) + total = testfunc(a) + self.assertEqual(total, sum(a)) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) @@ -375,11 +335,9 @@ def testfunc(a): total += i return total - opt = _testinternalcapi.new_uop_optimizer() - with temporary_optimizer(opt): - a = tuple(range(TIER2_THRESHOLD)) - total = testfunc(a) - self.assertEqual(total, sum(a)) + a = tuple(range(TIER2_THRESHOLD)) + total = testfunc(a) + self.assertEqual(total, sum(a)) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) @@ -395,14 +353,12 @@ def testfunc(it): for x in it: pass - opt = _testinternalcapi.new_uop_optimizer() - with temporary_optimizer(opt): - a = [1, 2, 3] - it = iter(a) - testfunc(it) - a.append(4) - with self.assertRaises(StopIteration): - next(it) + a = [1, 2, 3] + it = iter(a) + testfunc(it) + a.append(4) + with self.assertRaises(StopIteration): + next(it) def test_call_py_exact_args(self): def testfunc(n): @@ -411,9 +367,7 @@ def dummy(x): for i in range(n): dummy(i) - opt = _testinternalcapi.new_uop_optimizer() - with temporary_optimizer(opt): - testfunc(TIER2_THRESHOLD) + testfunc(TIER2_THRESHOLD) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) @@ -429,9 +383,7 @@ def testfunc(n): else: i = 1 - opt = _testinternalcapi.new_uop_optimizer() - with temporary_optimizer(opt): - testfunc(TIER2_THRESHOLD) + testfunc(TIER2_THRESHOLD) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) @@ -457,9 +409,7 @@ def testfunc(n, m): x += 1000*i + j return x - opt = _testinternalcapi.new_uop_optimizer() - with temporary_optimizer(opt): - x = testfunc(TIER2_THRESHOLD, TIER2_THRESHOLD) + x = testfunc(TIER2_THRESHOLD, TIER2_THRESHOLD) self.assertEqual(x, sum(range(TIER2_THRESHOLD)) * TIER2_THRESHOLD * 1001) @@ -484,9 +434,7 @@ def testfunc(n): bits += 1 return bits - opt = _testinternalcapi.new_uop_optimizer() - with temporary_optimizer(opt): - x = testfunc(TIER2_THRESHOLD * 2) + x = testfunc(TIER2_THRESHOLD * 2) self.assertEqual(x, TIER2_THRESHOLD * 5) ex = get_first_executor(testfunc) @@ -499,16 +447,12 @@ def testfunc(n): @requires_specialization @unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds") -@unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"), - "Requires optimizer infrastructure") +@requires_jit_enabled @unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.") class TestUopsOptimization(unittest.TestCase): def _run_with_optimizer(self, testfunc, arg): - res = None - opt = _testinternalcapi.new_uop_optimizer() - with temporary_optimizer(opt): - res = testfunc(arg) + res = testfunc(arg) ex = get_first_executor(testfunc) return res, ex @@ -542,10 +486,7 @@ def testfunc(loops): num += 1 return a - opt = _testinternalcapi.new_uop_optimizer() - res = None - with temporary_optimizer(opt): - res = testfunc(TIER2_THRESHOLD) + res = testfunc(TIER2_THRESHOLD) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) @@ -566,10 +507,7 @@ def testfunc(loops): num += 1 return x - opt = _testinternalcapi.new_uop_optimizer() - res = None - with temporary_optimizer(opt): - res = testfunc(TIER2_THRESHOLD) + res = testfunc(TIER2_THRESHOLD) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) @@ -661,16 +599,14 @@ def testfunc(n): for i in range(n): dummy(i) - opt = _testinternalcapi.new_uop_optimizer() # Trigger specialization testfunc(8) - with temporary_optimizer(opt): - del dummy - gc.collect() + del dummy + gc.collect() - def dummy(x): - return x + 2 - testfunc(32) + def dummy(x): + return x + 2 + testfunc(32) ex = get_first_executor(testfunc) # Honestly as long as it doesn't crash it's fine. @@ -703,8 +639,6 @@ def testfunc(n): x = range(i) return x - opt = _testinternalcapi.new_uop_optimizer() - _testinternalcapi.set_optimizer(opt) testfunc(_testinternalcapi.TIER2_THRESHOLD) ex = get_first_executor(testfunc) @@ -712,7 +646,7 @@ def testfunc(n): uops = get_opnames(ex) assert "_LOAD_GLOBAL_BUILTINS" not in uops assert "_LOAD_CONST_INLINE_BORROW" in uops - """)) + """), PYTHON_JIT="1") self.assertEqual(result[0].rc, 0, result) def test_float_add_constant_propagation(self): @@ -1399,9 +1333,7 @@ def testfunc(n): # Only works on functions promoted to constants global_identity(i) - opt = _testinternalcapi.new_uop_optimizer() - with temporary_optimizer(opt): - testfunc(TIER2_THRESHOLD) + testfunc(TIER2_THRESHOLD) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) @@ -1488,12 +1420,12 @@ def test_decref_escapes(self): class Convert9999ToNone: def __del__(self): ns = sys._getframe(1).f_locals - if ns["i"] == 9999: + if ns["i"] == _testinternalcapi.TIER2_THRESHOLD: ns["i"] = None def crash_addition(): try: - for i in range(10000): + for i in range(_testinternalcapi.TIER2_THRESHOLD + 1): n = Convert9999ToNone() i + i # Remove guards for i. n = None # Change i. diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index df4bdf4a3dd312..2e149b32e5c1ec 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -15,7 +15,7 @@ import unittest from test.support import (captured_stdout, requires_debug_ranges, requires_specialization, cpython_only, - os_helper) + os_helper, import_helper) from test.support.bytecode_helper import BytecodeTestCase @@ -904,7 +904,7 @@ def loop_test(): LOAD_FAST 0 (i) CALL_PY_GENERAL 1 POP_TOP - JUMP_BACKWARD 16 (to L1) + JUMP_BACKWARD_{: <6} 16 (to L1) %3d L2: END_FOR POP_ITER @@ -1308,7 +1308,8 @@ def test_loop_quicken(self): # Loop can trigger a quicken where the loop is located self.code_quicken(loop_test, 4) got = self.get_disassembly(loop_test, adaptive=True) - expected = dis_loop_test_quickened_code + jit = import_helper.import_module("_testinternalcapi").jit_enabled() + expected = dis_loop_test_quickened_code.format("JIT" if jit else "NO_JIT") self.do_disassembly_compare(got, expected) @cpython_only diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index e8ea21f8179978..cc58a4b8c3cd11 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -6,7 +6,7 @@ import unittest from test.support import (threading_helper, check_impl_detail, requires_specialization, requires_specialization_ft, - cpython_only) + cpython_only, requires_jit_disabled) from test.support.import_helper import import_module # Skip this module on other interpreters, it is cpython specific: @@ -16,20 +16,6 @@ _testinternalcapi = import_module("_testinternalcapi") -def disabling_optimizer(func): - def wrapper(*args, **kwargs): - if not hasattr(_testinternalcapi, "get_optimizer"): - return func(*args, **kwargs) - old_opt = _testinternalcapi.get_optimizer() - _testinternalcapi.set_optimizer(None) - try: - return func(*args, **kwargs) - finally: - _testinternalcapi.set_optimizer(old_opt) - - return wrapper - - class TestBase(unittest.TestCase): def assert_specialized(self, f, opname): instructions = dis.get_instructions(f, adaptive=True) @@ -526,7 +512,7 @@ def f(x, y): f(None) f() - @disabling_optimizer + @requires_jit_disabled @requires_specialization_ft def test_assign_init_code(self): class MyClass: @@ -549,7 +535,7 @@ def count_args(self, *args): MyClass.__init__.__code__ = count_args.__code__ instantiate() - @disabling_optimizer + @requires_jit_disabled @requires_specialization_ft def test_push_init_frame_fails(self): def instantiate(): @@ -583,7 +569,7 @@ class TestRacesDoNotCrash(TestBase): WARMUPS = 2 WRITERS = 2 - @disabling_optimizer + @requires_jit_disabled def assert_races_do_not_crash( self, opname, get_items, read, write, *, check_items=False ): diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py index e9ef830c848aad..969f483814d08d 100644 --- a/Lib/test/test_regrtest.py +++ b/Lib/test/test_regrtest.py @@ -1185,7 +1185,7 @@ def test_run(self): stats=TestStats(4, 1), forever=True) - @support.without_optimizer + @support.requires_jit_disabled def check_leak(self, code, what, *, run_workers=False): test = self.create_test('huntrleaks', code=code) diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index f3d234a7f9595e..9f38a9d7a9a5c2 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -950,38 +950,13 @@ get_co_framesize(PyObject *self, PyObject *arg) return PyLong_FromLong(code->co_framesize); } -#ifdef _Py_TIER2 - -static PyObject * -new_uop_optimizer(PyObject *self, PyObject *arg) -{ - return _PyOptimizer_NewUOpOptimizer(); -} - static PyObject * -set_optimizer(PyObject *self, PyObject *opt) +jit_enabled(PyObject *self, PyObject *arg) { - if (opt == Py_None) { - opt = NULL; - } - if (_Py_SetTier2Optimizer((_PyOptimizerObject*)opt) < 0) { - return NULL; - } - Py_RETURN_NONE; + return PyBool_FromLong(_PyInterpreterState_GET()->jit); } -static PyObject * -get_optimizer(PyObject *self, PyObject *Py_UNUSED(ignored)) -{ - PyObject *opt = NULL; #ifdef _Py_TIER2 - opt = (PyObject *)_Py_GetOptimizer(); -#endif - if (opt == NULL) { - Py_RETURN_NONE; - } - return opt; -} static PyObject * add_executor_dependency(PyObject *self, PyObject *args) @@ -2047,10 +2022,8 @@ static PyMethodDef module_functions[] = { {"iframe_getline", iframe_getline, METH_O, NULL}, {"iframe_getlasti", iframe_getlasti, METH_O, NULL}, {"get_co_framesize", get_co_framesize, METH_O, NULL}, + {"jit_enabled", jit_enabled, METH_NOARGS, NULL}, #ifdef _Py_TIER2 - {"get_optimizer", get_optimizer, METH_NOARGS, NULL}, - {"set_optimizer", set_optimizer, METH_O, NULL}, - {"new_uop_optimizer", new_uop_optimizer, METH_NOARGS, NULL}, {"add_executor_dependency", add_executor_dependency, METH_VARARGS, NULL}, {"invalidate_executors", invalidate_executors, METH_O, NULL}, #endif diff --git a/Objects/object.c b/Objects/object.c index a70a2c3fc2f3dd..cd48d2f75ba490 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -19,7 +19,7 @@ #include "pycore_object.h" // PyAPI_DATA() _Py_SwappedOp definition #include "pycore_object_state.h" // struct _reftracer_runtime_state #include "pycore_long.h" // _PyLong_GetZero() -#include "pycore_optimizer.h" // _PyUOpExecutor_Type, _PyUOpOptimizer_Type, ... +#include "pycore_optimizer.h" // _PyUOpExecutor_Type, ... #include "pycore_pyerrors.h" // _PyErr_Occurred() #include "pycore_pymem.h" // _PyMem_IsPtrFreed() #include "pycore_pystate.h" // _PyThreadState_GET() @@ -2379,9 +2379,6 @@ static PyTypeObject* static_types[] = { &_PyBufferWrapper_Type, &_PyContextTokenMissing_Type, &_PyCoroWrapper_Type, -#ifdef _Py_TIER2 - &_PyDefaultOptimizer_Type, -#endif &_Py_GenericAliasIterType, &_PyHamtItems_Type, &_PyHamtKeys_Type, @@ -2404,7 +2401,6 @@ static PyTypeObject* static_types[] = { &_PyUnion_Type, #ifdef _Py_TIER2 &_PyUOpExecutor_Type, - &_PyUOpOptimizer_Type, #endif &_PyWeakref_CallableProxyType, &_PyWeakref_ProxyType, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 7d463511aee41d..0d7b9f2a781019 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2782,13 +2782,26 @@ dummy_func( JUMPBY(oparg); } - tier1 op(_JUMP_BACKWARD, (the_counter/1 --)) { - assert(oparg <= INSTR_OFFSET()); - JUMPBY(-oparg); - #ifdef _Py_TIER2 - #if ENABLE_SPECIALIZATION + family(JUMP_BACKWARD, 1) = { + JUMP_BACKWARD_NO_JIT, + JUMP_BACKWARD_JIT, + }; + + tier1 op(_SPECIALIZE_JUMP_BACKWARD, (--)) { + #if ENABLE_SPECIALIZATION + if (this_instr->op.code == JUMP_BACKWARD) { + this_instr->op.code = tstate->interp->jit ? JUMP_BACKWARD_JIT : JUMP_BACKWARD_NO_JIT; + // Need to re-dispatch so the warmup counter isn't off by one: + next_instr = this_instr; + DISPATCH_SAME_OPARG(); + } + #endif + } + + tier1 op(_JIT, (--)) { + #ifdef _Py_TIER2 _Py_BackoffCounter counter = this_instr[1].counter; - if (backoff_counter_triggers(counter) && this_instr->op.code == JUMP_BACKWARD) { + if (backoff_counter_triggers(counter) && this_instr->op.code == JUMP_BACKWARD_JIT) { _Py_CODEUNIT *start = this_instr; /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */ while (oparg > 255) { @@ -2811,13 +2824,25 @@ dummy_func( else { ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); } - #endif /* ENABLE_SPECIALIZATION */ - #endif /* _Py_TIER2 */ + #endif } macro(JUMP_BACKWARD) = + unused/1 + + _SPECIALIZE_JUMP_BACKWARD + _CHECK_PERIODIC + - _JUMP_BACKWARD; + JUMP_BACKWARD_NO_INTERRUPT; + + macro(JUMP_BACKWARD_NO_JIT) = + unused/1 + + _CHECK_PERIODIC + + JUMP_BACKWARD_NO_INTERRUPT; + + macro(JUMP_BACKWARD_JIT) = + unused/1 + + _CHECK_PERIODIC + + JUMP_BACKWARD_NO_INTERRUPT + + _JIT; pseudo(JUMP, (--)) = { JUMP_FORWARD, @@ -2906,6 +2931,7 @@ dummy_func( * generator or coroutine, so we deliberately do not check it here. * (see bpo-30039). */ + assert(oparg <= INSTR_OFFSET()); JUMPBY(-oparg); } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 5dd2f37d811109..ffdad70815caef 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -180,6 +180,7 @@ TARGET(BINARY_OP_EXTEND) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 6; INSTRUCTION_STATS(BINARY_OP_EXTEND); static_assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5, "incorrect cache size"); @@ -1087,6 +1088,7 @@ TARGET(CALL_ALLOC_AND_ENTER_INIT) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 4; INSTRUCTION_STATS(CALL_ALLOC_AND_ENTER_INIT); static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size"); @@ -1185,6 +1187,7 @@ TARGET(CALL_BOUND_METHOD_EXACT_ARGS) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 4; INSTRUCTION_STATS(CALL_BOUND_METHOD_EXACT_ARGS); static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size"); @@ -1288,6 +1291,7 @@ TARGET(CALL_BOUND_METHOD_GENERAL) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 4; INSTRUCTION_STATS(CALL_BOUND_METHOD_GENERAL); static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size"); @@ -2113,6 +2117,7 @@ TARGET(CALL_KW_BOUND_METHOD) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 4; INSTRUCTION_STATS(CALL_KW_BOUND_METHOD); static_assert(INLINE_CACHE_ENTRIES_CALL_KW == 3, "incorrect cache size"); @@ -2313,6 +2318,7 @@ TARGET(CALL_KW_PY) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 4; INSTRUCTION_STATS(CALL_KW_PY); static_assert(INLINE_CACHE_ENTRIES_CALL_KW == 3, "incorrect cache size"); @@ -2890,6 +2896,7 @@ TARGET(CALL_PY_EXACT_ARGS) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 4; INSTRUCTION_STATS(CALL_PY_EXACT_ARGS); static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size"); @@ -2971,6 +2978,7 @@ TARGET(CALL_PY_GENERAL) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 4; INSTRUCTION_STATS(CALL_PY_GENERAL); static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size"); @@ -5160,10 +5168,55 @@ } TARGET(JUMP_BACKWARD) { + frame->instr_ptr = next_instr; + next_instr += 2; + INSTRUCTION_STATS(JUMP_BACKWARD); + PREDICTED_JUMP_BACKWARD:; + _Py_CODEUNIT* const this_instr = next_instr - 2; + (void)this_instr; + /* Skip 1 cache entry */ + // _SPECIALIZE_JUMP_BACKWARD + { + #if ENABLE_SPECIALIZATION + if (this_instr->op.code == JUMP_BACKWARD) { + this_instr->op.code = tstate->interp->jit ? JUMP_BACKWARD_JIT : JUMP_BACKWARD_NO_JIT; + // Need to re-dispatch so the warmup counter isn't off by one: + next_instr = this_instr; + DISPATCH_SAME_OPARG(); + } + #endif + } + // _CHECK_PERIODIC + { + _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); + QSBR_QUIESCENT_STATE(tstate); + if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) { + _PyFrame_SetStackPointer(frame, stack_pointer); + int err = _Py_HandlePending(tstate); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (err != 0) goto error; + } + } + // _JUMP_BACKWARD_NO_INTERRUPT + { + /* This bytecode is used in the `yield from` or `await` loop. + * If there is an interrupt, we want it handled in the innermost + * generator or coroutine, so we deliberately do not check it here. + * (see bpo-30039). + */ + assert(oparg <= INSTR_OFFSET()); + JUMPBY(-oparg); + } + DISPATCH(); + } + + TARGET(JUMP_BACKWARD_JIT) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; (void)this_instr; next_instr += 2; - INSTRUCTION_STATS(JUMP_BACKWARD); + INSTRUCTION_STATS(JUMP_BACKWARD_JIT); + static_assert(1 == 1, "incorrect cache size"); + /* Skip 1 cache entry */ // _CHECK_PERIODIC { _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); @@ -5175,16 +5228,21 @@ if (err != 0) goto error; } } - // _JUMP_BACKWARD + // _JUMP_BACKWARD_NO_INTERRUPT { - uint16_t the_counter = read_u16(&this_instr[1].cache); - (void)the_counter; + /* This bytecode is used in the `yield from` or `await` loop. + * If there is an interrupt, we want it handled in the innermost + * generator or coroutine, so we deliberately do not check it here. + * (see bpo-30039). + */ assert(oparg <= INSTR_OFFSET()); JUMPBY(-oparg); + } + // _JIT + { #ifdef _Py_TIER2 - #if ENABLE_SPECIALIZATION _Py_BackoffCounter counter = this_instr[1].counter; - if (backoff_counter_triggers(counter) && this_instr->op.code == JUMP_BACKWARD) { + if (backoff_counter_triggers(counter) && this_instr->op.code == JUMP_BACKWARD_JIT) { _Py_CODEUNIT *start = this_instr; /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */ while (oparg > 255) { @@ -5211,8 +5269,7 @@ else { ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); } - #endif /* ENABLE_SPECIALIZATION */ - #endif /* _Py_TIER2 */ + #endif } DISPATCH(); } @@ -5226,10 +5283,41 @@ * generator or coroutine, so we deliberately do not check it here. * (see bpo-30039). */ + assert(oparg <= INSTR_OFFSET()); JUMPBY(-oparg); DISPATCH(); } + TARGET(JUMP_BACKWARD_NO_JIT) { + frame->instr_ptr = next_instr; + next_instr += 2; + INSTRUCTION_STATS(JUMP_BACKWARD_NO_JIT); + static_assert(1 == 1, "incorrect cache size"); + /* Skip 1 cache entry */ + // _CHECK_PERIODIC + { + _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); + QSBR_QUIESCENT_STATE(tstate); + if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) { + _PyFrame_SetStackPointer(frame, stack_pointer); + int err = _Py_HandlePending(tstate); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (err != 0) goto error; + } + } + // _JUMP_BACKWARD_NO_INTERRUPT + { + /* This bytecode is used in the `yield from` or `await` loop. + * If there is an interrupt, we want it handled in the innermost + * generator or coroutine, so we deliberately do not check it here. + * (see bpo-30039). + */ + assert(oparg <= INSTR_OFFSET()); + JUMPBY(-oparg); + } + DISPATCH(); + } + TARGET(JUMP_FORWARD) { frame->instr_ptr = next_instr; next_instr += 1; @@ -5369,6 +5457,7 @@ TARGET(LOAD_ATTR_CLASS) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_CLASS); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -5407,6 +5496,7 @@ TARGET(LOAD_ATTR_CLASS_WITH_METACLASS_CHECK) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_CLASS_WITH_METACLASS_CHECK); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -5451,6 +5541,7 @@ TARGET(LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -5487,6 +5578,7 @@ TARGET(LOAD_ATTR_INSTANCE_VALUE) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_INSTANCE_VALUE); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -5540,6 +5632,7 @@ TARGET(LOAD_ATTR_METHOD_LAZY_DICT) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_METHOD_LAZY_DICT); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -5583,6 +5676,7 @@ TARGET(LOAD_ATTR_METHOD_NO_DICT) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_METHOD_NO_DICT); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -5619,6 +5713,7 @@ TARGET(LOAD_ATTR_METHOD_WITH_VALUES) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_METHOD_WITH_VALUES); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -5669,6 +5764,7 @@ TARGET(LOAD_ATTR_MODULE) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_MODULE); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -5724,6 +5820,7 @@ TARGET(LOAD_ATTR_NONDESCRIPTOR_NO_DICT) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_NONDESCRIPTOR_NO_DICT); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -5755,6 +5852,7 @@ TARGET(LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -5799,6 +5897,7 @@ TARGET(LOAD_ATTR_PROPERTY) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_PROPERTY); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -5864,6 +5963,7 @@ TARGET(LOAD_ATTR_SLOT) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_SLOT); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -5909,6 +6009,7 @@ TARGET(LOAD_ATTR_WITH_HINT) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 10; INSTRUCTION_STATS(LOAD_ATTR_WITH_HINT); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); @@ -6326,6 +6427,7 @@ TARGET(LOAD_GLOBAL_BUILTIN) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 5; INSTRUCTION_STATS(LOAD_GLOBAL_BUILTIN); static_assert(INLINE_CACHE_ENTRIES_LOAD_GLOBAL == 4, "incorrect cache size"); @@ -6380,6 +6482,7 @@ TARGET(LOAD_GLOBAL_MODULE) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 5; INSTRUCTION_STATS(LOAD_GLOBAL_MODULE); static_assert(INLINE_CACHE_ENTRIES_LOAD_GLOBAL == 4, "incorrect cache size"); @@ -7557,6 +7660,7 @@ TARGET(STORE_ATTR_INSTANCE_VALUE) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 5; INSTRUCTION_STATS(STORE_ATTR_INSTANCE_VALUE); static_assert(INLINE_CACHE_ENTRIES_STORE_ATTR == 4, "incorrect cache size"); @@ -7615,6 +7719,7 @@ TARGET(STORE_ATTR_SLOT) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 5; INSTRUCTION_STATS(STORE_ATTR_SLOT); static_assert(INLINE_CACHE_ENTRIES_STORE_ATTR == 4, "incorrect cache size"); @@ -7652,6 +7757,7 @@ TARGET(STORE_ATTR_WITH_HINT) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 5; INSTRUCTION_STATS(STORE_ATTR_WITH_HINT); static_assert(INLINE_CACHE_ENTRIES_STORE_ATTR == 4, "incorrect cache size"); @@ -8046,6 +8152,7 @@ TARGET(TO_BOOL_ALWAYS_TRUE) { _Py_CODEUNIT* const this_instr = frame->instr_ptr = next_instr; + (void)this_instr; next_instr += 4; INSTRUCTION_STATS(TO_BOOL_ALWAYS_TRUE); static_assert(INLINE_CACHE_ENTRIES_TO_BOOL == 3, "incorrect cache size"); diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 873378b43374c5..09a834bb38fa67 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -194,6 +194,8 @@ static void *opcode_targets[256] = { &&TARGET_FOR_ITER_LIST, &&TARGET_FOR_ITER_RANGE, &&TARGET_FOR_ITER_TUPLE, + &&TARGET_JUMP_BACKWARD_JIT, + &&TARGET_JUMP_BACKWARD_NO_JIT, &&TARGET_LOAD_ATTR_CLASS, &&TARGET_LOAD_ATTR_CLASS_WITH_METACLASS_CHECK, &&TARGET_LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN, @@ -232,8 +234,6 @@ static void *opcode_targets[256] = { &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, &&TARGET_INSTRUMENTED_END_FOR, &&TARGET_INSTRUMENTED_POP_ITER, &&TARGET_INSTRUMENTED_END_SEND, diff --git a/Python/optimizer.c b/Python/optimizer.c index e3950843964f11..b16695a3c3d33e 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -91,70 +91,13 @@ insert_executor(PyCodeObject *code, _Py_CODEUNIT *instr, int index, _PyExecutorO instr->op.arg = index; } - -static int -never_optimize( - _PyOptimizerObject* self, - _PyInterpreterFrame *frame, - _Py_CODEUNIT *instr, - _PyExecutorObject **exec, - int Py_UNUSED(stack_entries), - bool Py_UNUSED(progress_needed)) -{ - // This may be called if the optimizer is reset - return 0; -} - -PyTypeObject _PyDefaultOptimizer_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - .tp_name = "noop_optimizer", - .tp_basicsize = sizeof(_PyOptimizerObject), - .tp_itemsize = 0, - .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, -}; - -static _PyOptimizerObject _PyOptimizer_Default = { - PyObject_HEAD_INIT(&_PyDefaultOptimizer_Type) - .optimize = never_optimize, -}; - -_PyOptimizerObject * -_Py_GetOptimizer(void) -{ - PyInterpreterState *interp = _PyInterpreterState_GET(); - if (interp->optimizer == &_PyOptimizer_Default) { - return NULL; - } - Py_INCREF(interp->optimizer); - return interp->optimizer; -} - static _PyExecutorObject * make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFilter *dependencies); -_PyOptimizerObject * -_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject *optimizer) -{ - if (optimizer == NULL) { - optimizer = &_PyOptimizer_Default; - } - _PyOptimizerObject *old = interp->optimizer; - if (old == NULL) { - old = &_PyOptimizer_Default; - } - Py_INCREF(optimizer); - interp->optimizer = optimizer; - return old; -} - -int -_Py_SetTier2Optimizer(_PyOptimizerObject *optimizer) -{ - PyInterpreterState *interp = _PyInterpreterState_GET(); - _PyOptimizerObject *old = _Py_SetOptimizer(interp, optimizer); - Py_XDECREF(old); - return old == NULL ? -1 : 0; -} +static int +uop_optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *instr, + _PyExecutorObject **exec_ptr, int curr_stackentries, + bool progress_needed); /* Returns 1 if optimized, 0 if not optimized, and -1 for an error. * If optimized, *executor_ptr contains a new reference to the executor @@ -164,6 +107,7 @@ _PyOptimizer_Optimize( _PyInterpreterFrame *frame, _Py_CODEUNIT *start, _PyStackRef *stack_pointer, _PyExecutorObject **executor_ptr, int chain_depth) { + assert(_PyInterpreterState_GET()->jit); // The first executor in a chain and the MAX_CHAIN_DEPTH'th executor *must* // make progress in order to avoid infinite loops or excessively-long // side-exit chains. We can only insert the executor into the bytecode if @@ -172,12 +116,10 @@ _PyOptimizer_Optimize( bool progress_needed = chain_depth == 0; PyCodeObject *code = _PyFrame_GetCode(frame); assert(PyCode_Check(code)); - PyInterpreterState *interp = _PyInterpreterState_GET(); if (progress_needed && !has_space_for_executor(code, start)) { return 0; } - _PyOptimizerObject *opt = interp->optimizer; - int err = opt->optimize(opt, frame, start, executor_ptr, (int)(stack_pointer - _PyFrame_Stackbase(frame)), progress_needed); + int err = uop_optimize(frame, start, executor_ptr, (int)(stack_pointer - _PyFrame_Stackbase(frame)), progress_needed); if (err <= 0) { return err; } @@ -684,6 +626,7 @@ translate_bytecode_to_trace( } case JUMP_BACKWARD: + case JUMP_BACKWARD_JIT: ADD_TO_TRACE(_CHECK_PERIODIC, 0, 0, target); _Py_FALLTHROUGH; case JUMP_BACKWARD_NO_INTERRUPT: @@ -1241,7 +1184,6 @@ int effective_trace_length(_PyUOpInstruction *buffer, int length) static int uop_optimize( - _PyOptimizerObject *self, _PyInterpreterFrame *frame, _Py_CODEUNIT *instr, _PyExecutorObject **exec_ptr, @@ -1299,31 +1241,6 @@ uop_optimize( return 1; } -static void -uop_opt_dealloc(PyObject *self) { - PyObject_Free(self); -} - -PyTypeObject _PyUOpOptimizer_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - .tp_name = "uop_optimizer", - .tp_basicsize = sizeof(_PyOptimizerObject), - .tp_itemsize = 0, - .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, - .tp_dealloc = uop_opt_dealloc, -}; - -PyObject * -_PyOptimizer_NewUOpOptimizer(void) -{ - _PyOptimizerObject *opt = PyObject_New(_PyOptimizerObject, &_PyUOpOptimizer_Type); - if (opt == NULL) { - return NULL; - } - opt->optimize = uop_optimize; - return (PyObject *)opt; -} - /***************************************** * Executor management diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index f357ddfbcfb033..00a98af998cfce 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1306,14 +1306,7 @@ init_interp_main(PyThreadState *tstate) } else #endif { - PyObject *opt = _PyOptimizer_NewUOpOptimizer(); - if (opt == NULL) { - return _PyStatus_ERR("can't initialize optimizer"); - } - if (_Py_SetTier2Optimizer((_PyOptimizerObject *)opt)) { - return _PyStatus_ERR("can't install optimizer"); - } - Py_DECREF(opt); + interp->jit = true; } } } @@ -1665,11 +1658,10 @@ finalize_modules(PyThreadState *tstate) { PyInterpreterState *interp = tstate->interp; + // Invalidate all executors and turn off JIT: + interp->jit = false; #ifdef _Py_TIER2 - // Invalidate all executors and turn off tier 2 optimizer _Py_Executors_InvalidateAll(interp, 0); - _PyOptimizerObject *old = _Py_SetOptimizer(interp, NULL); - Py_XDECREF(old); #endif // Stop watching __builtin__ modifications diff --git a/Python/pystate.c b/Python/pystate.c index 26047edb459480..e6770ef40df740 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -655,11 +655,9 @@ init_interpreter(PyInterpreterState *interp, } interp->sys_profile_initialized = false; interp->sys_trace_initialized = false; -#ifdef _Py_TIER2 - (void)_Py_SetOptimizer(interp, NULL); + interp->jit = false; interp->executor_list_head = NULL; interp->trace_run_counter = JIT_CLEANUP_THRESHOLD; -#endif if (interp != &runtime->_main_interpreter) { /* Fix the self-referential, statically initialized fields. */ interp->dtoa = (struct _dtoa_state)_dtoa_state_INIT(interp); @@ -829,12 +827,6 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) tstate->_status.cleared = 0; } -#ifdef _Py_TIER2 - _PyOptimizerObject *old = _Py_SetOptimizer(interp, NULL); - assert(old != NULL); - Py_DECREF(old); -#endif - /* It is possible that any of the objects below have a finalizer that runs Python code or otherwise relies on a thread state or even the interpreter state. For now we trust that isn't diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 887591a681b25c..7e4cb45af05672 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2265,9 +2265,7 @@ sys_activate_stack_trampoline_impl(PyObject *module, const char *backend) { #ifdef PY_HAVE_PERF_TRAMPOLINE #ifdef _Py_JIT - _PyOptimizerObject* optimizer = _Py_GetOptimizer(); - if (optimizer != NULL) { - Py_DECREF(optimizer); + if (_PyInterpreterState_GET()->jit) { PyErr_SetString(PyExc_ValueError, "Cannot activate the perf trampoline if the JIT is active"); return NULL; } diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index fbb84fc7950fae..415d20e5b7fabb 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -388,9 +388,7 @@ Python/sysmodule.c - perf_map_state - Python/sysmodule.c - _PySys_ImplCacheTag - Python/sysmodule.c - _PySys_ImplName - Python/sysmodule.c - whatstrings - -Python/optimizer.c - _PyDefaultOptimizer_Type - Python/optimizer.c - _PyUOpExecutor_Type - -Python/optimizer.c - _PyUOpOptimizer_Type - Python/optimizer.c - _PyOptimizer_Default - Python/optimizer.c - _ColdExit_Type - Python/optimizer.c - Py_FatalErrorExecutor - diff --git a/Tools/cases_generator/tier1_generator.py b/Tools/cases_generator/tier1_generator.py index 59ce5c95852d28..13430524b26dcd 100644 --- a/Tools/cases_generator/tier1_generator.py +++ b/Tools/cases_generator/tier1_generator.py @@ -202,7 +202,7 @@ def generate_tier1_cases( needs_this = uses_this(inst) out.emit("\n") out.emit(f"TARGET({name}) {{\n") - unused_guard = "(void)this_instr;\n" if inst.family is None else "" + unused_guard = "(void)this_instr;\n" if inst.properties.needs_prev: out.emit(f"_Py_CODEUNIT* const prev_instr = frame->instr_ptr;\n") if needs_this and not inst.is_target: