-
-
Notifications
You must be signed in to change notification settings - Fork 31.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
gh-89279: In ceval.c, redefine some macros for speed #32387
Changes from 23 commits
0be9d62
abd9118
1724056
50e3d7b
59da3bf
ab6660e
ae6b53c
01cff81
69da380
2894f6e
4b70976
5617fcd
a901b91
cceb531
26e8107
11084c0
b7dfcdc
9a5c57c
a8cba6e
c43ec56
9a15194
5c992d2
7b342e4
3b21c52
41cb067
87aaf81
3508f45
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Improve interpreter performance on Windows by inlining a few specific macros. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,6 +44,34 @@ | |
# error "ceval.c must be build with Py_BUILD_CORE define for best performance" | ||
#endif | ||
|
||
#ifndef Py_DEBUG | ||
// GH-89279: The MSVC compiler does not inline these static inline functions | ||
// in PGO build in _PyEval_EvalFrameDefault(), because this function is over | ||
// the limit of PGO, and that limit cannot be configured. | ||
// Define them as macros to make sure that they are always inlined by the | ||
// preprocessor. | ||
|
||
#undef Py_DECREF | ||
#define Py_DECREF(arg) do { PyObject *op = _PyObject_CAST(arg); if (--op->ob_refcnt == 0) { destructor d = Py_TYPE(op)->tp_dealloc; (*d)(op); } } while (0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Such long line is one of the reason why I wrote PEP 670 :-) I propose to make the code more readable by putting one statement per line, as done in static inline functions: #ifndef Py_DEBUG
// GH-89279: The MSVC compiler does not inline these static inline functions
// in PGO build in _PyEval_EvalFrameDefault(), because this function is over
// the limit of PGO, and that limit cannot be configured.
// Define them as macros to make sure that they are always inlined by the
// preprocessor.
#undef Py_DECREF
#define Py_DECREF(arg) \
do { \
PyObject *op = _PyObject_CAST(arg); \
if (--op->ob_refcnt == 0) { \
destructor dealloc = Py_TYPE(op)->tp_dealloc; \
(*dealloc)(op); \
} \
} while (0)
#undef Py_XDECREF
#define Py_XDECREF(arg) \
do { \
PyObject *xop = _PyObject_CAST(arg); \
if (xop != NULL) { \
Py_DECREF(xop); \
} \
} while (0)
#undef Py_IS_TYPE
#define Py_IS_TYPE(ob, type) \
(_PyObject_CAST_CONST(ob)->ob_type == (type))
#endif
// GH-89279: Similar to above, force inlining by using a macro.
#if defined(_MSC_VER) && SIZEOF_INT == 4
#define _Py_atomic_load_relaxed_int32(ATOMIC_VAL) \
(assert(sizeof((ATOMIC_VAL)->_value) == 4), \
*((volatile int*)&((ATOMIC_VAL)->_value)))
#else
#define _Py_atomic_load_relaxed_int32(ATOMIC_VAL) \
_Py_atomic_load_relaxed(ATOMIC_VAL)
#endif I declared Py_XDECREF just after Py_DECREF. I also renamed "op1" to "xop" in Py_XDECREF. UPDATE: Oh, and to copy/paste Py_IS_TYPE(), I used _PyObject_CAST_CONST(). |
||
|
||
#undef Py_IS_TYPE | ||
#define Py_IS_TYPE(ob, type) (_PyObject_CAST(ob)->ob_type == (type)) | ||
|
||
#undef Py_XDECREF | ||
#define Py_XDECREF(arg) do { PyObject *op1 = _PyObject_CAST(arg); if (op1 != NULL) { Py_DECREF(op1); } } while (0) | ||
|
||
#undef _Py_DECREF_SPECIALIZED | ||
#define _Py_DECREF_SPECIALIZED(arg, dealloc) do { PyObject *op = _PyObject_CAST(arg); if (--op->ob_refcnt == 0) { (dealloc)(op); } } while (0) | ||
#endif | ||
gvanrossum marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
// GH-89279: Similar to above, force inlining by using a macro. | ||
#if defined(_MSC_VER) && SIZEOF_INT == 4 | ||
#define _Py_atomic_load_relaxed_int32(ATOMIC_VAL) (assert(sizeof((ATOMIC_VAL)->_value) == 4), *((volatile int*)&((ATOMIC_VAL)->_value))) | ||
gvanrossum marked this conversation as resolved.
Show resolved
Hide resolved
|
||
#else | ||
#define _Py_atomic_load_relaxed_int32(ATOMIC_VAL) _Py_atomic_load_relaxed(ATOMIC_VAL) | ||
#endif | ||
|
||
|
||
/* Forward declarations */ | ||
static PyObject *trace_call_function( | ||
PyThreadState *tstate, PyObject *callable, PyObject **stack, | ||
|
@@ -192,10 +220,10 @@ COMPUTE_EVAL_BREAKER(PyInterpreterState *interp, | |
struct _ceval_state *ceval2) | ||
{ | ||
_Py_atomic_store_relaxed(&ceval2->eval_breaker, | ||
_Py_atomic_load_relaxed(&ceval2->gil_drop_request) | ||
| (_Py_atomic_load_relaxed(&ceval->signals_pending) | ||
_Py_atomic_load_relaxed_int32(&ceval2->gil_drop_request) | ||
| (_Py_atomic_load_relaxed_int32(&ceval->signals_pending) | ||
&& _Py_ThreadCanHandleSignals(interp)) | ||
| (_Py_atomic_load_relaxed(&ceval2->pending.calls_to_do) | ||
| (_Py_atomic_load_relaxed_int32(&ceval2->pending.calls_to_do) | ||
&& _Py_ThreadCanHandlePendingCalls()) | ||
| ceval2->pending.async_exc); | ||
} | ||
|
@@ -740,7 +768,7 @@ _Py_FinishPendingCalls(PyThreadState *tstate) | |
|
||
struct _pending_calls *pending = &tstate->interp->ceval.pending; | ||
|
||
if (!_Py_atomic_load_relaxed(&(pending->calls_to_do))) { | ||
if (!_Py_atomic_load_relaxed_int32(&(pending->calls_to_do))) { | ||
return; | ||
} | ||
|
||
|
@@ -1187,22 +1215,22 @@ eval_frame_handle_pending(PyThreadState *tstate) | |
struct _ceval_runtime_state *ceval = &runtime->ceval; | ||
|
||
/* Pending signals */ | ||
if (_Py_atomic_load_relaxed(&ceval->signals_pending)) { | ||
if (_Py_atomic_load_relaxed_int32(&ceval->signals_pending)) { | ||
if (handle_signals(tstate) != 0) { | ||
return -1; | ||
} | ||
} | ||
|
||
/* Pending calls */ | ||
struct _ceval_state *ceval2 = &tstate->interp->ceval; | ||
if (_Py_atomic_load_relaxed(&ceval2->pending.calls_to_do)) { | ||
if (_Py_atomic_load_relaxed_int32(&ceval2->pending.calls_to_do)) { | ||
if (make_pending_calls(tstate->interp) != 0) { | ||
return -1; | ||
} | ||
} | ||
|
||
/* GIL drop request */ | ||
if (_Py_atomic_load_relaxed(&ceval2->gil_drop_request)) { | ||
if (_Py_atomic_load_relaxed_int32(&ceval2->gil_drop_request)) { | ||
/* Give another thread a chance */ | ||
if (_PyThreadState_Swap(&runtime->gilstate, NULL) != tstate) { | ||
Py_FatalError("tstate mix-up"); | ||
|
@@ -1360,7 +1388,7 @@ eval_frame_handle_pending(PyThreadState *tstate) | |
|
||
#define CHECK_EVAL_BREAKER() \ | ||
_Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); \ | ||
if (_Py_atomic_load_relaxed(eval_breaker)) { \ | ||
if (_Py_atomic_load_relaxed_int32(eval_breaker)) { \ | ||
goto handle_eval_breaker; \ | ||
} | ||
|
||
|
@@ -1640,10 +1668,8 @@ typedef struct { | |
PyObject *kwnames; | ||
} CallShape; | ||
|
||
static inline bool | ||
is_method(PyObject **stack_pointer, int args) { | ||
return PEEK(args+2) != NULL; | ||
} | ||
// GH-89279: Must be a macro to be sure it's inlined by MSVC. | ||
#define is_method(stack_pointer, args) (PEEK((args)+2) != NULL) | ||
gvanrossum marked this conversation as resolved.
Show resolved
Hide resolved
gvanrossum marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
#define KWNAMES_LEN() \ | ||
(call_shape.kwnames == NULL ? 0 : ((int)PyTuple_GET_SIZE(call_shape.kwnames))) | ||
|
@@ -1794,7 +1820,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int | |
PREDICTED(RESUME_QUICK); | ||
assert(tstate->cframe == &cframe); | ||
assert(frame == cframe.current_frame); | ||
if (_Py_atomic_load_relaxed(eval_breaker) && oparg < 2) { | ||
if (_Py_atomic_load_relaxed_int32(eval_breaker) && oparg < 2) { | ||
goto handle_eval_breaker; | ||
} | ||
DISPATCH(); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if only MSVC is affected, only redefine these macros for MSVC, no?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I prefer us running the same code on all platforms if possible, so that it's less likely that someone accidentally breaks the Windows version by want appears to be an innocent change on Linux.
(I have to make an exception for load_relaxed because it is heavily platform-specific.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To be extra safe, you may also check Py_REF_DEBUG, even if Py_REF_DEBUG should not be defined if Py_DEBUG is defined.
Or maybe object.h should fail with
#error
with Py_REF_DEBUG is defined whereas Py_DEBUG is not.