-
-
Notifications
You must be signed in to change notification settings - Fork 31.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
gh-89279: In ceval.c, redefine some macros for speed #32387
Changes from 7 commits
0be9d62
abd9118
1724056
50e3d7b
59da3bf
ab6660e
ae6b53c
01cff81
69da380
2894f6e
4b70976
5617fcd
a901b91
cceb531
26e8107
11084c0
b7dfcdc
9a5c57c
a8cba6e
c43ec56
9a15194
5c992d2
7b342e4
3b21c52
41cb067
87aaf81
3508f45
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,6 +44,23 @@ | |
# error "ceval.c must be build with Py_BUILD_CORE define for best performance" | ||
#endif | ||
|
||
#if !defined(Py_DEBUG) | ||
// bpo-45116: The MSVC compiler fails to inline these in PGO build, | ||
// and they're kind of important for performance. | ||
|
||
#undef Py_DECREF | ||
#define Py_DECREF(arg) do { PyObject *op = _PyObject_CAST(arg); if (--op->ob_refcnt == 0) { destructor d = Py_TYPE(op)->tp_dealloc; (*d)(op); } } while (0) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Such a long line is one of the reasons why I wrote PEP 670 :-) I propose to make the code more readable by putting one statement per line, as done in static inline functions: #ifndef Py_DEBUG
// GH-89279: The MSVC compiler does not inline these static inline functions
// in PGO build in _PyEval_EvalFrameDefault(), because this function is over
// the limit of PGO, and that limit cannot be configured.
// Define them as macros to make sure that they are always inlined by the
// preprocessor.
#undef Py_DECREF
#define Py_DECREF(arg) \
do { \
PyObject *op = _PyObject_CAST(arg); \
if (--op->ob_refcnt == 0) { \
destructor dealloc = Py_TYPE(op)->tp_dealloc; \
(*dealloc)(op); \
} \
} while (0)
#undef Py_XDECREF
#define Py_XDECREF(arg) \
do { \
PyObject *xop = _PyObject_CAST(arg); \
if (xop != NULL) { \
Py_DECREF(xop); \
} \
} while (0)
#undef Py_IS_TYPE
#define Py_IS_TYPE(ob, type) \
(_PyObject_CAST_CONST(ob)->ob_type == (type))
#endif
// GH-89279: Similar to above, force inlining by using a macro.
#if defined(_MSC_VER) && SIZEOF_INT == 4
#define _Py_atomic_load_relaxed_int32(ATOMIC_VAL) \
(assert(sizeof((ATOMIC_VAL)->_value) == 4), \
*((volatile int*)&((ATOMIC_VAL)->_value)))
#else
#define _Py_atomic_load_relaxed_int32(ATOMIC_VAL) \
_Py_atomic_load_relaxed(ATOMIC_VAL)
#endif I declared Py_XDECREF just after Py_DECREF. I also renamed "op1" to "xop" in Py_XDECREF. UPDATE: Oh, and to copy/paste Py_IS_TYPE(), I used _PyObject_CAST_CONST(). |
||
|
||
#undef Py_IS_TYPE | ||
#define Py_IS_TYPE(ob, type) (_PyObject_CAST(ob)->ob_type == (type)) | ||
|
||
#undef Py_XDECREF | ||
#define Py_XDECREF(arg) do { PyObject *op1 = _PyObject_CAST(arg); if (op1 != NULL) { Py_DECREF(op1); } } while (0) | ||
|
||
#endif | ||
|
||
#define _Py_atomic_load_32bit_impl(value, order) (assert((order) == _Py_memory_order_relaxed), *(value)) | ||
sweeneyde marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
/* Forward declarations */ | ||
static PyObject *trace_call_function( | ||
PyThreadState *tstate, PyObject *callable, PyObject **stack, | ||
|
@@ -1574,10 +1591,7 @@ typedef struct { | |
PyObject *kwnames; | ||
} CallShape; | ||
|
||
static inline bool | ||
is_method(PyObject **stack_pointer, int args) { | ||
return PEEK(args+2) != NULL; | ||
} | ||
#define is_method(stack_pointer, args) (PEEK((args)+2) != NULL) | ||
gvanrossum marked this conversation as resolved.
Show resolved
Hide resolved
gvanrossum marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
#define KWNAMES_LEN() \ | ||
(call_shape.kwnames == NULL ? 0 : ((int)PyTuple_GET_SIZE(call_shape.kwnames))) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You might explain that MSVC can inline them in general; it only fails to do so here because of an arbitrary limit which cannot be configured.
neonene wrote "PR25244 told me the amount of code in _PyEval_EvalFrameDefault() is over the limit of PGO."
I suggest something like:
Maybe tomorrow, MSVC PGO limits will change or become configurable, and it will become possible to remove these macros on more recent MSVC versions.