Function dispatcher optimization #227

Merged · 2 commits · Jun 3, 2023
9 changes: 8 additions & 1 deletion docs/changelog.rst
@@ -15,10 +15,17 @@ case, both modules must use the same nanobind ABI version, or they will be
isolated from each other. Releases that don't explicitly mention an ABI version
below inherit that of the preceding release.

Version 1.4.0 (TBA)
-------------------

* Improved the efficiency of the function dispatch loop. (PR `#227
<https://github.com/wjakob/nanobind/pull/227>`__).
* ABI version 9.

Version 1.3.2 (June 2, 2023)
----------------------------

* Fixed compilation on 32 bit processorss (only i686 tested so far).
* Fixed compilation on 32 bit processors (only ``i686`` tested so far).
(PR `#224 <https://github.com/wjakob/nanobind/pull/224>`__).
* Fixed compilation on PyPy 3.8. (commit `cd8135
<https://github.com/wjakob/nanobind/commit/cd8135baa1da1213252272b5c9ecbf909e947597>`__).
2 changes: 1 addition & 1 deletion docs/lowlevel.rst
@@ -218,7 +218,7 @@ Here is what this might look like in an implementation:
};

// Register a new type MyTensor, and reserve space for sizeof(MyTensorMedadata)
nb::class_<MyTensor> cls(m, "MyTensor", nb::supplement<MyTensorMedadata>(), nb::is_final())
nb::class_<MyTensor> cls(m, "MyTensor", nb::supplement<MyTensorMedadata>())

/// Mutable reference to 'MyTensorMedadata' portion in Python type object
MyTensorMedadata &supplement = nb::type_supplement<MyTensorMedadata>(cls);
10 changes: 2 additions & 8 deletions include/nanobind/nb_class.h
@@ -45,14 +45,11 @@ enum class type_flags : uint32_t {
/// The class uses an intrusive reference counting approach
intrusive_ptr = (1 << 11),

/// Is this a trampoline class meant to be overloaded in Python?
is_trampoline = (1 << 12),

/// Is this a class that inherits from enable_shared_from_this?
/// If so, type_data::keep_shared_from_this_alive is also set.
has_shared_from_this = (1 << 13),
has_shared_from_this = (1 << 12),

// Five more flag bits available (14 through 18) without needing
// Six more flag bits available (13 through 18) without needing
// a larger reorganization
};

@@ -361,9 +358,6 @@ class class_ : public object {
d.flags |= (uint32_t) detail::type_init_flags::has_base;
}

if constexpr (!std::is_same_v<Alias, T>)
d.flags |= (uint32_t) detail::type_flags::is_trampoline;

if constexpr (detail::is_copy_constructible_v<T>) {
d.flags |= (uint32_t) detail::type_flags::is_copy_constructible;

42 changes: 25 additions & 17 deletions include/nanobind/trampoline.h
@@ -15,49 +15,57 @@
NAMESPACE_BEGIN(NB_NAMESPACE)
NAMESPACE_BEGIN(detail)

struct ticket;

NB_CORE void trampoline_new(void **data, size_t size, void *ptr) noexcept;
NB_CORE void trampoline_release(void **data, size_t size) noexcept;

NB_CORE PyObject *trampoline_lookup(void **data, size_t size, const char *name,
bool pure);
NB_CORE void trampoline_enter(void **data, size_t size, const char *name,
bool pure, ticket *ticket);
NB_CORE void trampoline_leave(ticket *ticket) noexcept;

template <size_t Size> struct trampoline {
mutable void *data[2 * Size + 1];

NB_INLINE trampoline(void *ptr) {
trampoline_new(data, Size, ptr);
}

NB_INLINE trampoline(void *ptr) { trampoline_new(data, Size, ptr); }
NB_INLINE ~trampoline() { trampoline_release(data, Size); }

NB_INLINE handle lookup(const char *name, bool pure) const {
return trampoline_lookup(data, Size, name, pure);
NB_INLINE handle base() const { return (PyObject *) data[0]; }
};

struct ticket {
handle self;
handle key;
ticket *prev{};
PyGILState_STATE state{};

template <size_t Size>
NB_INLINE ticket(const trampoline<Size> &t, const char *name, bool pure) {
trampoline_enter(t.data, Size, name, pure, this);
}

NB_INLINE handle base() const { return (PyObject *) data[0]; }
NB_INLINE ~ticket() noexcept { trampoline_leave(this); }
};


#define NB_TRAMPOLINE(base, size) \
using NBBase = base; \
using NBBase::NBBase; \
nanobind::detail::trampoline<size> nb_trampoline{ this }

#define NB_OVERRIDE_NAME(name, func, ...) \
nanobind::handle nb_key = nb_trampoline.lookup(name, false); \
using nb_ret_type = decltype(NBBase::func(__VA_ARGS__)); \
if (nb_key.is_valid()) { \
nanobind::gil_scoped_acquire nb_guard; \
nanobind::detail::ticket nb_ticket(nb_trampoline, name, false); \
if (nb_ticket.key.is_valid()) { \
return nanobind::cast<nb_ret_type>( \
nb_trampoline.base().attr(nb_key)(__VA_ARGS__)); \
nb_trampoline.base().attr(nb_ticket.key)(__VA_ARGS__)); \
} else \
return NBBase::func(__VA_ARGS__)

#define NB_OVERRIDE_PURE_NAME(name, func, ...) \
nanobind::handle nb_key = nb_trampoline.lookup(name, true); \
using nb_ret_type = decltype(NBBase::func(__VA_ARGS__)); \
nanobind::gil_scoped_acquire nb_guard; \
nanobind::detail::ticket nb_ticket(nb_trampoline, name, true); \
return nanobind::cast<nb_ret_type>( \
nb_trampoline.base().attr(nb_key)(__VA_ARGS__))
nb_trampoline.base().attr(nb_ticket.key)(__VA_ARGS__))

#define NB_OVERRIDE(func, ...) \
NB_OVERRIDE_NAME(#func, func, __VA_ARGS__)
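
Note: for readers without the surrounding code in front of them, below is a minimal sketch of how these macros are used on the binding side. The Animal, PyAnimal and my_ext names are illustrative and not part of this PR. NB_TRAMPOLINE reserves the cached-lookup slots, and each NB_OVERRIDE*/NB_OVERRIDE_PURE* expansion now constructs a detail::ticket; judging from its fields, the ticket resolves and caches the Python override (key), links to the enclosing call (prev, self) and holds the GIL state for the duration of the dispatched call, replacing the thread-local current_method bookkeeping removed elsewhere in this PR.

#include <nanobind/nanobind.h>
#include <nanobind/trampoline.h>
#include <nanobind/stl/string.h>

#include <string>

namespace nb = nanobind;

struct Animal {
    virtual ~Animal() = default;
    virtual std::string name() const { return "Animal"; }  // has a C++ default
    virtual std::string sound() const = 0;                  // must be overridden
};

struct PyAnimal : Animal {
    NB_TRAMPOLINE(Animal, 2);    // room for two overridable methods

    std::string name() const override {
        NB_OVERRIDE(name);       // use the Python override if present, else Animal::name()
    }
    std::string sound() const override {
        NB_OVERRIDE_PURE(sound); // raise if Python does not provide an override
    }
};

NB_MODULE(my_ext, m) {
    nb::class_<Animal, PyAnimal>(m, "Animal")
        .def(nb::init<>())
        .def("name", &Animal::name)
        .def("sound", &Animal::sound);
}

On the Python side, subclassing Animal and overriding sound() routes the virtual call through the ticket-based dispatch path shown above.
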
90 changes: 16 additions & 74 deletions src/nb_func.cpp
@@ -221,7 +221,9 @@ PyObject *nb_func_new(const void *in_) noexcept {
PyErr_Clear();
}

is_constructor = strcmp(f->name, "__init__") == 0;
// Is this method a constructor that takes a class binding as first parameter?
is_constructor = is_method && strcmp(f->name, "__init__") == 0 &&
strncmp(f->descr, "({%}", 4) == 0;

// Don't use implicit conversions in copy constructors (causes infinite recursion)
if (is_constructor && f->nargs == 2 && f->descr_types[0] &&
@@ -460,37 +462,11 @@ static PyObject *nb_func_vectorcall_complex(PyObject *self,

func_data *fr = nb_func_data(self);

const bool is_method = fr->flags & (uint32_t) func_flags::is_method;
bool is_constructor = false;
const bool is_method = fr->flags & (uint32_t) func_flags::is_method,
is_constructor = fr->flags & (uint32_t) func_flags::is_constructor;

uint32_t self_flags = 0;

PyObject *result = nullptr, *self_arg = nullptr;

if (is_method) {
self_arg = nargs_in > 0 ? args_in[0] : nullptr;

if (NB_LIKELY(self_arg)) {
PyTypeObject *self_tp = Py_TYPE(self_arg);

if (NB_LIKELY(nb_type_check((PyObject *) self_tp))) {
self_flags = nb_type_data(self_tp)->flags;
if (self_flags & (uint32_t) type_flags::is_trampoline)
current_method_data = current_method{ fr->name, self_arg };

is_constructor = fr->flags & (uint32_t) func_flags::is_constructor;
if (is_constructor) {
if (((nb_inst *) self_arg)->ready) {
PyErr_SetString(
PyExc_RuntimeError,
"nanobind::detail::nb_func_vectorcall(): the __init__ "
"method should not be called on an initialized object!");
return nullptr;
}
}
}
}
}
PyObject *result = nullptr,
*self_arg = (is_method && nargs_in > 0) ? args_in[0] : nullptr;

/* The following lines allocate memory on the stack, which is very efficient
but also potentially dangerous since it can be used to generate stack
@@ -503,7 +479,6 @@ static PyObject *nb_func_vectorcall_complex(PyObject *self,
return nullptr;
}


// Handler routine that will be invoked in case of an error condition
PyObject *(*error_handler)(PyObject *, PyObject *const *, size_t,
PyObject *) noexcept = nullptr;
@@ -688,8 +663,7 @@ static PyObject *nb_func_vectorcall_complex(PyObject *self,
nb_inst *self_arg_nb = (nb_inst *) self_arg;
self_arg_nb->destruct = true;
self_arg_nb->ready = true;

if (NB_UNLIKELY(self_flags & (uint32_t) type_flags::intrusive_ptr))
if (NB_UNLIKELY(self_arg_nb->intrusive))
nb_type_data(Py_TYPE(self_arg))
->set_self_py(inst_ptr(self_arg_nb), self_arg);
}
@@ -704,12 +678,9 @@ static PyObject *nb_func_vectorcall_complex(PyObject *self,
done:
cleanup.release();

if (error_handler)
if (NB_UNLIKELY(error_handler))
result = error_handler(self, args_in, nargs_in, kwargs_in);

if (self_flags & (uint32_t) type_flags::is_trampoline)
current_method_data = current_method{ nullptr, nullptr };

return result;
}

@@ -724,36 +695,11 @@ static PyObject *nb_func_vectorcall_simple(PyObject *self,
const size_t count = (size_t) Py_SIZE(self),
nargs_in = (size_t) NB_VECTORCALL_NARGS(nargsf);

const bool is_method = fr->flags & (uint32_t) func_flags::is_method;
bool is_constructor = false;

uint32_t self_flags = 0;

PyObject *result = nullptr, *self_arg = nullptr;

if (is_method) {
self_arg = nargs_in > 0 ? args_in[0] : nullptr;
const bool is_method = fr->flags & (uint32_t) func_flags::is_method,
is_constructor = fr->flags & (uint32_t) func_flags::is_constructor;

if (NB_LIKELY(self_arg)) {
PyTypeObject *self_tp = Py_TYPE(self_arg);
if (NB_LIKELY(nb_type_check((PyObject *) self_tp))) {
self_flags = nb_type_data(self_tp)->flags;
if (NB_UNLIKELY(self_flags & (uint32_t) type_flags::is_trampoline))
current_method_data = current_method{ fr->name, self_arg };

is_constructor = fr->flags & (uint32_t) func_flags::is_constructor;
if (is_constructor) {
if (NB_UNLIKELY(((nb_inst *) self_arg)->ready)) {
PyErr_SetString(PyExc_RuntimeError,
"nanobind::detail::nb_func_vectorcall_simple():"
" the __init__ method should not be called on "
"an initialized object!");
return nullptr;
}
}
}
}
}
PyObject *result = nullptr,
*self_arg = (is_method && nargs_in > 0) ? args_in[0] : nullptr;

/// Small array holding temporaries (implicit conversion/*args/**kwargs)
cleanup_list cleanup(self_arg);
@@ -817,8 +763,7 @@ static PyObject *nb_func_vectorcall_simple(PyObject *self,
nb_inst *self_arg_nb = (nb_inst *) self_arg;
self_arg_nb->destruct = true;
self_arg_nb->ready = true;

if (NB_UNLIKELY(self_flags & (uint32_t) type_flags::intrusive_ptr))
if (NB_UNLIKELY(self_arg_nb->intrusive))
nb_type_data(Py_TYPE(self_arg))
->set_self_py(inst_ptr(self_arg_nb), self_arg);
}
@@ -836,9 +781,6 @@ static PyObject *nb_func_vectorcall_simple(PyObject *self,
if (NB_UNLIKELY(error_handler))
result = error_handler(self, args_in, nargs_in, kwargs_in);

if (NB_UNLIKELY(self_flags & (uint32_t) type_flags::is_trampoline))
current_method_data = current_method{ nullptr, nullptr };

return result;
}

@@ -996,7 +938,7 @@ static void nb_func_render_signature(const func_data *f) noexcept {

case '%':
check(*descr_type,
"nb::detail::nb_func_finalize(): missing type!");
"nb::detail::nb_func_render_signature(): missing type!");

if (!(is_method && arg_index == 0)) {
auto it = internals.type_c2p.find(std::type_index(**descr_type));
@@ -1023,7 +965,7 @@
}

check(arg_index == f->nargs && !*descr_type,
"nanobind::detail::nb_func_finalize(%s): arguments inconsistent.",
"nanobind::detail::nb_func_render_signature(%s): arguments inconsistent.",
f->name);
}

5 changes: 1 addition & 4 deletions src/nb_internals.cpp
@@ -17,7 +17,7 @@

/// Tracks the ABI of nanobind
#ifndef NB_INTERNALS_VERSION
# define NB_INTERNALS_VERSION 8
# define NB_INTERNALS_VERSION 9
#endif

/// On MSVC, debug and release builds are not ABI-compatible!
@@ -190,9 +190,6 @@ static PyType_Spec nb_bound_method_spec = {
/* .slots = */ nb_bound_method_slots
};

NB_THREAD_LOCAL current_method current_method_data =
current_method{ nullptr, nullptr };

nb_internals *internals_p = nullptr;

void default_exception_translator(const std::exception_ptr &p, void *) {
9 changes: 3 additions & 6 deletions src/nb_internals.h
@@ -66,6 +66,9 @@ struct nb_inst { // usually: 24 bytes

/// Does this instance hold reference to others? (via internals.keep_alive)
bool clear_keep_alive : 1;

/// Does this instance use intrusive reference counting?
bool intrusive : 1;
};

static_assert(sizeof(nb_inst) == sizeof(PyObject) + sizeof(uint32_t) * 2);
@@ -246,12 +249,6 @@ struct nb_internals {
# define NB_SLOT(internals, type, name) type.name
#endif

struct current_method {
const char *name;
PyObject *self;
};

extern NB_THREAD_LOCAL current_method current_method_data;
extern nb_internals *internals_p;
extern nb_internals *internals_fetch();

16 changes: 10 additions & 6 deletions src/nb_type.cpp
@@ -114,6 +114,8 @@ PyObject *inst_new_impl(PyTypeObject *tp, void *value) {
self->internal = false;
}

self->intrusive = t->flags & (uint32_t) type_flags::intrusive_ptr;

// Update hash table that maps from C++ to Python instance
auto [it, success] =
internals_get().inst_c2p.try_emplace(value, self);
@@ -982,12 +984,14 @@ bool nb_type_get(const std::type_info *cpp_type, PyObject *src, uint8_t flags,
if (valid) {
nb_inst *inst = (nb_inst *) src;

if (!inst->ready &&
(flags & (uint8_t) cast_flags::construct) == 0) {
PyErr_WarnFormat(PyExc_RuntimeWarning, 1,
"nanobind: attempted to access an "
"uninitialized instance of type '%s'!\n",
t->name);
if (((flags & (uint8_t) cast_flags::construct) != 0) == inst->ready) {
PyErr_WarnFormat(
PyExc_RuntimeWarning, 1, "nanobind: %s of type '%s'!\n",
inst->ready
? "attempted to initialize an already-initialized "
"instance"
: "attempted to access an uninitialized instance",
t->name);
return false;
}
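
Note: the rewritten check above collapses two failure modes into one comparison: the cast is rejected whenever the caller's construct intent and the instance's ready bit agree. A standalone paraphrase of the truth table (illustrative helper, not nanobind code):

// Paraphrase of `((flags & cast_flags::construct) != 0) == inst->ready`:
// a cast may proceed only when construction intent and initialization state disagree.
//
//   construct  ready   meaning                                     result
//   false      true    ordinary access of an initialized object    OK
//   true       false   __init__ filling in a fresh object          OK
//   false      false   access to an uninitialized object           warn + fail
//   true       true    __init__ on an already-initialized object   warn + fail
constexpr bool cast_may_proceed(bool construct, bool ready) {
    return construct != ready;
}

static_assert(cast_may_proceed(false, true));    // plain access, initialized
static_assert(cast_may_proceed(true,  false));   // construction of a fresh instance
static_assert(!cast_may_proceed(false, false));  // uninitialized access -> RuntimeWarning
static_assert(!cast_may_proceed(true,  true));   // double initialization -> RuntimeWarning
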
