Skip to content

Commit

Permalink
Fix race condition in free-threaded Python (fixes issue #867)
Browse files Browse the repository at this point in the history
This commit addresses an issue arising when multiple threads want to
access the Python object associated with the same C++ instance, which
does not exist yet and therefore must be created. @vfdev-5 reported that
TSAN detects a race condition in code that uses this pattern, caused by
concurrent unprotected reads/writes of internal ``nb_inst`` fields.

To fix this issue, we split instance creation and registration into
a two-step process. The latter is only done when the object is fully
constructed.
  • Loading branch information
wjakob committed Jan 28, 2025
1 parent 534fd8c commit 0035899
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 11 deletions.
5 changes: 5 additions & 0 deletions docs/free_threaded.rst
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,11 @@ supplemental locking. The :ref:`next section <free-threaded-locks>` explains a
Python-specific locking primitive that can be used in binding code besides
the solutions mentioned above.

Multi-threaded code that concurrently returns the same C++ instance via the
:cpp:enumerator:`nb::rv_policy::reference` policy may observe situations, where
multiple Python objects are created that all wrap the same C++ instance
(however, this is harmless aside from the duplication).

.. _free-threaded-locks:

Python locks
Expand Down
30 changes: 23 additions & 7 deletions src/nb_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ PyObject *inst_new_int(PyTypeObject *tp, PyObject * /* args */,
self->clear_keep_alive = 0;
self->intrusive = intrusive;
self->unused = 0;

// Make the object compatible with nb_try_inc_ref (free-threaded builds only)
nb_enable_try_inc_ref((PyObject *) self);

// Update hash table that maps from C++ to Python instance
Expand All @@ -111,7 +113,9 @@ PyObject *inst_new_int(PyTypeObject *tp, PyObject * /* args */,
return (PyObject *) self;
}

/// Allocate memory for a nb_type instance with external storage
/// Allocate memory for a nb_type instance with external storage. In contrast to
/// 'inst_new_int()', this does not yet register the instance in the internal
/// data structures. The function 'inst_register()' must be used to do so.
PyObject *inst_new_ext(PyTypeObject *tp, void *value) {
bool gc = PyType_HasFeature(tp, Py_TPFLAGS_HAVE_GC);

Expand Down Expand Up @@ -165,13 +169,20 @@ PyObject *inst_new_ext(PyTypeObject *tp, void *value) {
self->clear_keep_alive = 0;
self->intrusive = intrusive;
self->unused = 0;

// Make the object compatible with nb_try_inc_ref (free-threaded builds only)
nb_enable_try_inc_ref((PyObject *) self);

return (PyObject *) self;
}

/// Register the object constructed by 'inst_new_ext()' in the internal data structures
static void inst_register(PyObject *inst, void *value) noexcept {
nb_shard &shard = internals->shard(value);
lock_shard guard(shard);

// Update hash table that maps from C++ to Python instance
auto [it, success] = shard.inst_c2p.try_emplace(value, self);
auto [it, success] = shard.inst_c2p.try_emplace(value, inst);

if (NB_UNLIKELY(!success)) {
void *entry = it->second;
Expand All @@ -188,8 +199,9 @@ PyObject *inst_new_ext(PyTypeObject *tp, void *value) {

nb_inst_seq *seq = nb_get_seq(entry);
while (true) {
check((nb_inst *) seq->inst != self,
"nanobind::detail::inst_new_ext(): duplicate instance!");
// The following should never happen
check(inst != seq->inst, "nanobind::detail::inst_new_ext(): duplicate instance!");

if (!seq->next)
break;
seq = seq->next;
Expand All @@ -199,14 +211,13 @@ PyObject *inst_new_ext(PyTypeObject *tp, void *value) {
check(next,
"nanobind::detail::inst_new_ext(): list element allocation failed!");

next->inst = (PyObject *) self;
next->inst = (PyObject *) inst;
next->next = nullptr;
seq->next = next;
}

return (PyObject *) self;
}


static void inst_dealloc(PyObject *self) {
PyTypeObject *tp = Py_TYPE(self);
const type_data *t = nb_type_data(tp);
Expand Down Expand Up @@ -1737,6 +1748,9 @@ static PyObject *nb_type_put_common(void *value, type_data *t, rv_policy rvp,
if (intrusive)
t->set_self_py(new_value, (PyObject *) inst);

if (!create_new)
inst_register((PyObject *) inst, value);

return (PyObject *) inst;
}

Expand Down Expand Up @@ -2082,6 +2096,7 @@ PyObject *nb_inst_reference(PyTypeObject *t, void *ptr, PyObject *parent) {
nbi->state = nb_inst::state_ready;
if (parent)
keep_alive(result, parent);
inst_register(result, ptr);
return result;
}

Expand All @@ -2092,6 +2107,7 @@ PyObject *nb_inst_take_ownership(PyTypeObject *t, void *ptr) {
nb_inst *nbi = (nb_inst *) result;
nbi->destruct = nbi->cpp_delete = true;
nbi->state = nb_inst::state_ready;
inst_register(result, ptr);
return result;
}

Expand Down
8 changes: 4 additions & 4 deletions tests/test_thread.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def test02_global_lock(n_threads=8):
n = 100000
c = Counter()
def f():
for i in range(n):
for _ in range(n):
t.inc_global(c)

parallelize(f, n_threads=n_threads)
Expand All @@ -53,7 +53,7 @@ def test04_locked_function(n_threads=8):
n = 100000
c = Counter()
def f():
for i in range(n):
for _ in range(n):
t.inc_safe(c)

parallelize(f, n_threads=n_threads)
Expand All @@ -77,11 +77,11 @@ def f():
assert c.value == n * n_threads


def test_06_global_wrapper(n_threads=8):
def test06_global_wrapper(n_threads=8):
# Check wrapper lookup racing with wrapper deallocation
n = 10000
def f():
for i in range(n):
for _ in range(n):
GlobalData.get()
GlobalData.get()
GlobalData.get()
Expand Down

0 comments on commit 0035899

Please sign in to comment.