Skip to content

Commit

Permalink
pmark dispatch
Browse files Browse the repository at this point in the history
  • Loading branch information
Diogo Netto committed Sep 7, 2022
1 parent ca30667 commit d68f4f3
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 121 deletions.
142 changes: 123 additions & 19 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1560,13 +1560,117 @@ static void gc_sweep_perm_alloc(void)

// mark phase

void (*gc_markqueue_push)(jl_gc_markqueue_t *mq, void *v) JL_NOTSAFEPOINT;
void *(*gc_markqueue_pop)(jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT;
void (*gc_chunkqueue_push)(jl_gc_markqueue_t *mq, jl_gc_chunk_t *c) JL_NOTSAFEPOINT;
jl_gc_chunk_t (*gc_chunkqueue_pop)(jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT;
void (*gc_mark_entry_seq)(jl_ptls_t ptls) JL_NOTSAFEPOINT;
void (*gc_mark_exit_seq)(jl_ptls_t ptls) JL_NOTSAFEPOINT;
void (*gc_wake_workers)(jl_ptls_t ptls) JL_NOTSAFEPOINT;
// Push gc work item `v` into `mq`, picking the implementation from
// `jl_options.parallel_marking`.
//
// The `2` variant is used when parallel marking is enabled (`== 1`) and the
// `1` variant otherwise. This matches the steal/entry/exit helpers, which
// also treat `parallel_marking == 1` as the parallel path, so producers and
// stealers operate on the same queue flavour.
STATIC_INLINE void gc_markqueue_push(jl_gc_markqueue_t *mq, void *v) JL_NOTSAFEPOINT
{
    if (jl_options.parallel_marking == 1) {
        gc_markqueue_push2(mq, v);
    }
    else {
        gc_markqueue_push1(mq, v);
    }
}

// Push chunk `*c` into `mq`, picking the implementation from
// `jl_options.parallel_marking`.
//
// The `2` variant is used when parallel marking is enabled (`== 1`) and the
// `1` variant otherwise, so that pushes agree with the steal helpers, which
// only touch the work-stealing chunk queue when `parallel_marking == 1`.
STATIC_INLINE void gc_chunkqueue_push(jl_gc_markqueue_t *mq, jl_gc_chunk_t *c) JL_NOTSAFEPOINT
{
    if (jl_options.parallel_marking == 1) {
        gc_chunkqueue_push2(mq, c);
    }
    else {
        gc_chunkqueue_push1(mq, c);
    }
}

// Pop a gc work item from `mq`, picking the implementation from
// `jl_options.parallel_marking`.
//
// The `2` variant is used when parallel marking is enabled (`== 1`) and the
// `1` variant otherwise. Returns whatever the selected variant returns.
STATIC_INLINE void *gc_markqueue_pop(jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT
{
    if (jl_options.parallel_marking == 1) {
        return gc_markqueue_pop2(mq);
    }
    else {
        return gc_markqueue_pop1(mq);
    }
}

// Pop a chunk from `mq`, picking the implementation from
// `jl_options.parallel_marking`.
//
// The `2` variant is used when parallel marking is enabled (`== 1`) and the
// `1` variant otherwise. Returns whatever the selected variant returns.
STATIC_INLINE jl_gc_chunk_t gc_chunkqueue_pop(jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT
{
    if (jl_options.parallel_marking == 1) {
        return gc_chunkqueue_pop2(mq);
    }
    else {
        return gc_chunkqueue_pop1(mq);
    }
}

// Take a gc work item out of another queue `mq`.
//
// In GC_VERIFY builds, or whenever `jl_options.parallel_marking` is not 1,
// this is just `gc_markqueue_pop1`; otherwise the item is obtained through
// `ws_queue_steal_from` on `mq->q`.
STATIC_INLINE void *gc_markqueue_steal_from(jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT
{
#ifdef GC_VERIFY
    return gc_markqueue_pop1(mq);
#else
    if (jl_options.parallel_marking != 1)
        return gc_markqueue_pop1(mq);
    return ws_queue_steal_from(&mq->q);
#endif
}

// Steal a chunk from the chunk queue of `mq`.
//
// When built without GC_VERIFY and `jl_options.parallel_marking == 1`, the
// steal is attempted directly on `mq->cq`: the returned chunk has
// `cid == empty_chunk` if the queue was empty or the compare-and-swap on the
// anchor failed. In every other configuration the call is forwarded to
// `gc_chunkqueue_pop1`.
STATIC_INLINE jl_gc_chunk_t gc_chunkqueue_steal_from(jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT
{
#ifndef GC_VERIFY
if (jl_options.parallel_marking == 1) {
jl_gc_chunk_t c;
// Default result: the `empty_chunk` marker
c.cid = empty_chunk;
idemp_ws_queue_t *cq = &mq->cq;
// Snapshot the anchor and the backing array with acquire loads
ws_anchor_t anc = jl_atomic_load_acquire(&cq->anchor);
ws_array_t *ary = jl_atomic_load_acquire(&cq->array);
if (anc.tail == 0) {
// Empty queue
return c;
}
// Read the candidate before trying to claim it; it is discarded below if the claim fails
c = ((jl_gc_chunk_t *)ary->buffer)[anc.tail - 1];
// New anchor built from `anc.tail - 1` and the unchanged `anc.tag`
ws_anchor_t anc2 = {anc.tail - 1, anc.tag};
if (!jl_atomic_cmpswap(&cq->anchor, &anc, anc2)) {
// Steal failed
c.cid = empty_chunk;
}
return c;
}
#endif
// GC_VERIFY build, or parallel marking not enabled: go through the `1` pop variant
return gc_chunkqueue_pop1(mq);
}

// Bookkeeping performed by a thread when it starts marking.
//
// Does nothing in GC_VERIFY builds or when `jl_options.parallel_marking` is
// not 1. Otherwise it first increments `nworkers_marking` and then switches
// `ptls->gc_state` to `JL_GC_STATE_PARALLEL`.
STATIC_INLINE void gc_mark_entry_seq(jl_ptls_t ptls) JL_NOTSAFEPOINT
{
#ifndef GC_VERIFY
    if (jl_options.parallel_marking != 1)
        return;
    // Register as a marking worker before publishing the new gc state
    jl_atomic_fetch_add(&nworkers_marking, 1);
    jl_atomic_exchange(&ptls->gc_state, JL_GC_STATE_PARALLEL);
#endif
}

// Bookkeeping performed by a thread when it stops marking.
//
// Does nothing in GC_VERIFY builds or when `jl_options.parallel_marking` is
// not 1. Otherwise it first stores `JL_GC_STATE_WAITING` into
// `ptls->gc_state` (release) and then decrements `nworkers_marking`.
STATIC_INLINE void gc_mark_exit_seq(jl_ptls_t ptls) JL_NOTSAFEPOINT
{
#ifndef GC_VERIFY
    if (jl_options.parallel_marking != 1)
        return;
    // Publish the waiting state before unregistering as a marking worker
    jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_WAITING);
    jl_atomic_fetch_add(&nworkers_marking, -1);
#endif
}

// Wake up the other threads so they can take part in parallel marking.
//
// Does nothing in GC_VERIFY builds or when `jl_options.parallel_marking` is
// not 1. Otherwise: issues a fence, wakes libuv and broadcasts on
// `safepoint_cond` when there is more than one thread, then signals the
// `wake_signal` of every thread except the caller (`ptls->tid`).
void gc_wake_workers(jl_ptls_t ptls) JL_NOTSAFEPOINT
{
#ifndef GC_VERIFY
    if (jl_options.parallel_marking != 1)
        return;
    jl_fence();
    if (jl_n_threads > 1) {
        jl_wake_libuv();
        uv_cond_broadcast(&safepoint_cond);
    }
    for (int tid = 0; tid < jl_n_threads; tid++) {
        // The calling thread is already awake
        if (tid == ptls->tid)
            continue;
        uv_cond_signal(&jl_all_tls_states[tid]->wake_signal);
    }
#endif
}

JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr)
{
Expand Down Expand Up @@ -1690,7 +1794,7 @@ STATIC_INLINE void gc_mark_push_remset(jl_ptls_t ptls, jl_value_t *obj,
}

// Enqueue an unmarked obj. last bit of `nptr` is set if `_obj` is young
static void gc_try_claim_and_push(jl_gc_markqueue_t *mq, void *_obj,
STATIC_INLINE void gc_try_claim_and_push(jl_gc_markqueue_t *mq, void *_obj,
uintptr_t *nptr) JL_NOTSAFEPOINT
{
if (!_obj)
Expand All @@ -1704,7 +1808,7 @@ static void gc_try_claim_and_push(jl_gc_markqueue_t *mq, void *_obj,
}

// Mark object with 8bit field descriptors
static jl_value_t *gc_mark_obj8(jl_ptls_t ptls, char *obj8_parent, uint8_t *obj8_begin,
STATIC_INLINE jl_value_t *gc_mark_obj8(jl_ptls_t ptls, char *obj8_parent, uint8_t *obj8_begin,
uint8_t *obj8_end, uintptr_t nptr) JL_NOTSAFEPOINT
{
(void)jl_assume(obj8_begin < obj8_end);
Expand Down Expand Up @@ -1732,7 +1836,7 @@ static jl_value_t *gc_mark_obj8(jl_ptls_t ptls, char *obj8_parent, uint8_t *obj8
}

// Mark object with 16bit field descriptors
static jl_value_t *gc_mark_obj16(jl_ptls_t ptls, char *obj16_parent, uint16_t *obj16_begin,
STATIC_INLINE jl_value_t *gc_mark_obj16(jl_ptls_t ptls, char *obj16_parent, uint16_t *obj16_begin,
uint16_t *obj16_end, uintptr_t nptr) JL_NOTSAFEPOINT
{
(void)jl_assume(obj16_begin < obj16_end);
Expand Down Expand Up @@ -1760,7 +1864,7 @@ static jl_value_t *gc_mark_obj16(jl_ptls_t ptls, char *obj16_parent, uint16_t *o
}

// Mark object with 32bit field descriptors
static jl_value_t *gc_mark_obj32(jl_ptls_t ptls, char *obj32_parent, uint32_t *obj32_begin,
STATIC_INLINE jl_value_t *gc_mark_obj32(jl_ptls_t ptls, char *obj32_parent, uint32_t *obj32_begin,
uint32_t *obj32_end, uintptr_t nptr) JL_NOTSAFEPOINT
{
(void)jl_assume(obj32_begin < obj32_end);
Expand Down Expand Up @@ -1788,7 +1892,7 @@ static jl_value_t *gc_mark_obj32(jl_ptls_t ptls, char *obj32_parent, uint32_t *o
}

// Mark object array
static void gc_mark_objarray(jl_ptls_t ptls, jl_value_t *obj_parent, jl_value_t **obj_begin,
STATIC_INLINE void gc_mark_objarray(jl_ptls_t ptls, jl_value_t *obj_parent, jl_value_t **obj_begin,
jl_value_t **obj_end, uint32_t step,
uintptr_t nptr) JL_NOTSAFEPOINT
{
Expand Down Expand Up @@ -1816,7 +1920,7 @@ static void gc_mark_objarray(jl_ptls_t ptls, jl_value_t *obj_parent, jl_value_t
}

// Mark array with 8bit field descriptors
static void gc_mark_array8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_value_t **ary8_begin,
STATIC_INLINE void gc_mark_array8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_value_t **ary8_begin,
jl_value_t **ary8_end, uint8_t *elem_begin, uint8_t *elem_end,
uintptr_t nptr) JL_NOTSAFEPOINT
{
Expand Down Expand Up @@ -1847,7 +1951,7 @@ static void gc_mark_array8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_value_t *
}

// Mark array with 16bit field descriptors
static void gc_mark_array16(jl_ptls_t ptls, jl_value_t *ary16_parent,
STATIC_INLINE void gc_mark_array16(jl_ptls_t ptls, jl_value_t *ary16_parent,
jl_value_t **ary16_begin, jl_value_t **ary16_end,
uint16_t *elem_begin, uint16_t *elem_end,
uintptr_t nptr) JL_NOTSAFEPOINT
Expand Down Expand Up @@ -1879,7 +1983,7 @@ static void gc_mark_array16(jl_ptls_t ptls, jl_value_t *ary16_parent,
}

// Mark chunk of large array
void gc_mark_chunk(jl_ptls_t ptls, jl_gc_markqueue_t *mq, jl_gc_chunk_t c) JL_NOTSAFEPOINT
STATIC_INLINE void gc_mark_chunk(jl_ptls_t ptls, jl_gc_markqueue_t *mq, jl_gc_chunk_t c) JL_NOTSAFEPOINT
{
#ifndef GC_VERIFY
switch (c.cid) {
Expand Down Expand Up @@ -1931,7 +2035,7 @@ void gc_mark_chunk(jl_ptls_t ptls, jl_gc_markqueue_t *mq, jl_gc_chunk_t c) JL_NO
}

// Mark gc frame
static void gc_mark_stack(jl_ptls_t ptls, jl_gcframe_t *s, uint32_t nroots,
STATIC_INLINE void gc_mark_stack(jl_ptls_t ptls, jl_gcframe_t *s, uint32_t nroots,
uintptr_t offset, uintptr_t lb, uintptr_t ub) JL_NOTSAFEPOINT
{
jl_gc_markqueue_t *mq = &ptls->mark_queue;
Expand Down Expand Up @@ -1965,7 +2069,7 @@ static void gc_mark_stack(jl_ptls_t ptls, jl_gcframe_t *s, uint32_t nroots,
}

// Mark exception stack
static void gc_mark_excstack(jl_ptls_t ptls, jl_excstack_t *excstack,
STATIC_INLINE void gc_mark_excstack(jl_ptls_t ptls, jl_excstack_t *excstack,
size_t itr) JL_NOTSAFEPOINT
{
jl_gc_markqueue_t *mq = &ptls->mark_queue;
Expand Down Expand Up @@ -1994,7 +2098,7 @@ static void gc_mark_excstack(jl_ptls_t ptls, jl_excstack_t *excstack,
}

// Mark module binding
static void gc_mark_module_binding(jl_ptls_t ptls, jl_module_t *mb_parent,
STATIC_INLINE void gc_mark_module_binding(jl_ptls_t ptls, jl_module_t *mb_parent,
jl_binding_t **mb_begin, jl_binding_t **mb_end,
uintptr_t nptr, uint8_t bits) JL_NOTSAFEPOINT
{
Expand Down Expand Up @@ -2094,7 +2198,7 @@ JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent,
// Enqueue and mark all outgoing references from `new_obj` which have not been marked
// yet. `meta_updated` is mostly used to make sure we don't update metadata twice for
// objects which have been enqueued into the `remset`
NOINLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_new_obj,
STATIC_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_new_obj,
int meta_updated) JL_NOTSAFEPOINT
{
jl_value_t *new_obj = (jl_value_t *)_new_obj;
Expand Down
80 changes: 1 addition & 79 deletions src/gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ typedef struct {
uint64_t total_mark_time;
} jl_gc_num_t;

// GC mark-queue
// GC marking

// Push gc work item `v` into `mq`
STATIC_INLINE void gc_markqueue_push1(jl_gc_markqueue_t *mq, void *v) JL_NOTSAFEPOINT
Expand Down Expand Up @@ -124,15 +124,6 @@ STATIC_INLINE void *gc_markqueue_pop2(jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT
return gc_markqueue_pop1(mq);
#endif
}
// Steal a gc work item that was enqueued in `mq`.
// GC_VERIFY builds fall back to `gc_markqueue_pop1`; all other builds go
// through `ws_queue_steal_from` on `mq->q`.
STATIC_INLINE void *gc_markqueue_steal_from(jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT
{
#ifdef GC_VERIFY
    return gc_markqueue_pop1(mq);
#else
    return ws_queue_steal_from(&mq->q);
#endif
}
// Push chunk `*c` into `mq`
STATIC_INLINE void gc_chunkqueue_push1(jl_gc_markqueue_t *mq, jl_gc_chunk_t *c) JL_NOTSAFEPOINT
{
Expand Down Expand Up @@ -192,76 +183,7 @@ STATIC_INLINE jl_gc_chunk_t gc_chunkqueue_pop2(jl_gc_markqueue_t *mq) JL_NOTSAFE
return gc_chunkqueue_pop1(mq);
#endif
}
// Steal a chunk from the chunk queue of `mq`.
//
// Without GC_VERIFY the steal is attempted directly on `mq->cq`; the returned
// chunk has `cid == empty_chunk` if the queue was empty or the
// compare-and-swap on the anchor failed. With GC_VERIFY the call is forwarded
// to `gc_chunkqueue_pop1` and its result is returned.
STATIC_INLINE jl_gc_chunk_t gc_chunkqueue_steal_from(jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT
{
#ifndef GC_VERIFY
    jl_gc_chunk_t c;
    c.cid = empty_chunk;
    idemp_ws_queue_t *cq = &mq->cq;
    // Snapshot the anchor and the backing array with acquire loads
    ws_anchor_t anc = jl_atomic_load_acquire(&cq->anchor);
    ws_array_t *ary = jl_atomic_load_acquire(&cq->array);
    if (anc.tail == 0) {
        // Empty queue
        return c;
    }
    // Read the candidate before trying to claim it; discarded if the claim fails
    c = ((jl_gc_chunk_t *)ary->buffer)[anc.tail - 1];
    ws_anchor_t anc2 = {anc.tail - 1, anc.tag};
    if (!jl_atomic_cmpswap(&cq->anchor, &anc, anc2)) {
        // Steal failed
        c.cid = empty_chunk;
    }
    return c;
#else
    // The popped chunk must be returned: falling off the end of a non-void
    // function and using the result is undefined behavior.
    return gc_chunkqueue_pop1(mq);
#endif
}

extern _Atomic(int32_t) nworkers_marking;
extern uv_cond_t safepoint_cond;
// Mark-entry hook for the `1` dispatch set: intentionally empty.
// `jl_init_threading` installs it as `gc_mark_entry_seq` when
// `jl_options.parallel_marking == 0`.
STATIC_INLINE void gc_mark_entry_seq1(jl_ptls_t ptls) JL_NOTSAFEPOINT
{
}
// Mark-exit hook for the `1` dispatch set: intentionally empty.
// `jl_init_threading` installs it as `gc_mark_exit_seq` when
// `jl_options.parallel_marking == 0`.
STATIC_INLINE void gc_mark_exit_seq1(jl_ptls_t ptls) JL_NOTSAFEPOINT
{
}
// Mark-entry hook for the `2` dispatch set (installed when
// `jl_options.parallel_marking != 0`). Compiles to nothing under GC_VERIFY.
STATIC_INLINE void gc_mark_entry_seq2(jl_ptls_t ptls) JL_NOTSAFEPOINT
{
#ifndef GC_VERIFY
// Count this thread as a marking worker, then switch its gc state to parallel
jl_atomic_fetch_add(&nworkers_marking, 1);
jl_atomic_exchange(&ptls->gc_state, JL_GC_STATE_PARALLEL);
#endif
}
// Mark-exit hook for the `2` dispatch set (installed when
// `jl_options.parallel_marking != 0`). Compiles to nothing under GC_VERIFY.
STATIC_INLINE void gc_mark_exit_seq2(jl_ptls_t ptls) JL_NOTSAFEPOINT
{
#ifndef GC_VERIFY
// Switch the gc state back to waiting (release store), then drop the worker count
jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_WAITING);
jl_atomic_fetch_add(&nworkers_marking, -1);
#endif
}
// Wake-up workers to partake in parallel marking.
// `1` variant: intentionally empty. `jl_init_threading` installs it as
// `gc_wake_workers` when `jl_options.parallel_marking == 0`.
STATIC_INLINE void gc_wake_workers1(jl_ptls_t ptls) JL_NOTSAFEPOINT
{
}
// Wake up the other threads so they can take part in parallel marking.
// Issues a fence, wakes libuv and broadcasts on `safepoint_cond` when more
// than one thread exists, then signals the `wake_signal` of every thread
// except the caller (`ptls->tid`).
STATIC_INLINE void gc_wake_workers2(jl_ptls_t ptls) JL_NOTSAFEPOINT
{
    jl_fence();
    if (jl_n_threads > 1) {
        jl_wake_libuv();
        uv_cond_broadcast(&safepoint_cond);
    }
    for (int tid = 0; tid < jl_n_threads; tid++) {
        // The calling thread is already awake
        if (tid == ptls->tid)
            continue;
        uv_cond_signal(&jl_all_tls_states[tid]->wake_signal);
    }
}
// GC mark helper functions
extern void (*gc_markqueue_push)(jl_gc_markqueue_t *mq, void *v) JL_NOTSAFEPOINT;
extern void *(*gc_markqueue_pop)(jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT;
extern void (*gc_chunkqueue_push)(jl_gc_markqueue_t *mq, jl_gc_chunk_t *c) JL_NOTSAFEPOINT;
extern jl_gc_chunk_t (*gc_chunkqueue_pop)(jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT;
extern void (*gc_mark_entry_seq)(jl_ptls_t ptls) JL_NOTSAFEPOINT;
extern void (*gc_mark_exit_seq)(jl_ptls_t ptls) JL_NOTSAFEPOINT;
extern void (*gc_wake_workers)(jl_ptls_t ptls) JL_NOTSAFEPOINT;

// layout for big (>2k) objects

Expand Down
2 changes: 1 addition & 1 deletion src/julia_threads.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ typedef struct _jl_gc_chunk_t {
uintptr_t nptr;
} jl_gc_chunk_t;

#define MAX_REFS_AT_ONCE (1 << 16)
#define MAX_REFS_AT_ONCE (1 << 28)

typedef struct {
struct _jl_value_t **start;
Expand Down
20 changes: 0 additions & 20 deletions src/threading.c
Original file line number Diff line number Diff line change
Expand Up @@ -525,26 +525,6 @@ void jl_init_threading(void)
jl_n_threads_per_pool[0] = nthreads;
jl_n_threads_per_pool[1] = nthreadsi;

// Determine whether to turn parallel marking on
if (jl_options.parallel_marking == 0) {
gc_markqueue_push = &gc_markqueue_push1;
gc_markqueue_pop = &gc_markqueue_pop1;
gc_chunkqueue_push = &gc_chunkqueue_push1;
gc_chunkqueue_pop = &gc_chunkqueue_pop1;
gc_mark_entry_seq = &gc_mark_entry_seq1;
gc_mark_exit_seq = &gc_mark_exit_seq1;
gc_wake_workers = &gc_wake_workers1;
}
else {
gc_markqueue_push = &gc_markqueue_push2;
gc_markqueue_pop = &gc_markqueue_pop2;
gc_chunkqueue_push = &gc_chunkqueue_push2;
gc_chunkqueue_pop = &gc_chunkqueue_pop2;
gc_mark_entry_seq = &gc_mark_entry_seq2;
gc_mark_exit_seq = &gc_mark_exit_seq2;
gc_wake_workers = &gc_wake_workers2;
}

#ifndef __clang_gcanalyzer__
jl_all_tls_states = (jl_ptls_t*)calloc(jl_n_threads, sizeof(void*));
#endif
Expand Down
4 changes: 2 additions & 2 deletions src/wsqueue.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ extern "C" {

ws_array_t *create_ws_array(size_t capacity, size_t eltsz)
{
ws_array_t *a = (ws_array_t *)malloc(sizeof(ws_array_t));
a->buffer = (void **)malloc(capacity * eltsz);
ws_array_t *a = (ws_array_t *)malloc_s(sizeof(ws_array_t));
a->buffer = (void **)malloc_s(capacity * eltsz);
a->capacity = capacity;
return a;
}
Expand Down

0 comments on commit d68f4f3

Please sign in to comment.