Skip to content

Commit

Permalink
cleanup remset logic a bit (JuliaLang#55021)
Browse files Browse the repository at this point in the history
I think that keeping a single `remset` (instead of keeping two and
alternating between them) should be a bit easier to understand and
possibly even a bit faster (since we will be accessing the `remset` only
once), though that should be a very small difference.
  • Loading branch information
d-netto committed Jul 11, 2024
1 parent a19c7b1 commit c8f56c6
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 54 deletions.
87 changes: 37 additions & 50 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1895,7 +1895,7 @@ JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr)
// which is not idempotent. See comments in https://github.com/JuliaLang/julia/issues/50419
uintptr_t header = jl_atomic_fetch_and_relaxed((_Atomic(uintptr_t) *)&o->header, ~GC_OLD);
if (header & GC_OLD) { // write barrier has not been triggered in this object yet
arraylist_push(ptls->heap.remset, (jl_value_t*)ptr);
arraylist_push(&ptls->heap.remset, (jl_value_t*)ptr);
ptls->heap.remset_nptr++; // conservative
}
}
Expand Down Expand Up @@ -2002,7 +2002,7 @@ STATIC_INLINE void gc_mark_push_remset(jl_ptls_t ptls, jl_value_t *obj,
{
if (__unlikely((nptr & 0x3) == 0x3)) {
ptls->heap.remset_nptr += nptr >> 2;
arraylist_t *remset = ptls->heap.remset;
arraylist_t *remset = &ptls->heap.remset;
size_t len = remset->len;
if (__unlikely(len >= remset->max)) {
arraylist_push(remset, obj);
Expand Down Expand Up @@ -3187,23 +3187,6 @@ void gc_mark_clean_reclaim_sets(void)
}
}

// Prepare one thread's remembered sets for marking.
//
// The two arraylists are exchanged: the list that was being filled becomes
// `last_remset`, while the former `last_remset` is emptied and becomes the
// list that receives new entries. `remset_nptr` restarts from zero, and every
// object in the list that was just retired gets its GC bits set to
// GC_OLD_MARKED.
static void gc_premark(jl_ptls_t ptls2)
{
    arraylist_t *retired = ptls2->heap.remset;
    arraylist_t *recycled = ptls2->heap.last_remset;

    // Exchange the roles of the two lists, then empty the one now in use.
    ptls2->heap.remset = recycled;
    ptls2->heap.last_remset = retired;
    ptls2->heap.remset->len = 0;
    ptls2->heap.remset_nptr = 0;

    // Setting the bits here keeps remembered objects from being
    // counted in `perm_scanned_bytes`.
    size_t count = retired->len;
    void **slots = retired->items;
    size_t idx = 0;
    while (idx < count) {
        jl_value_t *obj = (jl_value_t *)slots[idx];
        jl_astaggedvalue(obj)->bits.gc = GC_OLD_MARKED;
        idx++;
    }
}

static void gc_queue_thread_local(jl_gc_markqueue_t *mq, jl_ptls_t ptls2)
{
jl_task_t *task;
Expand Down Expand Up @@ -3247,14 +3230,29 @@ static void gc_queue_bt_buf(jl_gc_markqueue_t *mq, jl_ptls_t ptls2)
}
}

// Queue every object recorded in `ptls2`'s remembered set for marking.
//
// NOTE(review): this span interleaves the pre-change and post-change lines of
// the diff (two signatures, two declarations each of `items`, `len` and `v`,
// and two queue pushes), so it does not compile as shown.
// Going by the newer lines: each entry read from `ptls2->heap.remset` has its
// GC bits set to GC_OLD_MARKED, is tagged with GC_REMSET_PTR_TAG, and is pushed
// onto `mq`; afterwards the remset length and `remset_nptr` are reset to 0.
// The older lines read from `ptls2->heap.last_remset` and push onto
// `ptls->mark_queue` instead, without touching the GC bits or the lengths.
static void gc_queue_remset(jl_ptls_t ptls, jl_ptls_t ptls2)
static void gc_queue_remset(jl_gc_markqueue_t *mq, jl_ptls_t ptls2)
{
size_t len = ptls2->heap.last_remset->len;
void **items = ptls2->heap.last_remset->items;
void **items = ptls2->heap.remset.items;
size_t len = ptls2->heap.remset.len;
for (size_t i = 0; i < len; i++) {
// Tag the pointer to indicate it's in the remset
jl_value_t *v = (jl_value_t *)((uintptr_t)items[i] | GC_REMSET_PTR_TAG);
gc_ptr_queue_push(&ptls->mark_queue, v);
void *_v = items[i];
// Set the GC bits on the untagged pointer, before the tag is OR-ed in below
jl_astaggedvalue(_v)->bits.gc = GC_OLD_MARKED;
jl_value_t *v = (jl_value_t *)((uintptr_t)_v | GC_REMSET_PTR_TAG);
gc_ptr_queue_push(mq, v);
}
// Don't forget to clear the remset
ptls2->heap.remset.len = 0;
ptls2->heap.remset_nptr = 0;
}

// Sanity check: assert that, for every non-NULL entry of `gc_all_tls_states`,
// the remembered set holds no entries and `remset_nptr` is zero.
// The checks are `assert`s, so they vanish when assertions are disabled.
static void gc_check_all_remsets_are_empty(void)
{
    for (int tid = 0; tid < gc_n_threads; tid++) {
        jl_ptls_t tls = gc_all_tls_states[tid];
        if (tls == NULL)
            continue; // slot has no thread state to check
        assert(tls->heap.remset.len == 0);
        assert(tls->heap.remset_nptr == 0);
    }
}

Expand Down Expand Up @@ -3456,15 +3454,6 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
JL_PROBE_GC_MARK_BEGIN();
{
JL_TIMING(GC, GC_Mark);

// 1. fix GC bits of objects in the remset.
assert(gc_n_threads);
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
if (ptls2 != NULL)
gc_premark(ptls2);
}

assert(gc_n_threads);
int single_threaded_mark = (jl_n_markthreads == 0 || gc_heap_snapshot_enabled);
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
Expand All @@ -3477,17 +3466,18 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
mq_dest = &ptls_dest->mark_queue;
}
if (ptls2 != NULL) {
// 2.1. mark every thread local root
// 1.1. mark every thread local root
gc_queue_thread_local(mq_dest, ptls2);
// 2.2. mark any managed objects in the backtrace buffer
// 1.2. mark any managed objects in the backtrace buffer
// TODO: treat these as roots for gc_heap_snapshot_record
gc_queue_bt_buf(mq_dest, ptls2);
// 2.3. mark every object in the `last_remsets` and `rem_binding`
gc_queue_remset(ptls_dest, ptls2);
// 1.3. mark every object in the remset
gc_queue_remset(mq_dest, ptls2);
}
}
gc_check_all_remsets_are_empty();

// 3. walk roots
// 2. walk roots
gc_mark_roots(mq);
if (gc_cblist_root_scanner) {
gc_invoke_callbacks(jl_gc_cb_root_scanner_t,
Expand All @@ -3497,7 +3487,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
gc_mark_loop_barrier();
gc_mark_clean_reclaim_sets();

// 4. check for objects to finalize
// 3. check for objects to finalize
clear_weak_refs();
// Record the length of the marked list since we need to
// mark the object moved to the marked list from the
Expand Down Expand Up @@ -3560,7 +3550,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
gc_num.total_allocd += gc_num.allocd;
if (!prev_sweep_full)
promoted_bytes += perm_scanned_bytes - last_perm_scanned_bytes;
// 5. next collection decision
// 4. next collection decision
int not_freed_enough = (collection == JL_GC_AUTO) && estimate_freed < (7*(actual_allocd/10));
int nptr = 0;
assert(gc_n_threads);
Expand Down Expand Up @@ -3619,7 +3609,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
last_long_collect_interval = gc_num.interval;
}
scanned_bytes = 0;
// 6. start sweeping
// 5. start sweeping
uint64_t start_sweep_time = jl_hrtime();
JL_PROBE_GC_SWEEP_BEGIN(sweep_full);
{
Expand Down Expand Up @@ -3658,21 +3648,21 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
}

// sweeping is over
// 7. if it is a quick sweep, put back the remembered objects in queued state
// 6. if it is a quick sweep, put back the remembered objects in queued state
// so that we don't trigger the barrier again on them.
assert(gc_n_threads);
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
if (ptls2 == NULL)
continue;
if (!sweep_full) {
for (int i = 0; i < ptls2->heap.remset->len; i++) {
void *ptr = ptls2->heap.remset->items[i];
for (int i = 0; i < ptls2->heap.remset.len; i++) {
void *ptr = ptls2->heap.remset.items[i];
jl_astaggedvalue(ptr)->bits.gc = GC_MARKED;
}
}
else {
ptls2->heap.remset->len = 0;
ptls2->heap.remset.len = 0;
}
}

Expand Down Expand Up @@ -3887,10 +3877,7 @@ void jl_init_thread_heap(jl_ptls_t ptls)
heap->mallocarrays = NULL;
heap->mafreelist = NULL;
heap->big_objects = NULL;
heap->remset = &heap->_remset[0];
heap->last_remset = &heap->_remset[1];
arraylist_new(heap->remset, 0);
arraylist_new(heap->last_remset, 0);
arraylist_new(&heap->remset, 0);
arraylist_new(&ptls->finalizers, 0);
arraylist_new(&ptls->sweep_objs, 0);

Expand Down
6 changes: 2 additions & 4 deletions src/julia_threads.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,12 +153,10 @@ typedef struct {
// variables for tracking big objects
struct _bigval_t *big_objects;

// variables for tracking "remembered set"
arraylist_t _remset[2]; // contains jl_value_t*
// lower bound of the number of pointers inside remembered values
int remset_nptr;
arraylist_t *remset;
arraylist_t *last_remset;
// remembered set
arraylist_t remset;

// variables for allocating objects from pools
#define JL_GC_N_MAX_POOLS 51 // conservative. must be kept in sync with `src/julia_internal.h`
Expand Down

0 comments on commit c8f56c6

Please sign in to comment.