Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test performance of upstream linked list PR #166

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/gc-debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ static void clear_mark(int bits)
}
bigval_t *v;
for (int i = 0; i < gc_n_threads; i++) {
v = gc_all_tls_states[i]->heap.big_objects;
v = gc_all_tls_states[i]->heap.young_generation_of_bigvals;
while (v != NULL) {
void *gcv = &v->header;
if (!gc_verifying)
Expand All @@ -142,7 +142,7 @@ static void clear_mark(int bits)
}
}

v = big_objects_marked;
v = oldest_generation_of_bigvals;
while (v != NULL) {
void *gcv = &v->header;
if (!gc_verifying)
Expand Down Expand Up @@ -994,15 +994,15 @@ void gc_stats_big_obj(void)
size_t nused=0, nbytes=0, nused_old=0, nbytes_old=0;
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
bigval_t *v = ptls2->heap.big_objects;
bigval_t *v = ptls2->heap.young_generation_of_bigvals;
while (v != NULL) {
if (gc_marked(v->bits.gc)) {
nused++;
nbytes += v->sz & ~3;
}
v = v->next;
}
v = big_objects_marked;
v = oldest_generation_of_bigvals;
while (v != NULL) {
if (gc_marked(v->bits.gc)) {
nused_old++;
Expand Down
162 changes: 78 additions & 84 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ uv_cond_t gc_threads_cond;
uv_sem_t gc_sweep_assists_needed;
// Mutex used to coordinate entry of GC threads in the mark loop
uv_mutex_t gc_queue_observer_lock;
// Tag for sentinel nodes in bigval list
uintptr_t gc_bigval_sentinel_tag;

// Linked list of callback functions

Expand Down Expand Up @@ -150,7 +152,6 @@ JL_DLLEXPORT void jl_gc_set_cb_notify_gc_pressure(jl_gc_cb_notify_gc_pressure_t
// is going to realloc the buffer (of its own list) or accessing the
// list of another thread
static jl_mutex_t finalizers_lock;
static uv_mutex_t gc_cache_lock;

// mutex for gc-heap-snapshot.
jl_mutex_t heapsnapshot_lock;
Expand Down Expand Up @@ -201,8 +202,8 @@ JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT
return jl_buff_tag;
}

// List of marked big objects. Not per-thread. Accessed only by master thread.
bigval_t *big_objects_marked = NULL;
// List of big objects in oldest generation (`GC_OLD_MARKED`). Not per-thread. Accessed only by master thread.
bigval_t *oldest_generation_of_bigvals = NULL;

// -- Finalization --
// `ptls->finalizers` and `finalizer_list_marked` might have tagged pointers.
Expand Down Expand Up @@ -759,60 +760,25 @@ static int64_t t_start = 0; // Time GC starts;
static int64_t last_trim_maxrss = 0;
#endif

static void gc_sync_cache_nolock(jl_ptls_t ptls, jl_gc_mark_cache_t *gc_cache) JL_NOTSAFEPOINT
static void gc_sync_cache(jl_ptls_t ptls, jl_gc_mark_cache_t *gc_cache) JL_NOTSAFEPOINT
{
const int nbig = gc_cache->nbig_obj;
for (int i = 0; i < nbig; i++) {
void *ptr = gc_cache->big_obj[i];
bigval_t *hdr = (bigval_t*)gc_ptr_clear_tag(ptr, 1);
gc_big_object_unlink(hdr);
if (gc_ptr_tag(ptr, 1)) {
gc_big_object_link(hdr, &ptls->heap.big_objects);
}
else {
// Move hdr from `big_objects` list to `big_objects_marked` list
gc_big_object_link(hdr, &big_objects_marked);
}
}
gc_cache->nbig_obj = 0;
perm_scanned_bytes += gc_cache->perm_scanned_bytes;
scanned_bytes += gc_cache->scanned_bytes;
gc_cache->perm_scanned_bytes = 0;
gc_cache->scanned_bytes = 0;
}

static void gc_sync_cache(jl_ptls_t ptls) JL_NOTSAFEPOINT
{
uv_mutex_lock(&gc_cache_lock);
gc_sync_cache_nolock(ptls, &ptls->gc_cache);
uv_mutex_unlock(&gc_cache_lock);
}

// No other threads can be running marking at the same time
static void gc_sync_all_caches_nolock(jl_ptls_t ptls)
static void gc_sync_all_caches(jl_ptls_t ptls)
{
assert(gc_n_threads);
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
if (ptls2 != NULL)
gc_sync_cache_nolock(ptls, &ptls2->gc_cache);
gc_sync_cache(ptls, &ptls2->gc_cache);
}
}

STATIC_INLINE void gc_queue_big_marked(jl_ptls_t ptls, bigval_t *hdr,
int toyoung) JL_NOTSAFEPOINT
{
const int nentry = sizeof(ptls->gc_cache.big_obj) / sizeof(void*);
size_t nobj = ptls->gc_cache.nbig_obj;
if (__unlikely(nobj >= nentry)) {
gc_sync_cache(ptls);
nobj = 0;
}
uintptr_t v = (uintptr_t)hdr;
ptls->gc_cache.big_obj[nobj] = (void*)(toyoung ? (v | 1) : v);
ptls->gc_cache.nbig_obj = nobj + 1;
}

// Atomically set the mark bit for object and return whether it was previously unmarked
FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_NOTSAFEPOINT
{
Expand Down Expand Up @@ -849,16 +815,14 @@ STATIC_INLINE void gc_setmark_big(jl_ptls_t ptls, jl_taggedvalue_t *o,
bigval_t *hdr = bigval_header(o);
if (mark_mode == GC_OLD_MARKED) {
ptls->gc_cache.perm_scanned_bytes += hdr->sz;
gc_queue_big_marked(ptls, hdr, 0);
}
else {
ptls->gc_cache.scanned_bytes += hdr->sz;
// We can't easily tell if the object is old or being promoted
// from the gc bits but if the `age` is `0` then the object
// must be already on a young list.
if (mark_reset_age) {
assert(jl_atomic_load(&gc_n_threads_marking) == 0); // `mark_reset_age` is only used during single-threaded marking
// Reset the object as if it was just allocated
gc_queue_big_marked(ptls, hdr, 1);
gc_big_object_unlink(hdr);
gc_big_object_link(ptls->heap.young_generation_of_bigvals, hdr);
}
}
}
Expand Down Expand Up @@ -1023,7 +987,7 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz)
memset(v, 0xee, allocsz);
#endif
v->sz = allocsz;
gc_big_object_link(v, &ptls->heap.big_objects);
gc_big_object_link(ptls->heap.young_generation_of_bigvals, v);
return jl_valueof(&v->header);
}

Expand All @@ -1049,60 +1013,85 @@ jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t sz) {
return jl_gc_big_alloc_inner(ptls, sz);
}

// Sweep list rooted at *pv, removing and freeing any unmarked objects.
// Return pointer to last `next` field in the culled list.
static bigval_t **sweep_big_list(int sweep_full, bigval_t **pv) JL_NOTSAFEPOINT
// Unlink a dead big object `v` from its intrusive bigval list, account the
// reclaimed bytes in `gc_num.freed`, notify any external-GC free callbacks,
// and release the underlying aligned allocation.
// NOTE(review): `gc_big_object_unlink(v)` is called unconditionally — assumes
// `v` is currently linked into some bigval list; confirm all callers
// guarantee this.
FORCE_INLINE void sweep_unlink_and_free(bigval_t *v) JL_NOTSAFEPOINT
{
gc_big_object_unlink(v);
gc_num.freed += v->sz; // bytes returned to the system during this sweep
#ifdef MEMDEBUG
memset(v, 0xbb, v->sz); // poison freed memory to surface use-after-free in debug builds
#endif
gc_invoke_callbacks(jl_gc_cb_notify_external_free_t, gc_cblist_notify_external_free, (v));
jl_free_aligned(v);
}

static bigval_t *sweep_list_of_young_bigvals(bigval_t *young) JL_NOTSAFEPOINT
{
bigval_t *v = *pv;
bigval_t *last_node = young;
bigval_t *v = young->next; // skip the sentinel
bigval_t *old = oldest_generation_of_bigvals;
int sweep_full = current_sweep_full; // don't load the global in the hot loop
while (v != NULL) {
bigval_t *nxt = v->next;
int bits = v->bits.gc;
int old_bits = bits;
if (gc_marked(bits)) {
pv = &v->next;
if (sweep_full || bits == GC_MARKED) {
bits = GC_OLD;
last_node = v;
}
else { // `bits == GC_OLD_MARKED`
assert(bits == GC_OLD_MARKED);
// reached oldest generation, move from young list to old list
gc_big_object_unlink(v);
gc_big_object_link(old, v);
}
v->bits.gc = bits;
}
else {
// Remove v from list and free it
*pv = nxt;
if (nxt)
nxt->prev = pv;
gc_num.freed += v->sz;
#ifdef MEMDEBUG
memset(v, 0xbb, v->sz);
#endif
gc_invoke_callbacks(jl_gc_cb_notify_external_free_t,
gc_cblist_notify_external_free, (v));
jl_free_aligned(v);
sweep_unlink_and_free(v);
}
gc_time_count_big(old_bits, bits);
v = nxt;
}
return pv;
return last_node;
}

// Full-sweep pass over the global `oldest_generation_of_bigvals` list:
// demote each surviving object's mark bits from GC_OLD_MARKED back to GC_OLD
// so a subsequent full collection must re-mark it. Nothing is freed or
// unlinked here; unreachable oldest-generation objects are handled elsewhere.
// NOTE(review): the `young` parameter is unused in this body — confirm
// whether it is vestigial or kept for signature symmetry with the young-list
// sweep.
static void sweep_list_of_oldest_bigvals(bigval_t *young) JL_NOTSAFEPOINT
{
bigval_t *v = oldest_generation_of_bigvals->next; // skip the sentinel node at the list head
while (v != NULL) {
bigval_t *nxt = v->next; // read `next` first; bits update below doesn't touch links
assert(v->bits.gc == GC_OLD_MARKED); // only fully-marked oldest-gen objects live on this list
v->bits.gc = GC_OLD;
gc_time_count_big(GC_OLD_MARKED, GC_OLD); // GC timing/statistics hook
v = nxt;
}
}

static void sweep_big(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT
static void sweep_big(jl_ptls_t ptls) JL_NOTSAFEPOINT
{
gc_time_big_start();
assert(gc_n_threads);
bigval_t *last_node_in_my_list = NULL;
for (int i = 0; i < gc_n_threads; i++) {
jl_ptls_t ptls2 = gc_all_tls_states[i];
if (ptls2 != NULL)
sweep_big_list(sweep_full, &ptls2->heap.big_objects);
if (ptls2 != NULL) {
bigval_t *last_node = sweep_list_of_young_bigvals(ptls2->heap.young_generation_of_bigvals);
if (ptls == ptls2) {
last_node_in_my_list = last_node;
}
}
}
if (sweep_full) {
bigval_t **last_next = sweep_big_list(sweep_full, &big_objects_marked);
// Move all survivors from big_objects_marked list to the big_objects list of this thread.
if (ptls->heap.big_objects)
ptls->heap.big_objects->prev = last_next;
*last_next = ptls->heap.big_objects;
ptls->heap.big_objects = big_objects_marked;
if (ptls->heap.big_objects)
ptls->heap.big_objects->prev = &ptls->heap.big_objects;
big_objects_marked = NULL;
if (current_sweep_full) {
sweep_list_of_oldest_bigvals(ptls->heap.young_generation_of_bigvals);
// move all nodes in `oldest_generation_of_bigvals` to my list of bigvals
assert(last_node_in_my_list != NULL);
assert(last_node_in_my_list->next == NULL);
last_node_in_my_list->next = oldest_generation_of_bigvals->next; // skip the sentinel
if (oldest_generation_of_bigvals->next != NULL) {
oldest_generation_of_bigvals->next->prev = last_node_in_my_list;
}
oldest_generation_of_bigvals->next = NULL;
}
gc_time_big_end();
}
Expand Down Expand Up @@ -1548,10 +1537,10 @@ STATIC_INLINE void gc_sweep_pool_page(gc_page_profiler_serializer_t *s, jl_gc_pa
}

// sweep over all memory that is being used and not in a pool
static void gc_sweep_other(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT
static void gc_sweep_other(jl_ptls_t ptls) JL_NOTSAFEPOINT
{
sweep_malloced_arrays();
sweep_big(ptls, sweep_full);
sweep_big(ptls);
}

static void gc_pool_sync_nfree(jl_gc_pagemeta_t *pg, jl_taggedvalue_t *last) JL_NOTSAFEPOINT
Expand Down Expand Up @@ -3537,7 +3526,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
// marking is over

// Flush everything in mark cache
gc_sync_all_caches_nolock(ptls);
gc_sync_all_caches(ptls);

int64_t live_sz_ub = live_bytes + actual_allocd;
int64_t live_sz_est = scanned_bytes + perm_scanned_bytes;
Expand Down Expand Up @@ -3625,7 +3614,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
sweep_weak_refs();
sweep_stack_pools();
gc_sweep_foreign_objs();
gc_sweep_other(ptls, sweep_full);
gc_sweep_other(ptls);
gc_scrub();
gc_verify_tags();
gc_sweep_pool();
Expand Down Expand Up @@ -3876,15 +3865,16 @@ void jl_init_thread_heap(jl_ptls_t ptls)
small_arraylist_new(&heap->free_stacks[i], 0);
heap->mallocarrays = NULL;
heap->mafreelist = NULL;
heap->big_objects = NULL;
heap->young_generation_of_bigvals = (bigval_t*)calloc_s(sizeof(bigval_t)); // sentinel
assert(gc_bigval_sentinel_tag != 0); // make sure the sentinel is initialized
heap->young_generation_of_bigvals->header = gc_bigval_sentinel_tag;
arraylist_new(&heap->remset, 0);
arraylist_new(&ptls->finalizers, 0);
arraylist_new(&ptls->sweep_objs, 0);

jl_gc_mark_cache_t *gc_cache = &ptls->gc_cache;
gc_cache->perm_scanned_bytes = 0;
gc_cache->scanned_bytes = 0;
gc_cache->nbig_obj = 0;

// Initialize GC mark-queue
jl_gc_markqueue_t *mq = &ptls->mark_queue;
Expand All @@ -3910,12 +3900,16 @@ void jl_gc_init(void)
JL_MUTEX_INIT(&heapsnapshot_lock, "heapsnapshot_lock");
JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock");
uv_mutex_init(&page_profile_lock);
uv_mutex_init(&gc_cache_lock);
uv_mutex_init(&gc_perm_lock);
uv_mutex_init(&gc_threads_lock);
uv_cond_init(&gc_threads_cond);
uv_sem_init(&gc_sweep_assists_needed, 0);
uv_mutex_init(&gc_queue_observer_lock);
void *_addr = (void*)calloc_s(1); // dummy allocation to get the sentinel tag
uintptr_t addr = (uintptr_t)_addr;
gc_bigval_sentinel_tag = addr;
oldest_generation_of_bigvals = (bigval_t*)calloc_s(sizeof(bigval_t)); // sentinel
oldest_generation_of_bigvals->header = gc_bigval_sentinel_tag;

jl_gc_init_page();
jl_gc_debug_init();
Expand Down
Loading
Loading