diff --git a/src/gc-debug.c b/src/gc-debug.c index 5b90ce7cd955b..91e8b7fa9f63d 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -132,7 +132,7 @@ static void clear_mark(int bits) } bigval_t *v; for (int i = 0; i < gc_n_threads; i++) { - v = gc_all_tls_states[i]->gc_tls.heap.big_objects; + v = gc_all_tls_states[i]->gc_tls.heap.young_generation_of_bigvals; while (v != NULL) { void *gcv = &v->header; if (!gc_verifying) @@ -142,7 +142,7 @@ static void clear_mark(int bits) } } - v = big_objects_marked; + v = oldest_generation_of_bigvals; while (v != NULL) { void *gcv = &v->header; if (!gc_verifying) @@ -965,7 +965,7 @@ void gc_stats_big_obj(void) size_t nused=0, nbytes=0, nused_old=0, nbytes_old=0; for (int t_i = 0; t_i < gc_n_threads; t_i++) { jl_ptls_t ptls2 = gc_all_tls_states[t_i]; - bigval_t *v = ptls2->gc_tls.heap.big_objects; + bigval_t *v = ptls2->gc_tls.heap.young_generation_of_bigvals; while (v != NULL) { if (gc_marked(v->bits.gc)) { nused++; @@ -973,7 +973,7 @@ void gc_stats_big_obj(void) } v = v->next; } - v = big_objects_marked; + v = oldest_generation_of_bigvals; while (v != NULL) { if (gc_marked(v->bits.gc)) { nused_old++; diff --git a/src/gc-tls.h b/src/gc-tls.h index 3f8662b467e30..dc15cda48fa58 100644 --- a/src/gc-tls.h +++ b/src/gc-tls.h @@ -31,8 +31,8 @@ typedef struct { struct _mallocarray_t *mallocarrays; struct _mallocarray_t *mafreelist; - // variables for tracking big objects - struct _bigval_t *big_objects; + // variable for tracking young (i.e. not in `GC_OLD_MARKED`/last generation) large objects + struct _bigval_t *young_generation_of_bigvals; // lower bound of the number of pointers inside remembered values int remset_nptr; diff --git a/src/gc.c b/src/gc.c index e96fc9637d6f9..c70eee6905ee9 100644 --- a/src/gc.c +++ b/src/gc.c @@ -34,6 +34,8 @@ uv_cond_t gc_threads_cond; uv_sem_t gc_sweep_assists_needed; // Mutex used to coordinate entry of GC threads in the mark loop uv_mutex_t gc_queue_observer_lock; +// Tag for sentinel nodes in bigval list +uintptr_t gc_bigval_sentinel_tag; // Linked list of callback functions @@ -150,7 +152,6 @@ JL_DLLEXPORT void jl_gc_set_cb_notify_gc_pressure(jl_gc_cb_notify_gc_pressure_t // is going to realloc the buffer (of its own list) or accessing the // list of another thread static jl_mutex_t finalizers_lock; -static uv_mutex_t gc_cache_lock; // mutex for gc-heap-snapshot. jl_mutex_t heapsnapshot_lock; @@ -201,8 +202,8 @@ JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT return jl_buff_tag; } -// List of marked big objects. Not per-thread. Accessed only by master thread. -bigval_t *big_objects_marked = NULL; +// List of big objects in oldest generation (`GC_OLD_MARKED`). Not per-thread. Accessed only by master thread. +bigval_t *oldest_generation_of_bigvals = NULL; // -- Finalization -- // `ptls->finalizers` and `finalizer_list_marked` might have tagged pointers. @@ -759,60 +760,25 @@ static int64_t t_start = 0; // Time GC starts; static int64_t last_trim_maxrss = 0; #endif -static void gc_sync_cache_nolock(jl_ptls_t ptls, jl_gc_mark_cache_t *gc_cache) JL_NOTSAFEPOINT +static void gc_sync_cache(jl_ptls_t ptls, jl_gc_mark_cache_t *gc_cache) JL_NOTSAFEPOINT { - const int nbig = gc_cache->nbig_obj; - for (int i = 0; i < nbig; i++) { - void *ptr = gc_cache->big_obj[i]; - bigval_t *hdr = (bigval_t*)gc_ptr_clear_tag(ptr, 1); - gc_big_object_unlink(hdr); - if (gc_ptr_tag(ptr, 1)) { - gc_big_object_link(hdr, &ptls->gc_tls.heap.big_objects); - } - else { - // Move hdr from `big_objects` list to `big_objects_marked list` - gc_big_object_link(hdr, &big_objects_marked); - } - } - gc_cache->nbig_obj = 0; perm_scanned_bytes += gc_cache->perm_scanned_bytes; scanned_bytes += gc_cache->scanned_bytes; gc_cache->perm_scanned_bytes = 0; gc_cache->scanned_bytes = 0; } -static void gc_sync_cache(jl_ptls_t ptls) JL_NOTSAFEPOINT -{ - uv_mutex_lock(&gc_cache_lock); - gc_sync_cache_nolock(ptls, &ptls->gc_tls.gc_cache); - uv_mutex_unlock(&gc_cache_lock); -} - // No other threads can be running marking at the same time -static void gc_sync_all_caches_nolock(jl_ptls_t ptls) +static void gc_sync_all_caches(jl_ptls_t ptls) { assert(gc_n_threads); for (int t_i = 0; t_i < gc_n_threads; t_i++) { jl_ptls_t ptls2 = gc_all_tls_states[t_i]; if (ptls2 != NULL) - gc_sync_cache_nolock(ptls, &ptls2->gc_tls.gc_cache); + gc_sync_cache(ptls, &ptls2->gc_tls.gc_cache); } } -STATIC_INLINE void gc_queue_big_marked(jl_ptls_t ptls, bigval_t *hdr, - int toyoung) JL_NOTSAFEPOINT -{ - const int nentry = sizeof(ptls->gc_tls.gc_cache.big_obj) / sizeof(void*); - size_t nobj = ptls->gc_tls.gc_cache.nbig_obj; - if (__unlikely(nobj >= nentry)) { - gc_sync_cache(ptls); - nobj = 0; - } - uintptr_t v = (uintptr_t)hdr; - ptls->gc_tls.gc_cache.big_obj[nobj] = (void*)(toyoung ? (v | 1) : v); - ptls->gc_tls.gc_cache.nbig_obj = nobj + 1; -} - // Atomically set the mark bit for object and return whether it was previously unmarked FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_NOTSAFEPOINT { @@ -849,16 +815,14 @@ STATIC_INLINE void gc_setmark_big(jl_ptls_t ptls, jl_taggedvalue_t *o, bigval_t *hdr = bigval_header(o); if (mark_mode == GC_OLD_MARKED) { ptls->gc_tls.gc_cache.perm_scanned_bytes += hdr->sz; - gc_queue_big_marked(ptls, hdr, 0); } else { ptls->gc_tls.gc_cache.scanned_bytes += hdr->sz; - // We can't easily tell if the object is old or being promoted - // from the gc bits but if the `age` is `0` then the object - // must be already on a young list. if (mark_reset_age) { + assert(jl_atomic_load(&gc_n_threads_marking) == 0); // `mark_reset_age` is only used during single-threaded marking // Reset the object as if it was just allocated - gc_queue_big_marked(ptls, hdr, 1); + gc_big_object_unlink(hdr); + gc_big_object_link(ptls->gc_tls.heap.young_generation_of_bigvals, hdr); } } } @@ -1023,7 +987,7 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) memset(v, 0xee, allocsz); #endif v->sz = allocsz; - gc_big_object_link(v, &ptls->gc_tls.heap.big_objects); + gc_big_object_link(ptls->gc_tls.heap.young_generation_of_bigvals, v); return jl_valueof(&v->header); } @@ -1043,60 +1007,85 @@ jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t sz) { return jl_gc_big_alloc_inner(ptls, sz); } -// Sweep list rooted at *pv, removing and freeing any unmarked objects. -// Return pointer to last `next` field in the culled list. -static bigval_t **sweep_big_list(int sweep_full, bigval_t **pv) JL_NOTSAFEPOINT +FORCE_INLINE void sweep_unlink_and_free(bigval_t *v) JL_NOTSAFEPOINT +{ + gc_big_object_unlink(v); + gc_num.freed += v->sz; +#ifdef MEMDEBUG + memset(v, 0xbb, v->sz); +#endif + gc_invoke_callbacks(jl_gc_cb_notify_external_free_t, gc_cblist_notify_external_free, (v)); + jl_free_aligned(v); +} + +static bigval_t *sweep_list_of_young_bigvals(bigval_t *young) JL_NOTSAFEPOINT { - bigval_t *v = *pv; + bigval_t *last_node = young; + bigval_t *v = young->next; // skip the sentinel + bigval_t *old = oldest_generation_of_bigvals; + int sweep_full = current_sweep_full; // don't load the global in the hot loop while (v != NULL) { bigval_t *nxt = v->next; int bits = v->bits.gc; int old_bits = bits; if (gc_marked(bits)) { - pv = &v->next; if (sweep_full || bits == GC_MARKED) { bits = GC_OLD; + last_node = v; + } + else { // `bits == GC_OLD_MARKED` + assert(bits == GC_OLD_MARKED); + // reached oldest generation, move from young list to old list + gc_big_object_unlink(v); + gc_big_object_link(old, v); } v->bits.gc = bits; } else { - // Remove v from list and free it - *pv = nxt; - if (nxt) - nxt->prev = pv; - gc_num.freed += v->sz; -#ifdef MEMDEBUG - memset(v, 0xbb, v->sz); -#endif - gc_invoke_callbacks(jl_gc_cb_notify_external_free_t, - gc_cblist_notify_external_free, (v)); - jl_free_aligned(v); + sweep_unlink_and_free(v); } gc_time_count_big(old_bits, bits); v = nxt; } - return pv; + return last_node; +} + +static void sweep_list_of_oldest_bigvals(bigval_t *young) JL_NOTSAFEPOINT +{ + bigval_t *v = oldest_generation_of_bigvals->next; // skip the sentinel + while (v != NULL) { + bigval_t *nxt = v->next; + assert(v->bits.gc == GC_OLD_MARKED); + v->bits.gc = GC_OLD; + gc_time_count_big(GC_OLD_MARKED, GC_OLD); + v = nxt; + } } -static void sweep_big(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT +static void sweep_big(jl_ptls_t ptls) JL_NOTSAFEPOINT { gc_time_big_start(); assert(gc_n_threads); + bigval_t *last_node_in_my_list = NULL; for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; - if (ptls2 != NULL) - sweep_big_list(sweep_full, &ptls2->gc_tls.heap.big_objects); + if (ptls2 != NULL) { + bigval_t *last_node = sweep_list_of_young_bigvals(ptls2->gc_tls.heap.young_generation_of_bigvals); + if (ptls == ptls2) { + last_node_in_my_list = last_node; + } + } } - if (sweep_full) { - bigval_t **last_next = sweep_big_list(sweep_full, &big_objects_marked); - // Move all survivors from big_objects_marked list to the big_objects list of this thread. - if (ptls->gc_tls.heap.big_objects) - ptls->gc_tls.heap.big_objects->prev = last_next; - *last_next = ptls->gc_tls.heap.big_objects; - ptls->gc_tls.heap.big_objects = big_objects_marked; - if (ptls->gc_tls.heap.big_objects) - ptls->gc_tls.heap.big_objects->prev = &ptls->gc_tls.heap.big_objects; - big_objects_marked = NULL; + if (current_sweep_full) { + sweep_list_of_oldest_bigvals(ptls->gc_tls.heap.young_generation_of_bigvals); + // move all nodes in `oldest_generation_of_bigvals` to my list of bigvals + assert(last_node_in_my_list != NULL); + assert(last_node_in_my_list->next == NULL); + last_node_in_my_list->next = oldest_generation_of_bigvals->next; // skip the sentinel + if (oldest_generation_of_bigvals->next != NULL) { + oldest_generation_of_bigvals->next->prev = last_node_in_my_list; + } + oldest_generation_of_bigvals->next = NULL; } gc_time_big_end(); } @@ -1536,7 +1525,7 @@ STATIC_INLINE void gc_sweep_pool_page(gc_page_profiler_serializer_t *s, jl_gc_pa static void gc_sweep_other(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT { sweep_malloced_arrays(); - sweep_big(ptls, sweep_full); + sweep_big(ptls); } static void gc_pool_sync_nfree(jl_gc_pagemeta_t *pg, jl_taggedvalue_t *last) JL_NOTSAFEPOINT @@ -3524,7 +3513,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) // marking is over // Flush everything in mark cache - gc_sync_all_caches_nolock(ptls); + gc_sync_all_caches(ptls); int64_t live_sz_ub = live_bytes + actual_allocd; int64_t live_sz_est = scanned_bytes + perm_scanned_bytes; @@ -3863,7 +3852,9 @@ void jl_init_thread_heap(jl_ptls_t ptls) small_arraylist_new(&heap->free_stacks[i], 0); heap->mallocarrays = NULL; heap->mafreelist = NULL; - heap->big_objects = NULL; + heap->young_generation_of_bigvals = (bigval_t*)calloc_s(sizeof(bigval_t)); // sentinel + assert(gc_bigval_sentinel_tag != 0); // make sure the sentinel is initialized + heap->young_generation_of_bigvals->header = gc_bigval_sentinel_tag; arraylist_new(&heap->remset, 0); arraylist_new(&ptls->finalizers, 0); arraylist_new(&ptls->gc_tls.sweep_objs, 0); @@ -3871,7 +3862,6 @@ void jl_init_thread_heap(jl_ptls_t ptls) jl_gc_mark_cache_t *gc_cache = &ptls->gc_tls.gc_cache; gc_cache->perm_scanned_bytes = 0; gc_cache->scanned_bytes = 0; - gc_cache->nbig_obj = 0; // Initialize GC mark-queue jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue; @@ -3897,12 +3887,16 @@ void jl_gc_init(void) JL_MUTEX_INIT(&heapsnapshot_lock, "heapsnapshot_lock"); JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock"); uv_mutex_init(&page_profile_lock); - uv_mutex_init(&gc_cache_lock); uv_mutex_init(&gc_perm_lock); uv_mutex_init(&gc_threads_lock); uv_cond_init(&gc_threads_cond); uv_sem_init(&gc_sweep_assists_needed, 0); uv_mutex_init(&gc_queue_observer_lock); + void *_addr = (void*)calloc_s(1); // dummy allocation to get the sentinel tag + uintptr_t addr = (uintptr_t)_addr; + gc_bigval_sentinel_tag = addr; + oldest_generation_of_bigvals = (bigval_t*)calloc_s(sizeof(bigval_t)); // sentinel + oldest_generation_of_bigvals->header = gc_bigval_sentinel_tag; jl_gc_init_page(); jl_gc_debug_init(); diff --git a/src/gc.h b/src/gc.h index 1a74d334c98f4..2ec249d322d94 100644 --- a/src/gc.h +++ b/src/gc.h @@ -120,9 +120,11 @@ typedef struct _jl_gc_chunk_t { // layout for big (>2k) objects +extern uintptr_t gc_bigval_sentinel_tag; + JL_EXTENSION typedef struct _bigval_t { struct _bigval_t *next; - struct _bigval_t **prev; // pointer to the next field of the prev entry + struct _bigval_t *prev; size_t sz; #ifdef _P64 // Add padding so that the value is 64-byte aligned // (8 pointers of 8 bytes each) - (4 other pointers in struct) @@ -431,7 +433,7 @@ STATIC_INLINE unsigned ffs_u32(uint32_t bitvec) #endif extern jl_gc_num_t gc_num; -extern bigval_t *big_objects_marked; +extern bigval_t *oldest_generation_of_bigvals; extern arraylist_t finalizer_list_marked; extern arraylist_t to_finalize; extern int64_t buffered_pages; @@ -529,21 +531,30 @@ STATIC_INLINE void *gc_ptr_clear_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT NOINLINE uintptr_t gc_get_stack_ptr(void); -STATIC_INLINE void gc_big_object_unlink(const bigval_t *hdr) JL_NOTSAFEPOINT +FORCE_INLINE void gc_big_object_unlink(const bigval_t *node) JL_NOTSAFEPOINT { - *hdr->prev = hdr->next; - if (hdr->next) { - hdr->next->prev = hdr->prev; + assert(node != oldest_generation_of_bigvals); + assert(node->header != gc_bigval_sentinel_tag); + assert(node->prev != NULL); + if (node->next != NULL) { + node->next->prev = node->prev; } + node->prev->next = node->next; } -STATIC_INLINE void gc_big_object_link(bigval_t *hdr, bigval_t **list) JL_NOTSAFEPOINT +FORCE_INLINE void gc_big_object_link(bigval_t *sentinel_node, bigval_t *node) JL_NOTSAFEPOINT { - hdr->next = *list; - hdr->prev = list; - if (*list) - (*list)->prev = &hdr->next; - *list = hdr; + assert(sentinel_node != NULL); + assert(sentinel_node->header == gc_bigval_sentinel_tag); + assert(sentinel_node->prev == NULL); + assert(node->header != gc_bigval_sentinel_tag); + // a new node gets linked in at the head of the list + node->next = sentinel_node->next; + node->prev = sentinel_node; + if (sentinel_node->next != NULL) { + sentinel_node->next->prev = node; + } + sentinel_node->next = node; } extern uv_mutex_t gc_threads_lock;