Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bring a few upstream GC patches into our fork #169

Merged
merged 5 commits into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ ifeq ($(USE_SYSTEM_LIBUV),0)
UV_HEADERS += uv.h
UV_HEADERS += uv/*.h
endif
PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h)
PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-tls.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h)
ifeq ($(OS),WINNT)
PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,win32_ucontext.h)
endif
Expand Down
2 changes: 1 addition & 1 deletion src/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len)
const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
if (sz <= GC_MAX_SZCLASS) {
int pool_id = jl_gc_szclass_align8(allocsz);
jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id];
jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id];
int osize = jl_gc_sizeclasses[pool_id];
// We call `jl_gc_pool_alloc_noinline` instead of `jl_gc_pool_alloc` to avoid double-counting in
// the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
Expand Down
77 changes: 24 additions & 53 deletions src/gc-debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ static arraylist_t bits_save[4];
static void gc_clear_mark_page(jl_gc_pagemeta_t *pg, int bits)
{
jl_ptls_t ptls2 = gc_all_tls_states[pg->thread_n];
jl_gc_pool_t *pool = &ptls2->heap.norm_pools[pg->pool_n];
jl_gc_pool_t *pool = &ptls2->gc_tls.heap.norm_pools[pg->pool_n];
jl_taggedvalue_t *pv = (jl_taggedvalue_t*)(pg->data + GC_PAGE_OFFSET);
char *lim = (char*)pv + GC_PAGE_SZ - GC_PAGE_OFFSET - pool->osize;
while ((char*)pv <= lim) {
Expand All @@ -112,7 +112,7 @@ static void gc_clear_mark_outer(int bits)
{
for (int i = 0; i < gc_n_threads; i++) {
jl_ptls_t ptls2 = gc_all_tls_states[i];
jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_allocd.bottom);
jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
while (pg != NULL) {
gc_clear_mark_page(pg, bits);
pg = pg->next;
Expand All @@ -132,7 +132,7 @@ static void clear_mark(int bits)
}
bigval_t *v;
for (int i = 0; i < gc_n_threads; i++) {
v = gc_all_tls_states[i]->heap.big_objects;
v = gc_all_tls_states[i]->gc_tls.heap.young_generation_of_bigvals;
while (v != NULL) {
void *gcv = &v->header;
if (!gc_verifying)
Expand All @@ -142,7 +142,7 @@ static void clear_mark(int bits)
}
}

v = big_objects_marked;
v = oldest_generation_of_bigvals;
while (v != NULL) {
void *gcv = &v->header;
if (!gc_verifying)
Expand Down Expand Up @@ -170,7 +170,7 @@ static void gc_verify_track(jl_ptls_t ptls)
return;
do {
jl_gc_markqueue_t mq;
jl_gc_markqueue_t *mq2 = &ptls->mark_queue;
jl_gc_markqueue_t *mq2 = &ptls->gc_tls.mark_queue;
ws_queue_t *cq = &mq.chunk_queue;
ws_queue_t *q = &mq.ptr_queue;
jl_atomic_store_relaxed(&cq->top, 0);
Expand Down Expand Up @@ -230,7 +230,7 @@ void gc_verify(jl_ptls_t ptls)
return;
}
jl_gc_markqueue_t mq;
jl_gc_markqueue_t *mq2 = &ptls->mark_queue;
jl_gc_markqueue_t *mq2 = &ptls->gc_tls.mark_queue;
ws_queue_t *cq = &mq.chunk_queue;
ws_queue_t *q = &mq.ptr_queue;
jl_atomic_store_relaxed(&cq->top, 0);
Expand Down Expand Up @@ -289,7 +289,7 @@ static void gc_verify_tags_page(jl_gc_pagemeta_t *pg)
int p_n = pg->pool_n;
int t_n = pg->thread_n;
jl_ptls_t ptls2 = gc_all_tls_states[t_n];
jl_gc_pool_t *p = &ptls2->heap.norm_pools[p_n];
jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[p_n];
int osize = pg->osize;
char *data = pg->data;
char *page_begin = data + GC_PAGE_OFFSET;
Expand Down Expand Up @@ -349,42 +349,13 @@ static void gc_verify_tags_page(jl_gc_pagemeta_t *pg)

static void gc_verify_tags_pagetable0(pagetable0_t *pagetable0)
{
for (int pg_i = 0; pg_i < REGION0_PG_COUNT / 32; pg_i++) {
uint32_t line = pagetable0->allocmap[pg_i];
if (line) {
for (int j = 0; j < 32; j++) {
if ((line >> j) & 1) {
gc_verify_tags_page(pagetable0->meta[pg_i * 32 + j]);
}
}
}
}
}

static void gc_verify_tags_pagetable1(pagetable1_t *pagetable1)
{
for (int pg_i = 0; pg_i < REGION1_PG_COUNT / 32; pg_i++) {
uint32_t line = pagetable1->allocmap0[pg_i];
if (line) {
for (int j = 0; j < 32; j++) {
if ((line >> j) & 1) {
gc_verify_tags_pagetable0(pagetable1->meta0[pg_i * 32 + j]);
}
}
}
}
}

static void gc_verify_tags_pagetable(void)
{
for (int pg_i = 0; pg_i < (REGION2_PG_COUNT + 31) / 32; pg_i++) {
uint32_t line = memory_map.allocmap1[pg_i];
if (line) {
for (int j = 0; j < 32; j++) {
if ((line >> j) & 1) {
gc_verify_tags_pagetable1(memory_map.meta1[pg_i * 32 + j]);
}
}
for (int i = 0; i < gc_n_threads; i++) {
jl_ptls_t ptls2 = gc_all_tls_states[i];
jl_gc_page_stack_t *pgstk = &ptls2->gc_tls.page_metadata_allocd;
jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&pgstk->bottom);
while (pg != NULL) {
gc_verify_tags_page(pg);
pg = pg->next;
}
}
}
Expand All @@ -396,7 +367,7 @@ void gc_verify_tags(void)
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
for (int i = 0; i < JL_GC_N_POOLS; i++) {
// for all pools, iterate its freelist
jl_gc_pool_t *p = &ptls2->heap.norm_pools[i];
jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i];
jl_taggedvalue_t *next = p->freelist;
jl_taggedvalue_t *last = NULL;
char *allocating = gc_page_data(next);
Expand Down Expand Up @@ -837,8 +808,8 @@ void gc_time_mark_pause(int64_t t0, int64_t scanned_bytes,
int64_t remset_nptr = 0;
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
last_remset_len += ptls2->heap.last_remset->len;
remset_nptr = ptls2->heap.remset_nptr;
last_remset_len += ptls2->gc_tls.heap.last_remset->len;
remset_nptr = ptls2->gc_tls.heap.remset_nptr;
}
jl_safe_printf("GC mark pause %.2f ms | "
"scanned %" PRId64 " kB = %" PRId64 " + %" PRId64 " | "
Expand Down Expand Up @@ -969,13 +940,13 @@ void gc_stats_all_pool(void)
for (int i = 0; i < JL_GC_N_POOLS; i++) {
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
size_t b = pool_stats(&ptls2->heap.norm_pools[i], &w, &np, &nol);
size_t b = pool_stats(&ptls2->gc_tls.heap.norm_pools[i], &w, &np, &nol);
nb += b;
no += (b / ptls2->heap.norm_pools[i].osize);
no += (b / ptls2->gc_tls.heap.norm_pools[i].osize);
tw += w;
tp += np;
nold += nol;
noldbytes += nol * ptls2->heap.norm_pools[i].osize;
noldbytes += nol * ptls2->gc_tls.heap.norm_pools[i].osize;
}
}
jl_safe_printf("%lld objects (%lld%% old), %lld kB (%lld%% old) total allocated, "
Expand All @@ -994,15 +965,15 @@ void gc_stats_big_obj(void)
size_t nused=0, nbytes=0, nused_old=0, nbytes_old=0;
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
bigval_t *v = ptls2->heap.big_objects;
bigval_t *v = ptls2->gc_tls.heap.young_generation_of_bigvals;
while (v != NULL) {
if (gc_marked(v->bits.gc)) {
nused++;
nbytes += v->sz & ~3;
}
v = v->next;
}
v = big_objects_marked;
v = oldest_generation_of_bigvals;
while (v != NULL) {
if (gc_marked(v->bits.gc)) {
nused_old++;
Expand All @@ -1011,7 +982,7 @@ void gc_stats_big_obj(void)
v = v->next;
}

mallocarray_t *ma = ptls2->heap.mallocarrays;
mallocarray_t *ma = ptls2->gc_tls.heap.mallocarrays;
while (ma != NULL) {
if (gc_marked(jl_astaggedvalue(ma->a)->bits.gc)) {
nused++;
Expand Down Expand Up @@ -1057,7 +1028,7 @@ static void gc_count_pool_pagetable(void)
{
for (int i = 0; i < gc_n_threads; i++) {
jl_ptls_t ptls2 = gc_all_tls_states[i];
jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_allocd.bottom);
jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
while (pg != NULL) {
if (gc_alloc_map_is_set(pg->data)) {
gc_count_pool_page(pg);
Expand Down
16 changes: 8 additions & 8 deletions src/gc-stacks.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ static void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz)
if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) {
unsigned pool_id = select_pool(bufsz);
if (pool_sizes[pool_id] == bufsz) {
small_arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf);
small_arraylist_push(&ptls->gc_tls.heap.free_stacks[pool_id], stkbuf);
return;
}
}
Expand Down Expand Up @@ -148,7 +148,7 @@ void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task)
#ifdef _COMPILER_ASAN_ENABLED_
__asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz);
#endif
small_arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf);
small_arraylist_push(&ptls->gc_tls.heap.free_stacks[pool_id], stkbuf);
}
}
}
Expand All @@ -163,7 +163,7 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO
if (ssize <= pool_sizes[JL_N_STACK_POOLS - 1]) {
unsigned pool_id = select_pool(ssize);
ssize = pool_sizes[pool_id];
small_arraylist_t *pool = &ptls->heap.free_stacks[pool_id];
small_arraylist_t *pool = &ptls->gc_tls.heap.free_stacks[pool_id];
if (pool->len > 0) {
stk = small_arraylist_pop(pool);
}
Expand All @@ -184,7 +184,7 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO
}
*bufsz = ssize;
if (owner) {
small_arraylist_t *live_tasks = &ptls->heap.live_tasks;
small_arraylist_t *live_tasks = &ptls->gc_tls.heap.live_tasks;
mtarraylist_push(live_tasks, owner);
}
return stk;
Expand All @@ -209,7 +209,7 @@ void sweep_stack_pools(void)

// free half of stacks that remain unused since last sweep
for (int p = 0; p < JL_N_STACK_POOLS; p++) {
small_arraylist_t *al = &ptls2->heap.free_stacks[p];
small_arraylist_t *al = &ptls2->gc_tls.heap.free_stacks[p];
size_t n_to_free;
if (al->len > MIN_STACK_MAPPINGS_PER_POOL) {
n_to_free = al->len / 2;
Expand All @@ -225,7 +225,7 @@ void sweep_stack_pools(void)
}
}

small_arraylist_t *live_tasks = &ptls2->heap.live_tasks;
small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks;
size_t n = 0;
size_t ndel = 0;
size_t l = live_tasks->len;
Expand Down Expand Up @@ -280,7 +280,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void)
jl_ptls_t ptls2 = allstates[i];
if (ptls2 == NULL)
continue;
small_arraylist_t *live_tasks = &ptls2->heap.live_tasks;
small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks;
size_t n = mtarraylist_length(live_tasks);
l += n + (ptls2->root_task->stkbuf != NULL);
}
Expand All @@ -303,7 +303,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void)
goto restart;
((void**)jl_array_data(a))[j++] = t;
}
small_arraylist_t *live_tasks = &ptls2->heap.live_tasks;
small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks;
size_t n = mtarraylist_length(live_tasks);
for (size_t i = 0; i < n; i++) {
jl_task_t *t = (jl_task_t*)mtarraylist_get(live_tasks, i);
Expand Down
92 changes: 92 additions & 0 deletions src/gc-tls.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// This file is a part of Julia. License is MIT: https://julialang.org/license

// Meant to be included in "julia_threads.h"
#ifndef JL_GC_TLS_H
#define JL_GC_TLS_H

#include "julia_atomics.h"
#include "work-stealing-queue.h"
// GC threading ------------------------------------------------------------------

#include "arraylist.h"

#ifdef __cplusplus
extern "C" {
#endif

// Allocation pool for objects of a single size (`osize`).
// One of these exists per entry of `jl_thread_heap_t::norm_pools`; callers pick
// the entry by pool id (e.g. `norm_pools[pool_id]` in `jl_alloc_string`).
typedef struct {
struct _jl_taggedvalue_t *freelist; // root of list of free objects
struct _jl_taggedvalue_t *newpages; // root of list of chunks of free objects
uint16_t osize; // size of objects in this pool
} jl_gc_pool_t;

// Heap bookkeeping owned by one thread's GC state; embedded as the `heap`
// member of `jl_gc_tls_states_t` and reached as `ptls->gc_tls.heap`.
// NOTE(review): `gc_time_mark_pause` in gc-debug.c reads `heap.last_remset`,
// which is not declared in this struct — confirm that code path is compiled
// out in this fork or update it to match.
typedef struct {
// variable for tracking weak references
small_arraylist_t weak_refs;
// live tasks started on this thread
// that are holding onto a stack from the pool
small_arraylist_t live_tasks;

// variables for tracking malloc'd arrays
struct _mallocarray_t *mallocarrays;
struct _mallocarray_t *mafreelist;

// variable for tracking young (i.e. not in `GC_OLD_MARKED`/last generation) large objects
struct _bigval_t *young_generation_of_bigvals;

// lower bound of the number of pointers inside remembered values
int remset_nptr;
// remembered set
arraylist_t remset;

// variables for allocating objects from pools
// (`norm_pools` is indexed by pool id; each entry serves one object size)
#define JL_GC_N_MAX_POOLS 51 // conservative. must be kept in sync with `src/julia_internal.h`
jl_gc_pool_t norm_pools[JL_GC_N_MAX_POOLS];

// free stack buffers, one list per stack pool id; gc-stacks.c pushes freed
// stacks here and pops from here before allocating a new one
#define JL_N_STACK_POOLS 16
small_arraylist_t free_stacks[JL_N_STACK_POOLS];
} jl_thread_heap_t;

// Atomic GC counters kept per thread; embedded as the `gc_num` member of
// `jl_gc_tls_states_t`.
// NOTE(review): the individual fields are undocumented here. The names suggest
// byte totals (`allocd`, `freed`, `pool_live_bytes`) and call counts
// (`malloc`, `realloc`, `poolalloc`, `bigalloc`, `freecall`) — TODO confirm
// against the code that updates them.
typedef struct {
_Atomic(int64_t) allocd;
_Atomic(int64_t) pool_live_bytes;
_Atomic(int64_t) freed;
_Atomic(uint64_t) malloc;
_Atomic(uint64_t) realloc;
_Atomic(uint64_t) poolalloc;
_Atomic(uint64_t) bigalloc;
_Atomic(uint64_t) freecall;
} jl_thread_gc_num_t;

// Mark-queue state; embedded as the `mark_queue` member of
// `jl_gc_tls_states_t`. Holds two work-stealing queues plus an array list.
typedef struct {
ws_queue_t chunk_queue; // work-stealing queue of chunks (element type not visible here)
ws_queue_t ptr_queue; // work-stealing queue of pointers (element type not visible here)
arraylist_t reclaim_set; // NOTE(review): purpose not shown in this header — TODO document
} jl_gc_markqueue_t;

// Thread-local increments of the `perm_scanned_bytes` and `scanned_bytes`
// counters; embedded as the `gc_cache` member of `jl_gc_tls_states_t`.
typedef struct {
// thread local increment of `perm_scanned_bytes`
size_t perm_scanned_bytes;
// thread local increment of `scanned_bytes`
size_t scanned_bytes;
} jl_gc_mark_cache_t;

// Head of a singly linked list of page metadata records.
// Readers (e.g. gc-debug.c) atomically load `bottom` and then follow each
// record's `next` pointer until NULL.
typedef struct {
_Atomic(struct _jl_gc_pagemeta_t *) bottom; // first record in the list, or NULL when empty
} jl_gc_page_stack_t;

// Aggregate of the GC-owned state kept in each thread's TLS record.
// Code reaches it through the `gc_tls` member of the thread state,
// e.g. `ptls->gc_tls.heap` or `ptls->gc_tls.mark_queue`.
typedef struct {
jl_thread_heap_t heap; // pools, remembered set, big objects, stack pools
jl_gc_page_stack_t page_metadata_allocd; // list of page metadata walked via `bottom`/`next`
jl_thread_gc_num_t gc_num; // atomic GC counters
jl_gc_markqueue_t mark_queue; // chunk/pointer work-stealing queues
jl_gc_mark_cache_t gc_cache; // thread-local scanned-bytes increments
_Atomic(size_t) gc_sweeps_requested; // NOTE(review): semantics not shown in this header — TODO document
arraylist_t sweep_objs; // NOTE(review): semantics not shown in this header — TODO document
} jl_gc_tls_states_t;

#ifdef __cplusplus
}
#endif

#endif // JL_GC_TLS_H
Loading
Loading