Skip to content

Commit 39178b6

Browse files
committed
GC safepoint and GC transition
1 parent 2faa94b commit 39178b6

File tree

10 files changed

+297
-42
lines changed

10 files changed

+297
-42
lines changed

src/gc.c

Lines changed: 197 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,49 @@ extern "C" {
3434
#endif
3535

3636
JL_DEFINE_MUTEX(pagealloc)
37+
// Protect all access to `finalizer_list`, `finalizer_list_marked` and
38+
// `to_finalize`.
3739
JL_DEFINE_MUTEX(finalizers)
3840

41+
/**
42+
* Note about GC synchronization:
43+
*
44+
* When entering `jl_gc_collect()`, `jl_gc_running` is atomically changed from
45+
* `0` to `1` to make sure that only one thread can be running the GC. Other
46+
* threads that enter `jl_gc_collect()` at the same time (or later calling
47+
* from unmanaged code) will wait in `jl_gc_collect()` until the GC is finished.
48+
*
49+
* Before starting the mark phase the GC thread calls `jl_gc_signal_begin()`
50+
* to make sure all the threads are in a safe state for the GC. The function
51+
* activates the safepoint and waits for all the threads to get ready for the
52+
* GC (`gc_states != 0`). It also acquires the `finalizers` lock so that no
53+
* other thread will access them when the GC is running.
54+
*
55+
* During the mark and sweep phase of the GC, the threads that are not running
56+
* the GC should either be running unmanaged code (or code section that does
57+
* not have a GC critical region mainly including storing to the stack or
58+
* another object) or paused at a safepoint and wait for the GC to finish.
59+
* If a thread wants to switch from running unmanaged code to running managed
60+
* code, it has to perform a GC safepoint check after setting the `gc_state`
61+
* flag (see `jl_gc_state_set_and_save()`). It is possible that the thread might
62+
* have `gc_state == 0` in the middle of the GC transition back before entering
63+
* the safepoint. This is fine since the thread won't be executing any GC
64+
* critical region during that time.
65+
*
66+
* When the GC needs to run the finalizers, it cannot keep the safepoint
67+
* activated since the code in the finalizer might trigger it and fall into
68+
* an infinite loop. It also (not required since the lock is recursive) releases the
69+
* `finalizers` lock so that other threads can update the finalizers list at
70+
* the same time. Since the safe point is deactivated in this phase and other
71+
* threads might have entered managed state from unmanaged state, when the
72+
* finalizers finish running, the GC thread waits for other threads to enter a
73+
* safe state again before continuing the GC. It is not possible for other
74+
* threads to enter the GC since `jl_gc_running` is still `1` in this phase.
75+
* In the future, it might be better to delay this after the GC is finished so
76+
* that we can wake up other threads and do more useful work as the finalizers
77+
* run.
78+
*/
79+
3980
// manipulating mark bits
4081

4182
#define GC_CLEAN 0 // freshly allocated
@@ -313,7 +354,93 @@ NOINLINE static uintptr_t gc_get_stack_ptr(void)
313354

314355
#include "gc-debug.c"
315356

316-
int jl_in_gc; // referenced from switchto task.c
357+
// Only one thread can be doing the collection right now. That thread sets
358+
// `jl_gc_running` to one on entering the GC and sets it back to zero afterward.
359+
static volatile uint64_t jl_gc_running = 0;
360+
361+
#ifdef JULIA_ENABLE_THREADING
362+
JL_DLLEXPORT volatile size_t *jl_gc_signal_page = NULL;
363+
364+
static void jl_wait_for_gc(void)
365+
{
366+
assert(!jl_in_gc && "Safepoint triggered in GC");
367+
// In case assertions are off, make a safepoint in GC a segfault instead
368+
// of an infinite loop.
369+
if (jl_in_gc)
370+
return;
371+
while (jl_gc_running) {
372+
jl_cpu_pause(); // yield?
373+
}
374+
}
375+
376+
void jl_gc_signal_wait(void)
377+
{
378+
int8_t state = jl_get_ptls_states()->gc_state;
379+
jl_get_ptls_states()->gc_state = 1;
380+
jl_wait_for_gc();
381+
jl_get_ptls_states()->gc_state = state;
382+
}
383+
384+
static void jl_gc_wait_for_the_world(void)
385+
{
386+
for (int i = 0;i < jl_n_threads;i++) {
387+
jl_tls_states_t *ptls = jl_all_task_states[i].ptls;
388+
while (!ptls->gc_state) {
389+
jl_cpu_pause(); // yield?
390+
}
391+
}
392+
}
393+
394+
void jl_gc_signal_init(void)
395+
{
396+
// jl_page_size isn't available yet.
397+
#ifdef _OS_WINDOWS_
398+
jl_gc_signal_page = (size_t*)VirtualAlloc(NULL, jl_getpagesize(),
399+
MEM_RESERVE, PAGE_READONLY);
400+
#else
401+
jl_gc_signal_page = (size_t*)mmap(0, jl_getpagesize(), PROT_READ,
402+
MAP_NORESERVE | MAP_PRIVATE |
403+
MAP_ANONYMOUS, -1, 0);
404+
if (jl_gc_signal_page == MAP_FAILED)
405+
jl_gc_signal_page = NULL;
406+
#endif
407+
if (jl_gc_signal_page == NULL) {
408+
jl_printf(JL_STDERR, "could not allocate GC synchronization page\n");
409+
abort();
410+
}
411+
}
412+
413+
static void jl_gc_signal_begin(void)
414+
{
415+
#ifdef _OS_WINDOWS_
416+
DWORD old_prot;
417+
VirtualProtect((void*)jl_gc_signal_page, jl_page_size,
418+
PAGE_NOACCESS, &old_prot);
419+
#else
420+
mprotect((void*)jl_gc_signal_page, jl_page_size, PROT_NONE);
421+
#endif
422+
jl_gc_wait_for_the_world();
423+
JL_LOCK_NOGC(finalizers);
424+
}
425+
426+
static void jl_gc_signal_end(void)
427+
{
428+
JL_UNLOCK(finalizers);
429+
#ifdef _OS_WINDOWS_
430+
DWORD old_prot;
431+
VirtualProtect((void*)jl_gc_signal_page, jl_page_size,
432+
PAGE_READONLY, &old_prot);
433+
#else
434+
mprotect((void*)jl_gc_signal_page, jl_page_size, PROT_READ);
435+
#endif
436+
}
437+
#else
438+
439+
#define jl_gc_signal_begin()
440+
#define jl_gc_signal_end()
441+
442+
#endif
443+
317444
static int jl_gc_finalizers_inhibited; // don't run finalizers during codegen #11956
318445

319446
// malloc wrappers, aligned allocation
@@ -375,12 +502,15 @@ static void jl_gc_push_arraylist(arraylist_t *list)
375502
jl_pgcstack = (jl_gcframe_t*)list->items;
376503
}
377504

378-
// Same assumption as `jl_gc_push_arraylist`
505+
// Same assumption as `jl_gc_push_arraylist`. Requires the finalizers lock
506+
// to be held by the current thread; the lock is released before the
507+
// finalizers are run.
379508
static void jl_gc_run_finalizers_in_list(arraylist_t *list)
380509
{
381510
size_t len = list->len;
382511
jl_value_t **items = (jl_value_t**)list->items;
383512
jl_gc_push_arraylist(list);
513+
JL_UNLOCK(finalizers);
384514
for (size_t i = 2;i < len;i += 2) {
385515
run_finalizer(items[i], items[i + 1]);
386516
}
@@ -389,8 +519,11 @@ static void jl_gc_run_finalizers_in_list(arraylist_t *list)
389519

390520
static void run_finalizers(void)
391521
{
392-
if (to_finalize.len == 0)
522+
JL_LOCK_NOGC(finalizers);
523+
if (to_finalize.len == 0) {
524+
JL_UNLOCK(finalizers);
393525
return;
526+
}
394527
arraylist_t copied_list;
395528
memcpy(&copied_list, &to_finalize, sizeof(copied_list));
396529
if (to_finalize.items == to_finalize._space) {
@@ -400,6 +533,7 @@ static void run_finalizers(void)
400533
// empty out the first two entries for the GC frame
401534
arraylist_push(&copied_list, copied_list.items[0]);
402535
arraylist_push(&copied_list, copied_list.items[1]);
536+
// This releases the finalizers lock.
403537
jl_gc_run_finalizers_in_list(&copied_list);
404538
arraylist_free(&copied_list);
405539
}
@@ -430,22 +564,24 @@ static void schedule_all_finalizers(arraylist_t* flist)
430564

431565
void jl_gc_run_all_finalizers(void)
432566
{
567+
JL_LOCK_NOGC(finalizers);
433568
schedule_all_finalizers(&finalizer_list);
434569
schedule_all_finalizers(&finalizer_list_marked);
570+
JL_UNLOCK(finalizers);
435571
run_finalizers();
436572
}
437573

438574
JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f)
439575
{
440-
JL_LOCK(finalizers);
576+
JL_LOCK_NOGC(finalizers);
441577
arraylist_push(&finalizer_list, (void*)v);
442578
arraylist_push(&finalizer_list, (void*)f);
443579
JL_UNLOCK(finalizers);
444580
}
445581

446582
JL_DLLEXPORT void jl_finalize(jl_value_t *o)
447583
{
448-
JL_LOCK(finalizers);
584+
JL_LOCK_NOGC(finalizers);
449585
// Copy the finalizers into a temporary list so that code in the finalizer
450586
// won't change the list as we loop through them.
451587
// This list is also used as the GC frame when we are running the finalizers
@@ -457,10 +593,13 @@ JL_DLLEXPORT void jl_finalize(jl_value_t *o)
457593
// still holding a reference to the object
458594
finalize_object(&finalizer_list, o, &copied_list);
459595
finalize_object(&finalizer_list_marked, o, &copied_list);
460-
JL_UNLOCK(finalizers);
461596
if (copied_list.len > 2) {
597+
// This releases the finalizers lock.
462598
jl_gc_run_finalizers_in_list(&copied_list);
463599
}
600+
else {
601+
JL_UNLOCK(finalizers);
602+
}
464603
arraylist_free(&copied_list);
465604
}
466605

@@ -712,7 +851,7 @@ static NOINLINE void *malloc_page(void)
712851
int i;
713852
region_t* region;
714853
int region_i = 0;
715-
JL_LOCK(pagealloc);
854+
JL_LOCK_NOGC(pagealloc);
716855
while(region_i < REGION_COUNT) {
717856
region = regions[region_i];
718857
if (region == NULL) {
@@ -830,6 +969,7 @@ static inline int maybe_collect(void)
830969
jl_gc_collect(0);
831970
return 1;
832971
}
972+
jl_gc_safepoint();
833973
return 0;
834974
}
835975

@@ -1111,6 +1251,9 @@ static inline void *__pool_alloc(pool_t* p, int osize, int end_offset)
11111251
jl_gc_collect(0);
11121252
//allocd_bytes += osize;
11131253
}
1254+
else {
1255+
jl_gc_safepoint();
1256+
}
11141257
gc_num.poolalloc++;
11151258
// first try to use the freelist
11161259
v = p->freelist;
@@ -2060,23 +2203,9 @@ void prepare_sweep(void)
20602203
{
20612204
}
20622205

2063-
JL_DLLEXPORT void jl_gc_collect(int full)
2206+
// Only one thread should be running in this function
2207+
static void _jl_gc_collect(int full, char *stack_hi)
20642208
{
2065-
if (!is_gc_enabled) return;
2066-
if (jl_in_gc) return;
2067-
char *stack_hi = (char*)gc_get_stack_ptr();
2068-
gc_debug_print();
2069-
JL_SIGATOMIC_BEGIN();
2070-
2071-
#ifdef JULIA_ENABLE_THREADING
2072-
ti_threadgroup_barrier(tgworld, ti_tid);
2073-
if (ti_tid != 0) {
2074-
ti_threadgroup_barrier(tgworld, ti_tid);
2075-
return;
2076-
}
2077-
#endif
2078-
2079-
jl_in_gc = 1;
20802209
uint64_t t0 = jl_hrtime();
20812210
int recollect = 0;
20822211
#if defined(GC_TIME)
@@ -2271,8 +2400,10 @@ JL_DLLEXPORT void jl_gc_collect(int full)
22712400
#if defined(GC_FINAL_STATS) || defined(GC_TIME)
22722401
finalize_time = jl_hrtime();
22732402
#endif
2274-
if (!jl_gc_finalizers_inhibited) {
2403+
if (!jl_gc_finalizers_inhibited && to_finalize.len) {
2404+
jl_gc_signal_end();
22752405
run_finalizers();
2406+
jl_gc_signal_begin();
22762407
}
22772408
#if defined(GC_FINAL_STATS) || defined(GC_TIME)
22782409
finalize_time = jl_hrtime() - finalize_time;
@@ -2295,13 +2426,7 @@ JL_DLLEXPORT void jl_gc_collect(int full)
22952426
#ifdef GC_FINAL_STATS
22962427
max_pause = max_pause < pause ? pause : max_pause;
22972428
#endif
2298-
jl_in_gc = 0;
22992429

2300-
#ifdef JULIA_ENABLE_THREADING
2301-
ti_threadgroup_barrier(tgworld, ti_tid);
2302-
#endif
2303-
2304-
JL_SIGATOMIC_END();
23052430
#ifdef GC_TIME
23062431
if (estimate_freed != SAVE2) {
23072432
// this should not happen but it does
@@ -2310,8 +2435,49 @@ JL_DLLEXPORT void jl_gc_collect(int full)
23102435
#endif
23112436
if (recollect) {
23122437
n_pause--;
2313-
jl_gc_collect(0);
2438+
_jl_gc_collect(0, stack_hi);
2439+
}
2440+
}
2441+
2442+
JL_DLLEXPORT void jl_gc_collect(int full)
2443+
{
2444+
if (!is_gc_enabled || jl_in_gc)
2445+
return;
2446+
char *stack_hi = (char*)gc_get_stack_ptr();
2447+
gc_debug_print();
2448+
JL_SIGATOMIC_BEGIN();
2449+
2450+
int8_t old_state = jl_get_ptls_states()->gc_state;
2451+
jl_get_ptls_states()->gc_state = 1;
2452+
// In case multiple threads enter the GC at the same time, only allow
2453+
// one of them to actually run the collection. We can't just let the
2454+
// master thread do the GC since it might be running unmanaged code
2455+
// and can take arbitrarily long time before hitting a safe point.
2456+
if (!JL_ATOMIC_COMPARE_AND_SWAP(jl_gc_running, 0, 1)) {
2457+
#ifdef JULIA_ENABLE_THREADING
2458+
JL_SIGATOMIC_END();
2459+
jl_wait_for_gc();
2460+
jl_gc_state_set_and_save(old_state, 1);
2461+
#else
2462+
// For single thread, jl_in_gc is always true when jl_gc_running is
2463+
// true so this should never happen.
2464+
assert(0 && "GC synchronization failure");
2465+
#endif
2466+
return;
23142467
}
2468+
jl_gc_signal_begin();
2469+
2470+
jl_in_gc = 1;
2471+
_jl_gc_collect(full, stack_hi);
2472+
jl_in_gc = 0;
2473+
2474+
// Need to reset the page protection before resetting the flag since
2475+
// the thread will trigger a segfault immediately after returning from the
2476+
// signal handler.
2477+
jl_gc_signal_end();
2478+
jl_gc_running = 0;
2479+
JL_SIGATOMIC_END();
2480+
jl_gc_state_set_and_save(old_state, 1);
23152481
}
23162482

23172483
// allocator entry points

src/gf.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -422,7 +422,7 @@ jl_function_t *jl_method_cache_insert(jl_methtable_t *mt, jl_tupletype_t *type,
422422
int jl_in_inference = 0;
423423
void jl_type_infer(jl_lambda_info_t *li, jl_tupletype_t *argtypes, jl_lambda_info_t *def)
424424
{
425-
JL_LOCK(codegen);
425+
JL_LOCK(codegen); // Might GC
426426
int last_ii = jl_in_inference;
427427
jl_in_inference = 1;
428428
if (jl_typeinf_func != NULL) {
@@ -488,7 +488,7 @@ static jl_function_t *cache_method(jl_methtable_t *mt, jl_tupletype_t *type,
488488
jl_function_t *method, jl_tupletype_t *decl,
489489
jl_svec_t *sparams, int8_t isstaged)
490490
{
491-
JL_LOCK(codegen);
491+
JL_LOCK(codegen); // Might GC
492492
size_t i;
493493
int need_guard_entries = 0;
494494
jl_value_t *temp=NULL;

src/init.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,7 @@ void _julia_init(JL_IMAGE_SEARCH rel)
466466
#ifdef JULIA_ENABLE_THREADING
467467
// Make sure we finalize the tls callback before starting any threads.
468468
jl_get_ptls_states_getter();
469+
jl_gc_signal_init();
469470
#endif
470471
libsupport_init();
471472
jl_io_loop = uv_default_loop(); // this loop will internal events (spawning process etc.),

src/jltypes.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1907,7 +1907,7 @@ static ssize_t lookup_type_idx(jl_typename_t *tn, jl_value_t **key, size_t n, in
19071907
static jl_value_t *lookup_type(jl_typename_t *tn, jl_value_t **key, size_t n)
19081908
{
19091909
int ord = is_typekey_ordered(key, n);
1910-
JL_LOCK(typecache);
1910+
JL_LOCK(typecache); // Might GC
19111911
ssize_t idx = lookup_type_idx(tn, key, n, ord);
19121912
jl_value_t *t = (idx < 0) ? NULL : jl_svecref(ord ? tn->cache : tn->linearcache, idx);
19131913
JL_UNLOCK(typecache);
@@ -1990,7 +1990,7 @@ jl_value_t *jl_cache_type_(jl_datatype_t *type)
19901990
{
19911991
if (is_cacheable(type)) {
19921992
int ord = is_typekey_ordered(jl_svec_data(type->parameters), jl_svec_len(type->parameters));
1993-
JL_LOCK(typecache);
1993+
JL_LOCK(typecache); // Might GC
19941994
ssize_t idx = lookup_type_idx(type->name, jl_svec_data(type->parameters),
19951995
jl_svec_len(type->parameters), ord);
19961996
if (idx >= 0)

0 commit comments

Comments
 (0)