Skip to content

Commit da7b6e0

Browse files
committed
GC safepoint and GC transition
1 parent 6bf8f9e commit da7b6e0

File tree

10 files changed

+298
-42
lines changed

10 files changed

+298
-42
lines changed

src/gc.c

+197-31
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,49 @@ extern "C" {
3434
#endif
3535

3636
JL_DEFINE_MUTEX(pagealloc)
37+
// Protect all access to `finalizer_list`, `finalizer_list_marked` and
38+
// `to_finalize`.
3739
JL_DEFINE_MUTEX(finalizers)
3840

41+
/**
42+
* Note about GC synchronization:
43+
*
44+
* When entering `jl_gc_collect()`, `jl_gc_running` is atomically changed from
45+
* `0` to `1` to make sure that only one thread can be running the GC. Other
46+
* threads that enter `jl_gc_collect()` at the same time (or later call it
47+
* from unmanaged code) will wait in `jl_gc_collect()` until the GC is finished.
48+
*
49+
* Before starting the mark phase the GC thread calls `jl_gc_signal_begin()`
50+
* to make sure all the threads are in a safe state for the GC. The function
51+
* activates the safepoint and waits for all the threads to get ready for the
52+
* GC (`gc_state != 0`). It also acquires the `finalizers` lock so that no
53+
* other thread will access them when the GC is running.
54+
*
55+
* During the mark and sweep phase of the GC, the threads that are not running
56+
* the GC should either be running unmanaged code (or code section that does
57+
* not have a GC critical region mainly including storing to the stack or
58+
* another object) or paused at a safepoint and wait for the GC to finish.
59+
* If a thread wants to switch from running unmanaged code to running managed
60+
* code, it has to perform a GC safepoint check after setting the `gc_state`
61+
* flag (see `jl_gc_state_set_and_save()`). It is possible that the thread might
62+
* have `gc_state == 0` in the middle of the GC transition back before entering
63+
* the safepoint. This is fine since the thread won't be executing any GC
64+
* critical region during that time.
65+
*
66+
* When the GC needs to run the finalizers, it cannot keep the safepoint
67+
* activated since the code in the finalizer might trigger it and fall into
68+
* an infinite loop. It also (not required since the lock is recursive) releases the
69+
* `finalizers` lock so that other threads can update the finalizers list at
70+
* the same time. Since the safepoint is deactivated in this phase and other
71+
* threads might have entered managed state from unmanaged state, when the
72+
* finalizers finish running, the GC thread waits for other threads to enter a
73+
* safe state again before continuing the GC. It is not possible for other
74+
* threads to enter the GC since `jl_gc_running` is still `1` in this phase.
75+
* In the future, it might be better to delay this after the GC is finished so
76+
* that we can wake up other threads and do more useful work as the finalizers
77+
* run.
78+
*/
79+
3980
// manipulating mark bits
4081

4182
#define GC_CLEAN 0 // freshly allocated
@@ -313,7 +354,93 @@ NOINLINE static uintptr_t gc_get_stack_ptr(void)
313354

314355
#include "gc-debug.c"
315356

316-
int jl_in_gc; // referenced from switchto task.c
357+
// Only one thread can be doing the collection right now. That thread sets
358+
// `jl_gc_running` to one on entering the GC and sets it back afterward.
359+
static volatile uint64_t jl_gc_running = 0;
360+
361+
#ifdef JULIA_ENABLE_THREADING
362+
DLLEXPORT volatile size_t *jl_gc_signal_page = NULL;
363+
364+
static void jl_wait_for_gc(void)
365+
{
366+
assert(!jl_in_gc && "Safepoint triggered in GC");
367+
// In case assertion is off. Make safepoint in GC a segfault instead
368+
// of an infinite loop.
369+
if (jl_in_gc)
370+
return;
371+
while (jl_gc_running) {
372+
jl_cpu_pause(); // yield?
373+
}
374+
}
375+
376+
void jl_gc_signal_wait(void)
377+
{
378+
int8_t state = jl_get_ptls_states()->gc_state;
379+
jl_get_ptls_states()->gc_state = 1;
380+
jl_wait_for_gc();
381+
jl_get_ptls_states()->gc_state = state;
382+
}
383+
384+
static void jl_gc_wait_for_the_world(void)
385+
{
386+
for (int i = 0;i < jl_n_threads;i++) {
387+
jl_tls_states_t *ptls = jl_all_task_states[i].ptls;
388+
while (!ptls->gc_state) {
389+
jl_cpu_pause(); // yield?
390+
}
391+
}
392+
}
393+
394+
void jl_gc_signal_init(void)
395+
{
396+
// jl_page_size isn't available yet.
397+
#ifdef _OS_WINDOWS_
398+
jl_gc_signal_page = (size_t*)VirtualAlloc(NULL, jl_getpagesize(),
399+
MEM_RESERVE, PAGE_READONLY);
400+
#else
401+
jl_gc_signal_page = (size_t*)mmap(0, jl_getpagesize(), PROT_READ,
402+
MAP_NORESERVE | MAP_PRIVATE |
403+
MAP_ANONYMOUS, -1, 0);
404+
if (jl_gc_signal_page == MAP_FAILED)
405+
jl_gc_signal_page = NULL;
406+
#endif
407+
if (jl_gc_signal_page == NULL) {
408+
jl_printf(JL_STDERR, "could not allocate GC synchronization page\n");
409+
abort();
410+
}
411+
}
412+
413+
static void jl_gc_signal_begin(void)
414+
{
415+
#ifdef _OS_WINDOWS_
416+
DWORD old_prot;
417+
VirtualProtect((void*)jl_gc_signal_page, jl_page_size,
418+
PAGE_NOACCESS, &old_prot);
419+
#else
420+
mprotect((void*)jl_gc_signal_page, jl_page_size, PROT_NONE);
421+
#endif
422+
jl_gc_wait_for_the_world();
423+
JL_LOCK_NOGC(finalizers);
424+
}
425+
426+
static void jl_gc_signal_end(void)
427+
{
428+
JL_UNLOCK(finalizers);
429+
#ifdef _OS_WINDOWS_
430+
DWORD old_prot;
431+
VirtualProtect((void*)jl_gc_signal_page, jl_page_size,
432+
PAGE_READONLY, &old_prot);
433+
#else
434+
mprotect((void*)jl_gc_signal_page, jl_page_size, PROT_READ);
435+
#endif
436+
}
437+
#else
438+
439+
#define jl_gc_signal_begin()
440+
#define jl_gc_signal_end()
441+
442+
#endif
443+
317444
static int jl_gc_finalizers_inhibited; // don't run finalizers during codegen #11956
318445

319446
// malloc wrappers, aligned allocation
@@ -374,12 +501,15 @@ static void jl_gc_push_arraylist(arraylist_t *list)
374501
jl_pgcstack = (jl_gcframe_t*)list->items;
375502
}
376503

377-
// Same assumption as `jl_gc_push_arraylist`
504+
// Same assumption as `jl_gc_push_arraylist`. Requires the finalizers lock
505+
// to be held by the current thread and will release the lock when the
506+
// function returns.
378507
static void jl_gc_run_finalizers_in_list(arraylist_t *list)
379508
{
380509
size_t len = list->len;
381510
jl_value_t **items = (jl_value_t**)list->items;
382511
jl_gc_push_arraylist(list);
512+
JL_UNLOCK(finalizers);
383513
for (size_t i = 2;i < len;i += 2) {
384514
run_finalizer(items[i], items[i + 1]);
385515
}
@@ -388,8 +518,11 @@ static void jl_gc_run_finalizers_in_list(arraylist_t *list)
388518

389519
static void run_finalizers(void)
390520
{
391-
if (to_finalize.len == 0)
521+
JL_LOCK_NOGC(finalizers);
522+
if (to_finalize.len == 0) {
523+
JL_UNLOCK(finalizers);
392524
return;
525+
}
393526
arraylist_t tmp;
394527
memcpy(&tmp, &to_finalize, sizeof(tmp));
395528
if (to_finalize.items == to_finalize._space) {
@@ -399,6 +532,7 @@ static void run_finalizers(void)
399532
// empty out the first two entries for the GC frame
400533
arraylist_push(&tmp, tmp.items[0]);
401534
arraylist_push(&tmp, tmp.items[1]);
535+
// This releases the finalizers lock.
402536
jl_gc_run_finalizers_in_list(&tmp);
403537
arraylist_free(&tmp);
404538
}
@@ -429,22 +563,24 @@ static void schedule_all_finalizers(arraylist_t* flist)
429563

430564
void jl_gc_run_all_finalizers(void)
431565
{
566+
JL_LOCK_NOGC(finalizers);
432567
schedule_all_finalizers(&finalizer_list);
433568
schedule_all_finalizers(&finalizer_list_marked);
569+
JL_UNLOCK(finalizers);
434570
run_finalizers();
435571
}
436572

437573
DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f)
438574
{
439-
JL_LOCK(finalizers);
575+
JL_LOCK_NOGC(finalizers);
440576
arraylist_push(&finalizer_list, (void*)v);
441577
arraylist_push(&finalizer_list, (void*)f);
442578
JL_UNLOCK(finalizers);
443579
}
444580

445581
void jl_finalize(jl_value_t *o)
446582
{
447-
JL_LOCK(finalizers);
583+
JL_LOCK_NOGC(finalizers);
448584
// Copy the finalizers into a temporary list so that code in the finalizer
449585
// won't change the list as we loop through them.
450586
// This list is also used as the GC frame when we are running the finalizers
@@ -456,10 +592,13 @@ void jl_finalize(jl_value_t *o)
456592
// still holding a reference to the object
457593
finalize_object(&finalizer_list, o, &tmp);
458594
finalize_object(&finalizer_list_marked, o, &tmp);
459-
JL_UNLOCK(finalizers);
460595
if (tmp.len > 2) {
596+
// This releases the finalizers lock.
461597
jl_gc_run_finalizers_in_list(&tmp);
462598
}
599+
else {
600+
JL_UNLOCK(finalizers);
601+
}
463602
arraylist_free(&tmp);
464603
}
465604

@@ -711,7 +850,7 @@ static NOINLINE void *malloc_page(void)
711850
int i;
712851
region_t* region;
713852
int region_i = 0;
714-
JL_LOCK(pagealloc);
853+
JL_LOCK_NOGC(pagealloc);
715854
while(region_i < REGION_COUNT) {
716855
region = regions[region_i];
717856
if (region == NULL) {
@@ -829,6 +968,7 @@ static inline int maybe_collect(void)
829968
jl_gc_collect(0);
830969
return 1;
831970
}
971+
jl_gc_safepoint();
832972
return 0;
833973
}
834974

@@ -1110,6 +1250,9 @@ static inline void *__pool_alloc(pool_t* p, int osize, int end_offset)
11101250
jl_gc_collect(0);
11111251
//allocd_bytes += osize;
11121252
}
1253+
else {
1254+
jl_gc_safepoint();
1255+
}
11131256
gc_num.poolalloc++;
11141257
// first try to use the freelist
11151258
v = p->freelist;
@@ -2059,23 +2202,9 @@ void prepare_sweep(void)
20592202
{
20602203
}
20612204

2062-
void jl_gc_collect(int full)
2205+
// Only one thread should be running in this function
2206+
static void _jl_gc_collect(int full, char *stack_hi)
20632207
{
2064-
if (!is_gc_enabled) return;
2065-
if (jl_in_gc) return;
2066-
char *stack_hi = (char*)gc_get_stack_ptr();
2067-
gc_debug_print();
2068-
JL_SIGATOMIC_BEGIN();
2069-
2070-
#ifdef JULIA_ENABLE_THREADING
2071-
ti_threadgroup_barrier(tgworld, ti_tid);
2072-
if (ti_tid != 0) {
2073-
ti_threadgroup_barrier(tgworld, ti_tid);
2074-
return;
2075-
}
2076-
#endif
2077-
2078-
jl_in_gc = 1;
20792208
uint64_t t0 = jl_hrtime();
20802209
int recollect = 0;
20812210
#if defined(GC_TIME)
@@ -2270,8 +2399,10 @@ void jl_gc_collect(int full)
22702399
#if defined(GC_FINAL_STATS) || defined(GC_TIME)
22712400
finalize_time = jl_hrtime();
22722401
#endif
2273-
if (!jl_gc_finalizers_inhibited) {
2402+
if (!jl_gc_finalizers_inhibited && to_finalize.len) {
2403+
jl_gc_signal_end();
22742404
run_finalizers();
2405+
jl_gc_signal_begin();
22752406
}
22762407
#if defined(GC_FINAL_STATS) || defined(GC_TIME)
22772408
finalize_time = jl_hrtime() - finalize_time;
@@ -2294,13 +2425,7 @@ void jl_gc_collect(int full)
22942425
#ifdef GC_FINAL_STATS
22952426
max_pause = max_pause < pause ? pause : max_pause;
22962427
#endif
2297-
jl_in_gc = 0;
22982428

2299-
#ifdef JULIA_ENABLE_THREADING
2300-
ti_threadgroup_barrier(tgworld, ti_tid);
2301-
#endif
2302-
2303-
JL_SIGATOMIC_END();
23042429
#ifdef GC_TIME
23052430
if (estimate_freed != SAVE2) {
23062431
// this should not happen but it does
@@ -2309,8 +2434,49 @@ void jl_gc_collect(int full)
23092434
#endif
23102435
if (recollect) {
23112436
n_pause--;
2312-
jl_gc_collect(0);
2437+
_jl_gc_collect(0, stack_hi);
2438+
}
2439+
}
2440+
2441+
void jl_gc_collect(int full)
2442+
{
2443+
if (!is_gc_enabled || jl_in_gc)
2444+
return;
2445+
char *stack_hi = (char*)gc_get_stack_ptr();
2446+
gc_debug_print();
2447+
JL_SIGATOMIC_BEGIN();
2448+
2449+
int8_t old_state = jl_get_ptls_states()->gc_state;
2450+
jl_get_ptls_states()->gc_state = 1;
2451+
// In case multiple threads enter the GC at the same time, only allow
2452+
// one of them to actually run the collection. We can't just let the
2453+
// master thread do the GC since it might be running unmanaged code
2454+
// and can take an arbitrarily long time before hitting a safe point.
2455+
if (!JL_ATOMIC_COMPARE_AND_SWAP(jl_gc_running, 0, 1)) {
2456+
#ifdef JULIA_ENABLE_THREADING
2457+
JL_SIGATOMIC_END();
2458+
jl_wait_for_gc();
2459+
jl_gc_state_set_and_save(old_state, 1);
2460+
#else
2461+
// For single thread, jl_in_gc is always true when jl_gc_running is
2462+
// true so this should never happen.
2463+
assert(0 && "GC synchronization failure");
2464+
#endif
2465+
return;
23132466
}
2467+
jl_gc_signal_begin();
2468+
2469+
jl_in_gc = 1;
2470+
_jl_gc_collect(full, stack_hi);
2471+
jl_in_gc = 0;
2472+
2473+
// Need to reset the page protection before resetting the flag since
2474+
// the thread will trigger a segfault immediately after returning from the
2475+
// signal handler.
2476+
jl_gc_signal_end();
2477+
jl_gc_running = 0;
2478+
JL_SIGATOMIC_END();
2479+
jl_gc_state_set_and_save(old_state, 1);
23142480
}
23152481

23162482
// allocator entry points

src/gf.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -422,7 +422,7 @@ jl_function_t *jl_method_cache_insert(jl_methtable_t *mt, jl_tupletype_t *type,
422422
int jl_in_inference = 0;
423423
void jl_type_infer(jl_lambda_info_t *li, jl_tupletype_t *argtypes, jl_lambda_info_t *def)
424424
{
425-
JL_LOCK(codegen);
425+
JL_LOCK(codegen); // Might GC
426426
int last_ii = jl_in_inference;
427427
jl_in_inference = 1;
428428
if (jl_typeinf_func != NULL) {
@@ -488,7 +488,7 @@ static jl_function_t *cache_method(jl_methtable_t *mt, jl_tupletype_t *type,
488488
jl_function_t *method, jl_tupletype_t *decl,
489489
jl_svec_t *sparams, int8_t isstaged)
490490
{
491-
JL_LOCK(codegen);
491+
JL_LOCK(codegen); // Might GC
492492
size_t i;
493493
int need_guard_entries = 0;
494494
jl_value_t *temp=NULL;

src/init.c

+1
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,7 @@ void _julia_init(JL_IMAGE_SEARCH rel)
466466
#ifdef JULIA_ENABLE_THREADING
467467
// Make sure we finalize the tls callback before starting any threads.
468468
jl_get_ptls_states_getter();
469+
jl_gc_signal_init();
469470
#endif
470471
libsupport_init();
471472
jl_io_loop = uv_default_loop(); // this loop will internal events (spawning process etc.),

src/jltypes.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -1907,7 +1907,7 @@ static ssize_t lookup_type_idx(jl_typename_t *tn, jl_value_t **key, size_t n, in
19071907
static jl_value_t *lookup_type(jl_typename_t *tn, jl_value_t **key, size_t n)
19081908
{
19091909
int ord = is_typekey_ordered(key, n);
1910-
JL_LOCK(typecache);
1910+
JL_LOCK(typecache); // Might GC
19111911
ssize_t idx = lookup_type_idx(tn, key, n, ord);
19121912
jl_value_t *t = (idx < 0) ? NULL : jl_svecref(ord ? tn->cache : tn->linearcache, idx);
19131913
JL_UNLOCK(typecache);
@@ -1990,7 +1990,7 @@ jl_value_t *jl_cache_type_(jl_datatype_t *type)
19901990
{
19911991
if (is_cacheable(type)) {
19921992
int ord = is_typekey_ordered(jl_svec_data(type->parameters), jl_svec_len(type->parameters));
1993-
JL_LOCK(typecache);
1993+
JL_LOCK(typecache); // Might GC
19941994
ssize_t idx = lookup_type_idx(type->name, jl_svec_data(type->parameters),
19951995
jl_svec_len(type->parameters), ord);
19961996
if (idx >= 0)

0 commit comments

Comments
 (0)