@@ -34,8 +34,49 @@ extern "C" {
34
34
#endif
35
35
36
36
JL_DEFINE_MUTEX (pagealloc )
37
+ // Protect all access to `finalizer_list`, `finalizer_list_marked` and
38
+ // `to_finalize`.
37
39
JL_DEFINE_MUTEX (finalizers )
38
40
41
/**
 * Note about GC synchronization:
 *
 * When entering `jl_gc_collect()`, `jl_gc_running` is atomically changed from
 * `0` to `1` to ensure that only one thread can be running the GC. Other
 * threads that enter `jl_gc_collect()` at the same time (or later, calling
 * from unmanaged code) will wait in `jl_gc_collect()` until the GC is finished.
 *
 * Before starting the mark phase, the GC thread calls `jl_gc_signal_begin()`
 * to make sure all threads are in a safe state for the GC. The function
 * activates the safepoint and waits for all threads to get ready for the
 * GC (`gc_state != 0`). It also acquires the `finalizers` lock so that no
 * other thread will access the finalizer lists while the GC is running.
 *
 * During the mark and sweep phases of the GC, the threads that are not running
 * the GC should either be running unmanaged code (or a code section that does
 * not contain a GC critical region, mainly one that only stores to the stack
 * or to another object) or be paused at a safepoint waiting for the GC to
 * finish. If a thread wants to switch from running unmanaged code to running
 * managed code, it has to perform a GC safepoint check after setting the
 * `gc_state` flag (see `jl_gc_state_set_and_save()`; it is possible that the
 * thread has `gc_state == 0` in the middle of the transition back, before it
 * reaches the safepoint — this is fine, since the thread won't be executing
 * any GC critical region during that time).
 *
 * When the GC needs to run the finalizers, it cannot keep the safepoint
 * active, since code in a finalizer might trigger it and fall into an
 * infinite loop. It also releases the `finalizers` lock (not strictly
 * required, since the lock is recursive) so that other threads can update the
 * finalizer lists at the same time. Since the safepoint is deactivated in
 * this phase and other threads might have entered the managed state from the
 * unmanaged state, when the finalizers finish running the GC thread waits for
 * the other threads to enter a safe state again before continuing the GC. It
 * is not possible for other threads to enter the GC, since `jl_gc_running` is
 * still `1` in this phase. In the future, it might be better to delay running
 * the finalizers until after the GC is finished, so that we can wake up the
 * other threads and do more useful work while the finalizers run.
 */
79
+
39
80
// manipulating mark bits
40
81
41
82
#define GC_CLEAN 0 // freshly allocated
@@ -313,7 +354,93 @@ NOINLINE static uintptr_t gc_get_stack_ptr(void)
313
354
314
355
#include "gc-debug.c"
315
356
316
- int jl_in_gc ; // referenced from switchto task.c
357
+ // Only one thread can be doing the collection right now. That thread set
358
+ // `jl_running_gc` to one on entering the GC and set it back afterward.
359
+ static volatile uint64_t jl_gc_running = 0 ;
360
+
361
+ #ifdef JULIA_ENABLE_THREADING
362
+ DLLEXPORT volatile size_t * jl_gc_signal_page = NULL ;
363
+
364
// Spin until the in-progress collection is done (`jl_gc_running` drops back
// to 0). Must only be called from threads that are NOT running the collector.
static void jl_wait_for_gc(void)
{
    assert(!jl_in_gc && "Safepoint triggered in GC");
    // In case the assertion is compiled out: returning here (instead of
    // spinning on a flag the GC thread itself will never clear) turns a
    // safepoint hit inside the GC into a segfault rather than an
    // infinite loop, which is easier to diagnose.
    if (jl_in_gc)
        return;
    while (jl_gc_running) {
        jl_cpu_pause(); // yield?
    }
}
375
+
376
+ void jl_gc_signal_wait (void )
377
+ {
378
+ int8_t state = jl_get_ptls_states ()-> gc_state ;
379
+ jl_get_ptls_states ()-> gc_state = 1 ;
380
+ jl_wait_for_gc ();
381
+ jl_get_ptls_states ()-> gc_state = state ;
382
+ }
383
+
384
+ static void jl_gc_wait_for_the_world (void )
385
+ {
386
+ for (int i = 0 ;i < jl_n_threads ;i ++ ) {
387
+ jl_tls_states_t * ptls = jl_all_task_states [i ].ptls ;
388
+ while (!ptls -> gc_state ) {
389
+ jl_cpu_pause (); // yield?
390
+ }
391
+ }
392
+ }
393
+
394
+ void jl_gc_signal_init (void )
395
+ {
396
+ // jl_page_size isn't available yet.
397
+ #ifdef _OS_WINDOWS_
398
+ jl_gc_signal_page = (size_t * )VirtualAlloc (NULL , jl_getpagesize (),
399
+ MEM_RESERVE , PAGE_READONLY );
400
+ #else
401
+ jl_gc_signal_page = (size_t * )mmap (0 , jl_getpagesize (), PROT_READ ,
402
+ MAP_NORESERVE | MAP_PRIVATE |
403
+ MAP_ANONYMOUS , -1 , 0 );
404
+ if (jl_gc_signal_page == MAP_FAILED )
405
+ jl_gc_signal_page = NULL ;
406
+ #endif
407
+ if (jl_gc_signal_page == NULL ) {
408
+ jl_printf (JL_STDERR , "could not allocate GC synchronization page\n" );
409
+ abort ();
410
+ }
411
+ }
412
+
413
// Arm the GC safepoint: revoke all access to the signal page so that any
// thread performing a safepoint check faults and parks in the signal handler,
// then wait for every thread to reach a GC-safe state. Also takes the
// `finalizers` lock so no other thread can mutate the finalizer lists while
// the collection runs. Note the ordering: the page must be protected BEFORE
// waiting, otherwise threads would never be forced into a safe state.
static void jl_gc_signal_begin(void)
{
#ifdef _OS_WINDOWS_
    DWORD old_prot;
    VirtualProtect((void*)jl_gc_signal_page, jl_page_size,
                   PAGE_NOACCESS, &old_prot);
#else
    mprotect((void*)jl_gc_signal_page, jl_page_size, PROT_NONE);
#endif
    jl_gc_wait_for_the_world();
    JL_LOCK_NOGC(finalizers);
}
425
+
426
// Disarm the GC safepoint: release the `finalizers` lock and restore read
// access to the signal page so threads blocked at the safepoint can resume.
// Inverse of jl_gc_signal_begin().
static void jl_gc_signal_end(void)
{
    JL_UNLOCK(finalizers);
#ifdef _OS_WINDOWS_
    DWORD old_prot;
    VirtualProtect((void*)jl_gc_signal_page, jl_page_size,
                   PAGE_READONLY, &old_prot);
#else
    mprotect((void*)jl_gc_signal_page, jl_page_size, PROT_READ);
#endif
}
437
+ #else
438
+
439
+ #define jl_gc_signal_begin ()
440
+ #define jl_gc_signal_end ()
441
+
442
+ #endif
443
+
317
444
static int jl_gc_finalizers_inhibited ; // don't run finalizers during codegen #11956
318
445
319
446
// malloc wrappers, aligned allocation
@@ -374,12 +501,15 @@ static void jl_gc_push_arraylist(arraylist_t *list)
374
501
jl_pgcstack = (jl_gcframe_t * )list -> items ;
375
502
}
376
503
377
- // Same assumption as `jl_gc_push_arraylist`
504
+ // Same assumption as `jl_gc_push_arraylist`. Requires the finalizers lock
505
+ // to be hold for the current thread and will release the lock when the
506
+ // function returns.
378
507
static void jl_gc_run_finalizers_in_list (arraylist_t * list )
379
508
{
380
509
size_t len = list -> len ;
381
510
jl_value_t * * items = (jl_value_t * * )list -> items ;
382
511
jl_gc_push_arraylist (list );
512
+ JL_UNLOCK (finalizers );
383
513
for (size_t i = 2 ;i < len ;i += 2 ) {
384
514
run_finalizer (items [i ], items [i + 1 ]);
385
515
}
@@ -388,8 +518,11 @@ static void jl_gc_run_finalizers_in_list(arraylist_t *list)
388
518
389
519
static void run_finalizers (void )
390
520
{
391
- if (to_finalize .len == 0 )
521
+ JL_LOCK_NOGC (finalizers );
522
+ if (to_finalize .len == 0 ) {
523
+ JL_UNLOCK (finalizers );
392
524
return ;
525
+ }
393
526
arraylist_t tmp ;
394
527
memcpy (& tmp , & to_finalize , sizeof (tmp ));
395
528
if (to_finalize .items == to_finalize ._space ) {
@@ -399,6 +532,7 @@ static void run_finalizers(void)
399
532
// empty out the first two entries for the GC frame
400
533
arraylist_push (& tmp , tmp .items [0 ]);
401
534
arraylist_push (& tmp , tmp .items [1 ]);
535
+ // This releases the finalizers lock.
402
536
jl_gc_run_finalizers_in_list (& tmp );
403
537
arraylist_free (& tmp );
404
538
}
@@ -429,22 +563,24 @@ static void schedule_all_finalizers(arraylist_t* flist)
429
563
430
564
// Schedule every registered finalizer (both the normal and the marked list)
// onto `to_finalize` and run them all. The lock is dropped before
// run_finalizers() because that function takes it again itself.
void jl_gc_run_all_finalizers(void)
{
    JL_LOCK_NOGC(finalizers);
    schedule_all_finalizers(&finalizer_list);
    schedule_all_finalizers(&finalizer_list_marked);
    JL_UNLOCK(finalizers);
    run_finalizers();
}
436
572
437
573
// Register finalizer `f` to be run on value `v`. The value and its finalizer
// are stored as an adjacent pair in `finalizer_list`, guarded by the
// `finalizers` lock. The NOGC lock variant is used — presumably to avoid
// triggering a GC safepoint while acquiring a lock the collector itself
// holds; confirm against the JL_LOCK/JL_LOCK_NOGC definitions.
DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f)
{
    JL_LOCK_NOGC(finalizers);
    arraylist_push(&finalizer_list, (void*)v);
    arraylist_push(&finalizer_list, (void*)f);
    JL_UNLOCK(finalizers);
}
444
580
445
581
void jl_finalize (jl_value_t * o )
446
582
{
447
- JL_LOCK (finalizers );
583
+ JL_LOCK_NOGC (finalizers );
448
584
// Copy the finalizers into a temporary list so that code in the finalizer
449
585
// won't change the list as we loop through them.
450
586
// This list is also used as the GC frame when we are running the finalizers
@@ -456,10 +592,13 @@ void jl_finalize(jl_value_t *o)
456
592
// still holding a reference to the object
457
593
finalize_object (& finalizer_list , o , & tmp );
458
594
finalize_object (& finalizer_list_marked , o , & tmp );
459
- JL_UNLOCK (finalizers );
460
595
if (tmp .len > 2 ) {
596
+ // This releases the finalizers lock.
461
597
jl_gc_run_finalizers_in_list (& tmp );
462
598
}
599
+ else {
600
+ JL_UNLOCK (finalizers );
601
+ }
463
602
arraylist_free (& tmp );
464
603
}
465
604
@@ -711,7 +850,7 @@ static NOINLINE void *malloc_page(void)
711
850
int i ;
712
851
region_t * region ;
713
852
int region_i = 0 ;
714
- JL_LOCK (pagealloc );
853
+ JL_LOCK_NOGC (pagealloc );
715
854
while (region_i < REGION_COUNT ) {
716
855
region = regions [region_i ];
717
856
if (region == NULL ) {
@@ -829,6 +968,7 @@ static inline int maybe_collect(void)
829
968
jl_gc_collect (0 );
830
969
return 1 ;
831
970
}
971
+ jl_gc_safepoint ();
832
972
return 0 ;
833
973
}
834
974
@@ -1110,6 +1250,9 @@ static inline void *__pool_alloc(pool_t* p, int osize, int end_offset)
1110
1250
jl_gc_collect (0 );
1111
1251
//allocd_bytes += osize;
1112
1252
}
1253
+ else {
1254
+ jl_gc_safepoint ();
1255
+ }
1113
1256
gc_num .poolalloc ++ ;
1114
1257
// first try to use the freelist
1115
1258
v = p -> freelist ;
@@ -2059,23 +2202,9 @@ void prepare_sweep(void)
2059
2202
{
2060
2203
}
2061
2204
2062
- void jl_gc_collect (int full )
2205
+ // Only one thread should be running in this function
2206
+ static void _jl_gc_collect (int full , char * stack_hi )
2063
2207
{
2064
- if (!is_gc_enabled ) return ;
2065
- if (jl_in_gc ) return ;
2066
- char * stack_hi = (char * )gc_get_stack_ptr ();
2067
- gc_debug_print ();
2068
- JL_SIGATOMIC_BEGIN ();
2069
-
2070
- #ifdef JULIA_ENABLE_THREADING
2071
- ti_threadgroup_barrier (tgworld , ti_tid );
2072
- if (ti_tid != 0 ) {
2073
- ti_threadgroup_barrier (tgworld , ti_tid );
2074
- return ;
2075
- }
2076
- #endif
2077
-
2078
- jl_in_gc = 1 ;
2079
2208
uint64_t t0 = jl_hrtime ();
2080
2209
int recollect = 0 ;
2081
2210
#if defined(GC_TIME )
@@ -2270,8 +2399,10 @@ void jl_gc_collect(int full)
2270
2399
#if defined(GC_FINAL_STATS ) || defined(GC_TIME )
2271
2400
finalize_time = jl_hrtime ();
2272
2401
#endif
2273
- if (!jl_gc_finalizers_inhibited ) {
2402
+ if (!jl_gc_finalizers_inhibited && to_finalize .len ) {
2403
+ jl_gc_signal_end ();
2274
2404
run_finalizers ();
2405
+ jl_gc_signal_begin ();
2275
2406
}
2276
2407
#if defined(GC_FINAL_STATS ) || defined(GC_TIME )
2277
2408
finalize_time = jl_hrtime () - finalize_time ;
@@ -2294,13 +2425,7 @@ void jl_gc_collect(int full)
2294
2425
#ifdef GC_FINAL_STATS
2295
2426
max_pause = max_pause < pause ? pause : max_pause ;
2296
2427
#endif
2297
- jl_in_gc = 0 ;
2298
2428
2299
- #ifdef JULIA_ENABLE_THREADING
2300
- ti_threadgroup_barrier (tgworld , ti_tid );
2301
- #endif
2302
-
2303
- JL_SIGATOMIC_END ();
2304
2429
#ifdef GC_TIME
2305
2430
if (estimate_freed != SAVE2 ) {
2306
2431
// this should not happen but it does
@@ -2309,8 +2434,49 @@ void jl_gc_collect(int full)
2309
2434
#endif
2310
2435
if (recollect ) {
2311
2436
n_pause -- ;
2312
- jl_gc_collect (0 );
2437
+ _jl_gc_collect (0 , stack_hi );
2438
+ }
2439
+ }
2440
+
2441
// Public entry point for a collection. Elects exactly one thread (via a CAS
// on `jl_gc_running`) to run `_jl_gc_collect()`; every other caller parks at
// the safepoint until the collection is over.
void jl_gc_collect(int full)
{
    if (!is_gc_enabled || jl_in_gc)
        return;
    char *stack_hi = (char*)gc_get_stack_ptr();
    gc_debug_print();
    JL_SIGATOMIC_BEGIN();

    // Publish a GC-safe state before racing for the collector role, so that
    // whichever thread wins won't wait on us in jl_gc_wait_for_the_world().
    int8_t old_state = jl_get_ptls_states()->gc_state;
    jl_get_ptls_states()->gc_state = 1;
    // In case multiple threads enter the GC at the same time, only allow
    // one of them to actually run the collection. We can't just let the
    // master thread do the GC since it might be running unmanaged code
    // and can take arbitrarily long time before hitting a safe point.
    if (!JL_ATOMIC_COMPARE_AND_SWAP(jl_gc_running, 0, 1)) {
#ifdef JULIA_ENABLE_THREADING
        // Lost the race: wait for the winner to finish, then restore our
        // previous GC state (performing a safepoint check on the way).
        JL_SIGATOMIC_END();
        jl_wait_for_gc();
        jl_gc_state_set_and_save(old_state, 1);
#else
        // For single thread, jl_in_gc is always true when jl_gc_running is
        // true so this should never happen.
        assert(0 && "GC synchronization failure");
#endif
        return;
    }
    jl_gc_signal_begin();

    jl_in_gc = 1;
    _jl_gc_collect(full, stack_hi);
    jl_in_gc = 0;

    // Need to reset the page protection before resetting the flag since
    // the thread will trigger a segfault immediately after returning from the
    // signal handler.
    jl_gc_signal_end();
    jl_gc_running = 0;
    JL_SIGATOMIC_END();
    jl_gc_state_set_and_save(old_state, 1);
}
2315
2481
2316
2482
// allocator entry points
0 commit comments