@@ -34,8 +34,49 @@ extern "C" {
34
34
#endif
35
35
36
36
JL_DEFINE_MUTEX (pagealloc )
37
// Protects all access to `finalizer_list`, `finalizer_list_marked` and
// `to_finalize`.
37
39
JL_DEFINE_MUTEX (finalizers )
38
40
41
/**
 * Note about GC synchronization:
 *
 * When entering `jl_gc_collect()`, `jl_gc_running` is atomically changed from
 * `0` to `1` to make sure that only one thread can be running the GC. Other
 * threads that enter `jl_gc_collect()` at the same time (or later, calling
 * from unmanaged code) will wait in `jl_gc_collect()` until the GC is finished.
 *
 * Before starting the mark phase, the GC thread calls `jl_gc_signal_begin()`
 * to make sure all the threads are in a safe state for the GC. The function
 * activates the safepoint and waits for all the threads to get ready for the
 * GC (`gc_state != 0`). It also acquires the `finalizers` lock so that no
 * other thread will access the finalizer lists while the GC is running.
 *
 * During the mark and sweep phases of the GC, the threads that are not running
 * the GC should either be running unmanaged code (or a code section that does
 * not have a GC critical region, mainly one that stores to the stack or to
 * another object) or be paused at a safepoint, waiting for the GC to finish.
 * If a thread wants to switch from running unmanaged code to running managed
 * code, it has to perform a GC safepoint check after setting the `gc_state`
 * flag (see `jl_gc_state_set_and_save()`; it is possible that the thread
 * has `gc_state == 0` in the middle of the transition back, before entering
 * the safepoint. This is fine, since the thread won't be executing any GC
 * critical region during that time).
 *
 * When the GC needs to run the finalizers, it cannot keep the safepoint
 * activated, since code in a finalizer might trigger it and fall into
 * a dead loop. It also releases the `finalizers` lock (not strictly required,
 * since the lock is recursive) so that other threads can update the finalizer
 * lists at the same time. Since the safepoint is deactivated in this phase and
 * other threads might have entered the managed state from the unmanaged state,
 * when the finalizers finish running, the GC thread waits for the other
 * threads to enter a safe state again before continuing the GC. It is not
 * possible for other threads to enter the GC, since `jl_gc_running` is still
 * `1` in this phase. In the future, it might be better to delay this until
 * after the GC is finished, so that we can wake up other threads and do more
 * useful work while the finalizers run.
 */
79
+
39
80
// manipulating mark bits
40
81
41
82
#define GC_CLEAN 0 // freshly allocated
@@ -313,7 +354,93 @@ NOINLINE static uintptr_t gc_get_stack_ptr(void)
313
354
314
355
#include "gc-debug.c"
315
356
316
- int jl_in_gc ; // referenced from switchto task.c
357
+ // Only one thread can be doing the collection right now. That thread set
358
+ // `jl_running_gc` to one on entering the GC and set it back afterward.
359
+ static volatile uint64_t jl_gc_running = 0 ;
360
+
361
+ #ifdef JULIA_ENABLE_THREADING
362
+ JL_DLLEXPORT volatile size_t * jl_gc_signal_page = NULL ;
363
+
364
+ static void jl_wait_for_gc (void )
365
+ {
366
+ assert (!jl_in_gc && "Safepoint triggered in GC" );
367
+ // In case assertion is off. Make safepoint in GC a segfault instead
368
+ // of a infinite loop.
369
+ if (jl_in_gc )
370
+ return ;
371
+ while (jl_gc_running ) {
372
+ jl_cpu_pause (); // yield?
373
+ }
374
+ }
375
+
376
+ void jl_gc_signal_wait (void )
377
+ {
378
+ int8_t state = jl_get_ptls_states ()-> gc_state ;
379
+ jl_get_ptls_states ()-> gc_state = 1 ;
380
+ jl_wait_for_gc ();
381
+ jl_get_ptls_states ()-> gc_state = state ;
382
+ }
383
+
384
+ static void jl_gc_wait_for_the_world (void )
385
+ {
386
+ for (int i = 0 ;i < jl_n_threads ;i ++ ) {
387
+ jl_tls_states_t * ptls = jl_all_task_states [i ].ptls ;
388
+ while (!ptls -> gc_state ) {
389
+ jl_cpu_pause (); // yield?
390
+ }
391
+ }
392
+ }
393
+
394
+ void jl_gc_signal_init (void )
395
+ {
396
+ // jl_page_size isn't available yet.
397
+ #ifdef _OS_WINDOWS_
398
+ jl_gc_signal_page = (size_t * )VirtualAlloc (NULL , jl_getpagesize (),
399
+ MEM_RESERVE , PAGE_READONLY );
400
+ #else
401
+ jl_gc_signal_page = (size_t * )mmap (0 , jl_getpagesize (), PROT_READ ,
402
+ MAP_NORESERVE | MAP_PRIVATE |
403
+ MAP_ANONYMOUS , -1 , 0 );
404
+ if (jl_gc_signal_page == MAP_FAILED )
405
+ jl_gc_signal_page = NULL ;
406
+ #endif
407
+ if (jl_gc_signal_page == NULL ) {
408
+ jl_printf (JL_STDERR , "could not allocate GC synchronization page\n" );
409
+ abort ();
410
+ }
411
+ }
412
+
413
+ static void jl_gc_signal_begin (void )
414
+ {
415
+ #ifdef _OS_WINDOWS_
416
+ DWORD old_prot ;
417
+ VirtualProtect ((void * )jl_gc_signal_page , jl_page_size ,
418
+ PAGE_NOACCESS , & old_prot );
419
+ #else
420
+ mprotect ((void * )jl_gc_signal_page , jl_page_size , PROT_NONE );
421
+ #endif
422
+ jl_gc_wait_for_the_world ();
423
+ JL_LOCK_NOGC (finalizers );
424
+ }
425
+
426
+ static void jl_gc_signal_end (void )
427
+ {
428
+ JL_UNLOCK (finalizers );
429
+ #ifdef _OS_WINDOWS_
430
+ DWORD old_prot ;
431
+ VirtualProtect ((void * )jl_gc_signal_page , jl_page_size ,
432
+ PAGE_READONLY , & old_prot );
433
+ #else
434
+ mprotect ((void * )jl_gc_signal_page , jl_page_size , PROT_READ );
435
+ #endif
436
+ }
437
+ #else
438
+
439
+ #define jl_gc_signal_begin ()
440
+ #define jl_gc_signal_end ()
441
+
442
+ #endif
443
+
317
444
static int jl_gc_finalizers_inhibited ; // don't run finalizers during codegen #11956
318
445
319
446
// malloc wrappers, aligned allocation
@@ -375,12 +502,15 @@ static void jl_gc_push_arraylist(arraylist_t *list)
375
502
jl_pgcstack = (jl_gcframe_t * )list -> items ;
376
503
}
377
504
378
- // Same assumption as `jl_gc_push_arraylist`
505
+ // Same assumption as `jl_gc_push_arraylist`. Requires the finalizers lock
506
+ // to be hold for the current thread and will release the lock when the
507
+ // function returns.
379
508
static void jl_gc_run_finalizers_in_list (arraylist_t * list )
380
509
{
381
510
size_t len = list -> len ;
382
511
jl_value_t * * items = (jl_value_t * * )list -> items ;
383
512
jl_gc_push_arraylist (list );
513
+ JL_UNLOCK (finalizers );
384
514
for (size_t i = 2 ;i < len ;i += 2 ) {
385
515
run_finalizer (items [i ], items [i + 1 ]);
386
516
}
@@ -389,8 +519,11 @@ static void jl_gc_run_finalizers_in_list(arraylist_t *list)
389
519
390
520
static void run_finalizers (void )
391
521
{
392
- if (to_finalize .len == 0 )
522
+ JL_LOCK_NOGC (finalizers );
523
+ if (to_finalize .len == 0 ) {
524
+ JL_UNLOCK (finalizers );
393
525
return ;
526
+ }
394
527
arraylist_t copied_list ;
395
528
memcpy (& copied_list , & to_finalize , sizeof (copied_list ));
396
529
if (to_finalize .items == to_finalize ._space ) {
@@ -400,6 +533,7 @@ static void run_finalizers(void)
400
533
// empty out the first two entries for the GC frame
401
534
arraylist_push (& copied_list , copied_list .items [0 ]);
402
535
arraylist_push (& copied_list , copied_list .items [1 ]);
536
+ // This releases the finalizers lock.
403
537
jl_gc_run_finalizers_in_list (& copied_list );
404
538
arraylist_free (& copied_list );
405
539
}
@@ -430,22 +564,24 @@ static void schedule_all_finalizers(arraylist_t* flist)
430
564
431
565
void jl_gc_run_all_finalizers (void )
432
566
{
567
+ JL_LOCK_NOGC (finalizers );
433
568
schedule_all_finalizers (& finalizer_list );
434
569
schedule_all_finalizers (& finalizer_list_marked );
570
+ JL_UNLOCK (finalizers );
435
571
run_finalizers ();
436
572
}
437
573
438
574
// Register finalizer `f` for value `v`. Entries are stored as (value, func)
// pairs in `finalizer_list`; the NOGC lock keeps this safe against a
// concurrent collection walking the list.
JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f)
{
    JL_LOCK_NOGC(finalizers);
    arraylist_push(&finalizer_list, (void*)v);
    arraylist_push(&finalizer_list, (void*)f);
    JL_UNLOCK(finalizers);
}
445
581
446
582
JL_DLLEXPORT void jl_finalize (jl_value_t * o )
447
583
{
448
- JL_LOCK (finalizers );
584
+ JL_LOCK_NOGC (finalizers );
449
585
// Copy the finalizers into a temporary list so that code in the finalizer
450
586
// won't change the list as we loop through them.
451
587
// This list is also used as the GC frame when we are running the finalizers
@@ -457,10 +593,13 @@ JL_DLLEXPORT void jl_finalize(jl_value_t *o)
457
593
// still holding a reference to the object
458
594
finalize_object (& finalizer_list , o , & copied_list );
459
595
finalize_object (& finalizer_list_marked , o , & copied_list );
460
- JL_UNLOCK (finalizers );
461
596
if (copied_list .len > 2 ) {
597
+ // This releases the finalizers lock.
462
598
jl_gc_run_finalizers_in_list (& copied_list );
463
599
}
600
+ else {
601
+ JL_UNLOCK (finalizers );
602
+ }
464
603
arraylist_free (& copied_list );
465
604
}
466
605
@@ -712,7 +851,7 @@ static NOINLINE void *malloc_page(void)
712
851
int i ;
713
852
region_t * region ;
714
853
int region_i = 0 ;
715
- JL_LOCK (pagealloc );
854
+ JL_LOCK_NOGC (pagealloc );
716
855
while (region_i < REGION_COUNT ) {
717
856
region = regions [region_i ];
718
857
if (region == NULL ) {
@@ -830,6 +969,7 @@ static inline int maybe_collect(void)
830
969
jl_gc_collect (0 );
831
970
return 1 ;
832
971
}
972
+ jl_gc_safepoint ();
833
973
return 0 ;
834
974
}
835
975
@@ -1111,6 +1251,9 @@ static inline void *__pool_alloc(pool_t* p, int osize, int end_offset)
1111
1251
jl_gc_collect (0 );
1112
1252
//allocd_bytes += osize;
1113
1253
}
1254
+ else {
1255
+ jl_gc_safepoint ();
1256
+ }
1114
1257
gc_num .poolalloc ++ ;
1115
1258
// first try to use the freelist
1116
1259
v = p -> freelist ;
@@ -2060,23 +2203,9 @@ void prepare_sweep(void)
2060
2203
{
2061
2204
}
2062
2205
2063
- JL_DLLEXPORT void jl_gc_collect (int full )
2206
+ // Only one thread should be running in this function
2207
+ static void _jl_gc_collect (int full , char * stack_hi )
2064
2208
{
2065
- if (!is_gc_enabled ) return ;
2066
- if (jl_in_gc ) return ;
2067
- char * stack_hi = (char * )gc_get_stack_ptr ();
2068
- gc_debug_print ();
2069
- JL_SIGATOMIC_BEGIN ();
2070
-
2071
- #ifdef JULIA_ENABLE_THREADING
2072
- ti_threadgroup_barrier (tgworld , ti_tid );
2073
- if (ti_tid != 0 ) {
2074
- ti_threadgroup_barrier (tgworld , ti_tid );
2075
- return ;
2076
- }
2077
- #endif
2078
-
2079
- jl_in_gc = 1 ;
2080
2209
uint64_t t0 = jl_hrtime ();
2081
2210
int recollect = 0 ;
2082
2211
#if defined(GC_TIME )
@@ -2271,8 +2400,10 @@ JL_DLLEXPORT void jl_gc_collect(int full)
2271
2400
#if defined(GC_FINAL_STATS ) || defined(GC_TIME )
2272
2401
finalize_time = jl_hrtime ();
2273
2402
#endif
2274
- if (!jl_gc_finalizers_inhibited ) {
2403
+ if (!jl_gc_finalizers_inhibited && to_finalize .len ) {
2404
+ jl_gc_signal_end ();
2275
2405
run_finalizers ();
2406
+ jl_gc_signal_begin ();
2276
2407
}
2277
2408
#if defined(GC_FINAL_STATS ) || defined(GC_TIME )
2278
2409
finalize_time = jl_hrtime () - finalize_time ;
@@ -2295,13 +2426,7 @@ JL_DLLEXPORT void jl_gc_collect(int full)
2295
2426
#ifdef GC_FINAL_STATS
2296
2427
max_pause = max_pause < pause ? pause : max_pause ;
2297
2428
#endif
2298
- jl_in_gc = 0 ;
2299
2429
2300
- #ifdef JULIA_ENABLE_THREADING
2301
- ti_threadgroup_barrier (tgworld , ti_tid );
2302
- #endif
2303
-
2304
- JL_SIGATOMIC_END ();
2305
2430
#ifdef GC_TIME
2306
2431
if (estimate_freed != SAVE2 ) {
2307
2432
// this should not happen but it does
@@ -2310,8 +2435,49 @@ JL_DLLEXPORT void jl_gc_collect(int full)
2310
2435
#endif
2311
2436
if (recollect ) {
2312
2437
n_pause -- ;
2313
- jl_gc_collect (0 );
2438
+ _jl_gc_collect (0 , stack_hi );
2439
+ }
2440
+ }
2441
+
2442
+ JL_DLLEXPORT void jl_gc_collect (int full )
2443
+ {
2444
+ if (!is_gc_enabled || jl_in_gc )
2445
+ return ;
2446
+ char * stack_hi = (char * )gc_get_stack_ptr ();
2447
+ gc_debug_print ();
2448
+ JL_SIGATOMIC_BEGIN ();
2449
+
2450
+ int8_t old_state = jl_get_ptls_states ()-> gc_state ;
2451
+ jl_get_ptls_states ()-> gc_state = 1 ;
2452
+ // In case multiple threads enter the GC at the same time, only allow
2453
+ // one of them to actually run the collection. We can't just let the
2454
+ // master thread do the GC since it might be running unmanaged code
2455
+ // and can take arbitrarily long time before hitting a safe point.
2456
+ if (!JL_ATOMIC_COMPARE_AND_SWAP (jl_gc_running , 0 , 1 )) {
2457
+ #ifdef JULIA_ENABLE_THREADING
2458
+ JL_SIGATOMIC_END ();
2459
+ jl_wait_for_gc ();
2460
+ jl_gc_state_set_and_save (old_state , 1 );
2461
+ #else
2462
+ // For single thread, jl_in_gc is always true when jl_gc_running is
2463
+ // true so this should never happen.
2464
+ assert (0 && "GC synchronization failure" );
2465
+ #endif
2466
+ return ;
2314
2467
}
2468
+ jl_gc_signal_begin ();
2469
+
2470
+ jl_in_gc = 1 ;
2471
+ _jl_gc_collect (full , stack_hi );
2472
+ jl_in_gc = 0 ;
2473
+
2474
+ // Need to reset the page protection before resetting the flag since
2475
+ // the thread will trigger a segfault immediately after returning from the
2476
+ // signal handler.
2477
+ jl_gc_signal_end ();
2478
+ jl_gc_running = 0 ;
2479
+ JL_SIGATOMIC_END ();
2480
+ jl_gc_state_set_and_save (old_state , 1 );
2315
2481
}
2316
2482
2317
2483
// allocator entry points
0 commit comments