@@ -264,8 +264,6 @@ static size_t max_collect_interval = 500000000UL;
 
 #define NS_TO_S(t) ((double)(t/1000)/(1000*1000))
 #define NS2MS(t) ((double)(t/1000)/1000)
-static int64_t live_bytes = 0;
-static int64_t promoted_bytes = 0;
 
 JL_DLLEXPORT size_t jl_gc_total_freed_bytes = 0;
 #ifdef GC_FINAL_STATS
@@ -322,9 +320,43 @@ static int64_t scanned_bytes; // young bytes scanned while marking
 static int64_t perm_scanned_bytes; // old bytes scanned while marking
 static int prev_sweep_mask = GC_MARKED;
 
-static size_t array_nbytes(jl_array_t*);
 #define inc_sat(v,s) v = (v) >= s ? s : (v)+1
 
+// Full collection heuristics
+static int64_t live_bytes = 0;
+static int64_t promoted_bytes = 0;
+
+static int64_t last_full_live_ub = 0;
+static int64_t last_full_live_est = 0;
+// upper bound and estimated sizes of the live objects
+// This heuristic should be very unlikely to trigger.
+// However, it is simple enough to trigger a full collection
+// when necessary if the other heuristics are misbehaving.
+// It is also possible to take the total memory available
+// into account if necessary.
+STATIC_INLINE int gc_check_heap_size(int64_t sz_ub, int64_t sz_est)
+{
+    if (__unlikely(!last_full_live_ub || last_full_live_ub > sz_ub)) {
+        last_full_live_ub = sz_ub;
+    }
+    else if (__unlikely(last_full_live_ub * 3 / 2 < sz_ub)) {
+        return 1;
+    }
+    if (__unlikely(!last_full_live_est || last_full_live_est > sz_est)) {
+        last_full_live_est = sz_est;
+    }
+    else if (__unlikely(last_full_live_est * 2 < sz_est)) {
+        return 1;
+    }
+    return 0;
+}
+
+STATIC_INLINE void gc_update_heap_size(int64_t sz_ub, int64_t sz_est)
+{
+    last_full_live_ub = sz_ub;
+    last_full_live_est = sz_est;
+}
+
 static inline int gc_setmark_big(void *o, int mark_mode)
 {
     if (gc_verifying) {
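In isolation, the two thresholds above work as follows: a full collection is requested once the live-size upper bound has grown by more than 50%, or the live-size estimate has more than doubled, relative to the values recorded at the last full collection. A minimal standalone restatement (the shortened names and the main() driver are illustrative, not part of the patch):

#include <stdint.h>
#include <stdio.h>

static int64_t last_ub = 0;   // upper bound recorded at the last full collection
static int64_t last_est = 0;  // estimate recorded at the last full collection

// Returns 1 when the heap has grown enough since the last full collection
// to justify forcing another one.
static int check_heap_size(int64_t sz_ub, int64_t sz_est)
{
    if (!last_ub || last_ub > sz_ub)
        last_ub = sz_ub;             // track the smallest upper bound seen
    else if (last_ub * 3 / 2 < sz_ub)
        return 1;                    // upper bound grew by more than 50%
    if (!last_est || last_est > sz_est)
        last_est = sz_est;           // track the smallest estimate seen
    else if (last_est * 2 < sz_est)
        return 1;                    // estimate more than doubled
    return 0;
}

int main(void)
{
    last_ub = last_est = 100 << 20;                             // 100 MB at the last full collection
    printf("%d\n", check_heap_size(120ll << 20, 150ll << 20));  // 0: ub +20%, est +50%
    printf("%d\n", check_heap_size(160ll << 20, 150ll << 20));  // 1: ub grew by 60%
    printf("%d\n", check_heap_size(120ll << 20, 210ll << 20));  // 1: est more than doubled
    return 0;
}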
@@ -375,15 +407,19 @@ static inline int gc_setmark_pool(void *o, int mark_mode)
         mark_mode = GC_MARKED;
     }
     if (!(bits & GC_MARKED)) {
-        if (mark_mode == GC_MARKED)
+        if (mark_mode == GC_MARKED) {
             perm_scanned_bytes += page->osize;
-        else
+            page->nold++;
+        }
+        else {
             scanned_bytes += page->osize;
+        }
         objprofile_count(jl_typeof(jl_valueof(o)),
                          mark_mode == GC_MARKED, page->osize);
     }
+    assert(mark_mode & GC_MARKED);
+    page->has_marked = 1;
     _gc_setmark(o, mark_mode);
-    page->gc_bits |= mark_mode;
     verify_val(jl_valueof(o));
     return mark_mode;
 }
@@ -634,15 +670,15 @@ static void sweep_malloced_arrays(void)
 // pool allocation
 static inline gcval_t *reset_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t *fl)
 {
-    pg->gc_bits = 0;
     pg->nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / p->osize;
     jl_tls_states_t *ptls = jl_all_tls_states[pg->thread_n];
     pg->pool_n = p - ptls->heap.norm_pools;
     memset(pg->ages, 0, GC_PAGE_SZ / 8 / p->osize + 1);
     gcval_t *beg = (gcval_t*)(pg->data + GC_PAGE_OFFSET);
     gcval_t *end = (gcval_t*)((char*)beg + (pg->nfree - 1)*p->osize);
     end->next = fl;
-    pg->allocd = 0;
+    pg->has_young = 0;
+    pg->has_marked = 0;
     pg->fl_begin_offset = GC_PAGE_OFFSET;
     pg->fl_end_offset = (char*)end - (char*)beg + GC_PAGE_OFFSET;
     return beg;
@@ -691,7 +727,7 @@ static inline void *__pool_alloc(jl_gc_pool_t *p, int osize, int end_offset)
         jl_gc_pagemeta_t *pg = page_metadata(v);
         assert(pg->osize == p->osize);
         pg->nfree = 0;
-        pg->allocd = 1;
+        pg->has_young = 1;
         if (next)
             p->nfree = page_metadata(next)->nfree;
     }
@@ -712,7 +748,7 @@ static inline void *__pool_alloc(jl_gc_pool_t *p, int osize, int end_offset)
         jl_gc_pagemeta_t *pg = page_metadata(v);
         assert(pg->osize == p->osize);
         pg->nfree = 0;
-        pg->allocd = 1;
+        pg->has_young = 1;
         p->newpages = v->next;
     }
     v->flags = 0;
@@ -840,12 +876,15 @@ static gcval_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t **pfl
     freedall = 1;
     old_nfree += pg->nfree;
 
-    if (pg->gc_bits == GC_MARKED) {
-        // this page only contains GC_MARKED and free cells
-        // if we are doing a quick sweep and nothing has been allocated inside since last sweep
-        // we can skip it
-        if (sweep_mask == GC_MARKED_NOESC && !pg->allocd) {
-            // the position of the freelist begin/end in this page is stored in its metadata
+    if (!pg->has_marked)
+        goto free_page;
+    // For a quick sweep, we might be able to skip the page if the page doesn't
+    // have any young live cells before marking.
+    if (sweep_mask == GC_MARKED_NOESC && !pg->has_young) {
+        assert(prev_sweep_mask == GC_MARKED_NOESC || pg->prev_nold >= pg->nold);
+        if (prev_sweep_mask == GC_MARKED_NOESC || pg->prev_nold == pg->nold) {
+            // the position of the freelist begin/end in this page
+            // is stored in its metadata
             if (pg->fl_begin_offset != (uint16_t)-1) {
                 *pfl = page_pfl_beg(pg);
                 pfl = prev_pfl = (gcval_t**)page_pfl_end(pg);
@@ -855,11 +894,11 @@ static gcval_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t **pfl
             goto free_page;
         }
     }
-    else if (pg->gc_bits == GC_CLEAN) {
-        goto free_page;
-    }
 
     { // scope to avoid clang goto errors
+        int has_marked = 0;
+        int has_young = 0;
+        int16_t prev_nold = 0;
         int pg_nfree = 0;
         gcval_t **pfl_begin = NULL;
         uint8_t msk = 1; // mask for the age bit in the current age byte
@@ -873,14 +912,20 @@ static gcval_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t **pfl
                 *ages &= ~msk;
             }
             else { // marked young or old
-                if (*ages & msk) { // old enough
+                if (*ages & msk || bits == GC_MARKED) { // old enough
+                    // `!age && bits == GC_MARKED` is possible for
+                    // non-first-class objects like `jl_binding_t`
                     if (sweep_mask == GC_MARKED || bits == GC_MARKED_NOESC) {
-                        gc_bits(v) = GC_QUEUED; // promote
+                        bits = gc_bits(v) = GC_QUEUED; // promote
                     }
+                    prev_nold++;
                 }
-                else if ((sweep_mask & bits) == sweep_mask) {
-                    gc_bits(v) = GC_CLEAN; // unmark
+                else {
+                    assert(bits == GC_MARKED_NOESC);
+                    bits = gc_bits(v) = GC_CLEAN; // unmark
+                    has_young = 1;
                 }
+                has_marked |= (bits & GC_MARKED) != 0;
                 *ages |= msk;
                 freedall = 0;
             }
@@ -892,12 +937,18 @@ static gcval_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t **pfl
             }
         }
 
+        assert(!freedall);
+        pg->has_marked = has_marked;
+        pg->has_young = has_young;
         pg->fl_begin_offset = pfl_begin ? (char*)pfl_begin - data : (uint16_t)-1;
         pg->fl_end_offset = pfl_begin ? (char*)pfl - data : (uint16_t)-1;
 
         pg->nfree = pg_nfree;
+        if (sweep_mask == GC_MARKED) {
+            pg->nold = 0;
+            pg->prev_nold = prev_nold;
+        }
         page_done++;
-        pg->allocd = 0;
     }
 free_page:
     pg_freedall += freedall;
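Taken together, the per-page fields written back above replace the old page-level gc_bits: has_marked and has_young are recomputed on every sweep, while nold counts old cells marked since the last full sweep and prev_nold records the count taken during that full sweep. A simplified predicate showing when a quick sweep may reuse the stored freelist instead of walking the page (the struct, names, and constant values are stand-ins, not the real jl_gc_pagemeta_t):

#include <stdint.h>

#define GC_MARKED        1   // placeholder values for the two sweep modes
#define GC_MARKED_NOESC  3

typedef struct {
    uint8_t  has_marked;  // at least one live (marked) cell on the page
    uint8_t  has_young;   // at least one young live cell before marking
    uint16_t nold;        // old cells marked since the last full sweep
    uint16_t prev_nold;   // old cells counted during the last full sweep
} pagemeta_t;

// Returns 1 when a quick (incremental) sweep may reuse the page's stored
// freelist bounds instead of walking every cell on the page.
int quick_sweep_can_skip(const pagemeta_t *pg, int sweep_mask, int prev_sweep_mask)
{
    if (!pg->has_marked)
        return 0;  // page is entirely dead: it is returned to the pool, not skipped
    if (sweep_mask != GC_MARKED_NOESC || pg->has_young)
        return 0;  // full sweep, or young cells present: the page must be walked
    // If the previous sweep was a full one, the old-cell count must be
    // unchanged since then; otherwise the page still has to be walked.
    return prev_sweep_mask == GC_MARKED_NOESC || pg->prev_nold == pg->nold;
}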
@@ -933,10 +984,6 @@ static gcval_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t **pfl
         nfree += obj_per_page;
     }
     else {
-        if (sweep_mask == GC_MARKED)
-            pg->gc_bits = GC_CLEAN;
-        if (sweep_mask == GC_MARKED_NOESC)
-            pg->gc_bits = GC_MARKED;
         nfree += pg->nfree;
     }
 
@@ -991,7 +1038,7 @@ static int gc_sweep_inc(int sweep_mask)
         gcval_t *last = p->freelist;
         if (last) {
             jl_gc_pagemeta_t *pg = page_metadata(last);
-            pg->allocd = 1;
+            pg->has_young = 1;
             pg->nfree = p->nfree;
         }
         p->freelist = NULL;
@@ -1001,7 +1048,7 @@ static int gc_sweep_inc(int sweep_mask)
         if (last) {
             jl_gc_pagemeta_t *pg = page_metadata(last);
             pg->nfree = (GC_PAGE_SZ - ((char*)last - gc_page_data(last))) / p->osize;
-            pg->allocd = 1;
+            pg->has_young = 1;
         }
         p->newpages = NULL;
     }
@@ -1681,7 +1728,9 @@ static void _jl_gc_collect(int full, char *stack_hi)
 #if defined(GC_TIME) || defined(GC_FINAL_STATS)
     post_time = jl_hrtime() - post_time;
 #endif
-    estimate_freed = live_bytes - scanned_bytes - perm_scanned_bytes + actual_allocd;
+    int64_t live_sz_ub = live_bytes + actual_allocd;
+    int64_t live_sz_est = scanned_bytes + perm_scanned_bytes;
+    estimate_freed = live_sz_ub - live_sz_est;
 
     gc_verify();
 
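The rewritten estimate is algebraically the same value as before (live_bytes - scanned_bytes - perm_scanned_bytes + actual_allocd); it just names the two intermediate quantities the new heuristic consumes: live_sz_ub, an upper bound on live data (everything live before plus everything allocated since), and live_sz_est, what marking actually found. A worked example with made-up numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    // Made-up numbers (in MB) purely to show how the quantities relate.
    int64_t live_bytes = 200, actual_allocd = 50;           // state before this collection
    int64_t scanned_bytes = 30, perm_scanned_bytes = 190;   // found live by this mark phase
    int64_t live_sz_ub  = live_bytes + actual_allocd;           // 250: cannot be more live than this
    int64_t live_sz_est = scanned_bytes + perm_scanned_bytes;   // 220: what marking saw
    printf("estimate_freed = %lld MB\n", (long long)(live_sz_ub - live_sz_est));  // 30
    return 0;
}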
@@ -1700,7 +1749,13 @@ static void _jl_gc_collect(int full, char *stack_hi)
         for (int i = 0;i < jl_n_threads;i++)
             nptr += jl_all_tls_states[i]->heap.remset_nptr;
         int large_frontier = nptr*sizeof(void*) >= default_collect_interval; // many pointers in the intergen frontier => "quick" mark is not quick
-        if ((full || large_frontier || ((not_freed_enough || promoted_bytes >= gc_num.interval) && (promoted_bytes >= default_collect_interval || prev_sweep_mask == GC_MARKED))) && gc_num.pause > 1) {
+        if ((full || large_frontier ||
+             ((not_freed_enough || promoted_bytes >= gc_num.interval) &&
+              (promoted_bytes >= default_collect_interval ||
+               prev_sweep_mask == GC_MARKED)) ||
+             gc_check_heap_size(live_sz_ub, live_sz_est)) &&
+            gc_num.pause > 1) {
+            gc_update_heap_size(live_sz_ub, live_sz_est);
             if (prev_sweep_mask != GC_MARKED || full) {
                 if (full) recollect = 1; // TODO enable this?
             }
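The reformatted trigger condition reads as four independent reasons to choose a full sweep, gated on gc_num.pause > 1. A standalone restatement of the same boolean logic (parameter names are stand-ins for the locals and globals used in _jl_gc_collect):

#include <stdint.h>

// Mirrors the condition in the hunk above: any one of four reasons,
// gated on the pause counter.
int should_do_full_sweep(int full, int large_frontier, int not_freed_enough,
                         int64_t promoted_bytes, int64_t interval,
                         int64_t default_interval, int prev_sweep_was_full,
                         int heap_grew_too_much, int pause_count)
{
    int promotion_pressure =
        (not_freed_enough || promoted_bytes >= interval) &&
        (promoted_bytes >= default_interval || prev_sweep_was_full);
    return (full || large_frontier || promotion_pressure || heap_grew_too_much) &&
           pause_count > 1;
}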