Skip to content

Commit 7f95e1b

Browse files
committed
Merge pull request #16632 from JuliaLang/yyc/gc/full-thresh
Rewrite GC page metadata for sweeping fast path
2 parents 3504039 + 45a012e commit 7f95e1b

File tree

3 files changed

+175
-39
lines changed

3 files changed

+175
-39
lines changed

src/gc-debug.c

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -348,8 +348,8 @@ static void gc_scrub_range(char *stack_lo, char *stack_hi)
348348
continue;
349349
jl_gc_pagemeta_t *pg = page_metadata(tag);
350350
// Make sure the sweep rebuilds the freelist
351-
pg->allocd = 1;
352-
pg->gc_bits = 0x3;
351+
pg->has_marked = 1;
352+
pg->has_young = 1;
353353
// Find the age bit
354354
char *page_begin = gc_page_data(tag) + GC_PAGE_OFFSET;
355355
int obj_id = (((char*)tag) - page_begin) / osize;
@@ -358,6 +358,7 @@ static void gc_scrub_range(char *stack_lo, char *stack_hi)
358358
// (especially on 32bit where it's more likely to have pointer-like
359359
// bit patterns)
360360
*ages &= ~(1 << (obj_id % 8));
361+
// set mark to GC_MARKED_NOESC (young and marked)
361362
memset(tag, 0xff, osize);
362363
}
363364
}
@@ -496,6 +497,62 @@ void gc_debug_init(void)
496497
#endif
497498
}
498499

500+
// Simple and dumb way to count cells with different gc bits in allocated pages
501+
// Use as ground truth for debugging memory-leak-like issues.
502+
static int64_t poolobj_sizes[4];
503+
static int64_t empty_pages;
504+
505+
static void gc_count_pool_page(jl_gc_pagemeta_t *pg)
506+
{
507+
int osize = pg->osize;
508+
char *data = pg->data;
509+
gcval_t *v = (gcval_t*)(data + GC_PAGE_OFFSET);
510+
char *lim = (char*)v + GC_PAGE_SZ - GC_PAGE_OFFSET - osize;
511+
int has_live = 0;
512+
while ((char*)v <= lim) {
513+
int bits = gc_bits(v);
514+
if (bits & GC_MARKED)
515+
has_live = 1;
516+
poolobj_sizes[bits] += osize;
517+
v = (gcval_t*)((char*)v + osize);
518+
}
519+
if (!has_live) {
520+
empty_pages++;
521+
}
522+
}
523+
524+
static void gc_count_pool_region(region_t *region)
525+
{
526+
for (int pg_i = 0; pg_i < region->pg_cnt / 32; pg_i++) {
527+
uint32_t line = region->allocmap[pg_i];
528+
if (line) {
529+
for (int j = 0; j < 32; j++) {
530+
if ((line >> j) & 1) {
531+
gc_count_pool_page(&region->meta[pg_i*32 + j]);
532+
}
533+
}
534+
}
535+
}
536+
}
537+
538+
void gc_count_pool(void)
539+
{
540+
memset(&poolobj_sizes, 0, sizeof(poolobj_sizes));
541+
empty_pages = 0;
542+
for (int i = 0; i < REGION_COUNT; i++) {
543+
if (regions[i].pages) {
544+
gc_count_pool_region(&regions[i]);
545+
}
546+
}
547+
jl_safe_printf("****** Pool stat: ******\n");
548+
for (int i = 0;i < 4;i++)
549+
jl_safe_printf("bits(%d): %" PRId64 "\n", i, poolobj_sizes[i]);
550+
// empty_pages is inaccurate after the sweep since young objects are
551+
// also GC_CLEAN
552+
jl_safe_printf("free pages: %" PRId64 "\n", empty_pages);
553+
jl_safe_printf("************************\n");
554+
}
555+
499556
#ifdef __cplusplus
500557
}
501558
#endif

src/gc.c

Lines changed: 87 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -264,8 +264,6 @@ static size_t max_collect_interval = 500000000UL;
264264

265265
#define NS_TO_S(t) ((double)(t/1000)/(1000*1000))
266266
#define NS2MS(t) ((double)(t/1000)/1000)
267-
static int64_t live_bytes = 0;
268-
static int64_t promoted_bytes = 0;
269267

270268
JL_DLLEXPORT size_t jl_gc_total_freed_bytes=0;
271269
#ifdef GC_FINAL_STATS
@@ -322,9 +320,43 @@ static int64_t scanned_bytes; // young bytes scanned while marking
322320
static int64_t perm_scanned_bytes; // old bytes scanned while marking
323321
static int prev_sweep_mask = GC_MARKED;
324322

325-
static size_t array_nbytes(jl_array_t*);
326323
#define inc_sat(v,s) v = (v) >= s ? s : (v)+1
327324

325+
// Full collection heuristics
326+
static int64_t live_bytes = 0;
327+
static int64_t promoted_bytes = 0;
328+
329+
static int64_t last_full_live_ub = 0;
330+
static int64_t last_full_live_est = 0;
331+
// upper bound and estimated live object sizes
332+
// This heuristic should be really unlikely to trigger.
333+
// However, this should be simple enough to trigger a full collection
334+
// when it's necessary if other heuristics are messed up.
335+
// It is also possible to take the total memory available into account
336+
// if necessary.
337+
STATIC_INLINE int gc_check_heap_size(int64_t sz_ub, int64_t sz_est)
338+
{
339+
if (__unlikely(!last_full_live_ub || last_full_live_ub > sz_ub)) {
340+
last_full_live_ub = sz_ub;
341+
}
342+
else if (__unlikely(last_full_live_ub * 3 / 2 < sz_ub)) {
343+
return 1;
344+
}
345+
if (__unlikely(!last_full_live_est || last_full_live_est > sz_est)) {
346+
last_full_live_est = sz_est;
347+
}
348+
else if (__unlikely(last_full_live_est * 2 < sz_est)) {
349+
return 1;
350+
}
351+
return 0;
352+
}
353+
354+
STATIC_INLINE void gc_update_heap_size(int64_t sz_ub, int64_t sz_est)
355+
{
356+
last_full_live_ub = sz_ub;
357+
last_full_live_est = sz_est;
358+
}
359+
328360
static inline int gc_setmark_big(void *o, int mark_mode)
329361
{
330362
if (gc_verifying) {
@@ -375,15 +407,19 @@ static inline int gc_setmark_pool(void *o, int mark_mode)
375407
mark_mode = GC_MARKED;
376408
}
377409
if (!(bits & GC_MARKED)) {
378-
if (mark_mode == GC_MARKED)
410+
if (mark_mode == GC_MARKED) {
379411
perm_scanned_bytes += page->osize;
380-
else
412+
page->nold++;
413+
}
414+
else {
381415
scanned_bytes += page->osize;
416+
}
382417
objprofile_count(jl_typeof(jl_valueof(o)),
383418
mark_mode == GC_MARKED, page->osize);
384419
}
420+
assert(mark_mode & GC_MARKED);
421+
page->has_marked = 1;
385422
_gc_setmark(o, mark_mode);
386-
page->gc_bits |= mark_mode;
387423
verify_val(jl_valueof(o));
388424
return mark_mode;
389425
}
@@ -634,15 +670,15 @@ static void sweep_malloced_arrays(void)
634670
// pool allocation
635671
static inline gcval_t *reset_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t *fl)
636672
{
637-
pg->gc_bits = 0;
638673
pg->nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / p->osize;
639674
jl_tls_states_t *ptls = jl_all_tls_states[pg->thread_n];
640675
pg->pool_n = p - ptls->heap.norm_pools;
641676
memset(pg->ages, 0, GC_PAGE_SZ / 8 / p->osize + 1);
642677
gcval_t *beg = (gcval_t*)(pg->data + GC_PAGE_OFFSET);
643678
gcval_t *end = (gcval_t*)((char*)beg + (pg->nfree - 1)*p->osize);
644679
end->next = fl;
645-
pg->allocd = 0;
680+
pg->has_young = 0;
681+
pg->has_marked = 0;
646682
pg->fl_begin_offset = GC_PAGE_OFFSET;
647683
pg->fl_end_offset = (char*)end - (char*)beg + GC_PAGE_OFFSET;
648684
return beg;
@@ -691,7 +727,7 @@ static inline void *__pool_alloc(jl_gc_pool_t *p, int osize, int end_offset)
691727
jl_gc_pagemeta_t *pg = page_metadata(v);
692728
assert(pg->osize == p->osize);
693729
pg->nfree = 0;
694-
pg->allocd = 1;
730+
pg->has_young = 1;
695731
if (next)
696732
p->nfree = page_metadata(next)->nfree;
697733
}
@@ -712,7 +748,7 @@ static inline void *__pool_alloc(jl_gc_pool_t *p, int osize, int end_offset)
712748
jl_gc_pagemeta_t *pg = page_metadata(v);
713749
assert(pg->osize == p->osize);
714750
pg->nfree = 0;
715-
pg->allocd = 1;
751+
pg->has_young = 1;
716752
p->newpages = v->next;
717753
}
718754
v->flags = 0;
@@ -840,12 +876,15 @@ static gcval_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t **pfl
840876
freedall = 1;
841877
old_nfree += pg->nfree;
842878

843-
if (pg->gc_bits == GC_MARKED) {
844-
// this page only contains GC_MARKED and free cells
845-
// if we are doing a quick sweep and nothing has been allocated inside since last sweep
846-
// we can skip it
847-
if (sweep_mask == GC_MARKED_NOESC && !pg->allocd) {
848-
// the position of the freelist begin/end in this page is stored in its metadata
879+
if (!pg->has_marked)
880+
goto free_page;
881+
// For quick sweep, we might be able to skip the page if the page doesn't
882+
// have any young live cells before marking.
883+
if (sweep_mask == GC_MARKED_NOESC && !pg->has_young) {
884+
assert(prev_sweep_mask == GC_MARKED_NOESC || pg->prev_nold >= pg->nold);
885+
if (prev_sweep_mask == GC_MARKED_NOESC || pg->prev_nold == pg->nold) {
886+
// the position of the freelist begin/end in this page
887+
// is stored in its metadata
849888
if (pg->fl_begin_offset != (uint16_t)-1) {
850889
*pfl = page_pfl_beg(pg);
851890
pfl = prev_pfl = (gcval_t**)page_pfl_end(pg);
@@ -855,11 +894,11 @@ static gcval_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t **pfl
855894
goto free_page;
856895
}
857896
}
858-
else if (pg->gc_bits == GC_CLEAN) {
859-
goto free_page;
860-
}
861897

862898
{ // scope to avoid clang goto errors
899+
int has_marked = 0;
900+
int has_young = 0;
901+
int16_t prev_nold = 0;
863902
int pg_nfree = 0;
864903
gcval_t **pfl_begin = NULL;
865904
uint8_t msk = 1; // mask for the age bit in the current age byte
@@ -873,14 +912,20 @@ static gcval_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t **pfl
873912
*ages &= ~msk;
874913
}
875914
else { // marked young or old
876-
if (*ages & msk) { // old enough
915+
if (*ages & msk || bits == GC_MARKED) { // old enough
916+
// `!age && bits == GC_MARKED` is possible for
917+
// non-first-class objects like `jl_binding_t`
877918
if (sweep_mask == GC_MARKED || bits == GC_MARKED_NOESC) {
878-
gc_bits(v) = GC_QUEUED; // promote
919+
bits = gc_bits(v) = GC_QUEUED; // promote
879920
}
921+
prev_nold++;
880922
}
881-
else if ((sweep_mask & bits) == sweep_mask) {
882-
gc_bits(v) = GC_CLEAN; // unmark
923+
else {
924+
assert(bits == GC_MARKED_NOESC);
925+
bits = gc_bits(v) = GC_CLEAN; // unmark
926+
has_young = 1;
883927
}
928+
has_marked |= (bits & GC_MARKED) != 0;
884929
*ages |= msk;
885930
freedall = 0;
886931
}
@@ -892,12 +937,18 @@ static gcval_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t **pfl
892937
}
893938
}
894939

940+
assert(!freedall);
941+
pg->has_marked = has_marked;
942+
pg->has_young = has_young;
895943
pg->fl_begin_offset = pfl_begin ? (char*)pfl_begin - data : (uint16_t)-1;
896944
pg->fl_end_offset = pfl_begin ? (char*)pfl - data : (uint16_t)-1;
897945

898946
pg->nfree = pg_nfree;
947+
if (sweep_mask == GC_MARKED) {
948+
pg->nold = 0;
949+
pg->prev_nold = prev_nold;
950+
}
899951
page_done++;
900-
pg->allocd = 0;
901952
}
902953
free_page:
903954
pg_freedall += freedall;
@@ -933,10 +984,6 @@ static gcval_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, gcval_t **pfl
933984
nfree += obj_per_page;
934985
}
935986
else {
936-
if (sweep_mask == GC_MARKED)
937-
pg->gc_bits = GC_CLEAN;
938-
if (sweep_mask == GC_MARKED_NOESC)
939-
pg->gc_bits = GC_MARKED;
940987
nfree += pg->nfree;
941988
}
942989

@@ -991,7 +1038,7 @@ static int gc_sweep_inc(int sweep_mask)
9911038
gcval_t *last = p->freelist;
9921039
if (last) {
9931040
jl_gc_pagemeta_t *pg = page_metadata(last);
994-
pg->allocd = 1;
1041+
pg->has_young = 1;
9951042
pg->nfree = p->nfree;
9961043
}
9971044
p->freelist = NULL;
@@ -1001,7 +1048,7 @@ static int gc_sweep_inc(int sweep_mask)
10011048
if (last) {
10021049
jl_gc_pagemeta_t *pg = page_metadata(last);
10031050
pg->nfree = (GC_PAGE_SZ - ((char*)last - gc_page_data(last))) / p->osize;
1004-
pg->allocd = 1;
1051+
pg->has_young = 1;
10051052
}
10061053
p->newpages = NULL;
10071054
}
@@ -1681,7 +1728,9 @@ static void _jl_gc_collect(int full, char *stack_hi)
16811728
#if defined(GC_TIME) || defined(GC_FINAL_STATS)
16821729
post_time = jl_hrtime() - post_time;
16831730
#endif
1684-
estimate_freed = live_bytes - scanned_bytes - perm_scanned_bytes + actual_allocd;
1731+
int64_t live_sz_ub = live_bytes + actual_allocd;
1732+
int64_t live_sz_est = scanned_bytes + perm_scanned_bytes;
1733+
estimate_freed = live_sz_ub - live_sz_est;
16851734

16861735
gc_verify();
16871736

@@ -1700,7 +1749,13 @@ static void _jl_gc_collect(int full, char *stack_hi)
17001749
for (int i = 0;i < jl_n_threads;i++)
17011750
nptr += jl_all_tls_states[i]->heap.remset_nptr;
17021751
int large_frontier = nptr*sizeof(void*) >= default_collect_interval; // many pointers in the intergen frontier => "quick" mark is not quick
1703-
if ((full || large_frontier || ((not_freed_enough || promoted_bytes >= gc_num.interval) && (promoted_bytes >= default_collect_interval || prev_sweep_mask == GC_MARKED))) && gc_num.pause > 1) {
1752+
if ((full || large_frontier ||
1753+
((not_freed_enough || promoted_bytes >= gc_num.interval) &&
1754+
(promoted_bytes >= default_collect_interval ||
1755+
prev_sweep_mask == GC_MARKED)) ||
1756+
gc_check_heap_size(live_sz_ub, live_sz_est)) &&
1757+
gc_num.pause > 1) {
1758+
gc_update_heap_size(live_sz_ub, live_sz_est);
17041759
if (prev_sweep_mask != GC_MARKED || full) {
17051760
if (full) recollect = 1; // TODO enable this?
17061761
}

src/gc.h

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -138,12 +138,33 @@ typedef struct _mallocarray_t {
138138
// pool page metadata
139139
typedef struct {
140140
struct {
141-
uint16_t pool_n : 8; // index (into norm_pool) of pool that owns this page
142-
uint16_t allocd : 1; // true if an allocation happened in this page since last sweep
143-
uint16_t gc_bits : 2; // this is a bitwise | of all gc_bits in this page
141+
// index of pool that owns this page
142+
uint16_t pool_n : 8;
143+
// Whether any cell in the page is marked
144+
// This bit is set before sweeping iff there's live cells in the page.
145+
// Note that before marking or after sweeping there can be live
146+
// (and young) cells in the page for `!has_marked`.
147+
uint16_t has_marked: 1;
148+
// Whether any cell was live and young **before sweeping**.
149+
// For a normal sweep (quick sweep that is NOT preceded by a
150+
// full sweep) this bit is set iff there are young or newly dead
151+
// objects in the page and the page needs to be swept.
152+
//
153+
// For a full sweep, this bit should be ignored.
154+
//
155+
// For a quick sweep preceded by a full sweep: if this bit is set,
156+
// the page needs to be swept. If this bit is not set, there could
157+
// still be old dead objects in the page and `nold` and `prev_nold`
158+
// should be used to determine if the page needs to be swept.
159+
uint16_t has_young: 1;
144160
};
145-
uint16_t nfree; // number of free objects in this page.
146-
// invalid if pool that owns this page is allocating objects from this page.
161+
// number of old objects in this page
162+
uint16_t nold;
163+
// number of old objects in this page during the previous full sweep
164+
uint16_t prev_nold;
165+
// number of free objects in this page.
166+
// invalid if pool that owns this page is allocating objects from this page.
167+
uint16_t nfree;
147168
uint16_t osize; // size of each object in this page
148169
uint16_t fl_begin_offset; // offset of first free object in this page
149170
uint16_t fl_end_offset; // offset of last free object in this page
@@ -338,6 +359,9 @@ static inline void objprofile_reset(void)
338359
}
339360
#endif
340361

362+
// For debugging
363+
void gc_count_pool(void);
364+
341365
#ifdef __cplusplus
342366
}
343367
#endif

0 commit comments

Comments
 (0)