Skip to content

Commit

Permalink
Instrument the GC to break down the time spent in each step of sweeping
Browse files Browse the repository at this point in the history
  • Loading branch information
d-netto committed Sep 3, 2024
1 parent 0e5b029 commit 745139a
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 43 deletions.
3 changes: 3 additions & 0 deletions base/timing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ struct GC_Num
sweep_time ::Int64
mark_time ::Int64
total_sweep_time ::Int64
total_sweep_page_walk_time ::Int64
total_sweep_madvise_time ::Int64
total_sweep_free_mallocd_memory_time ::Int64
total_mark_time ::Int64
last_full_sweep ::Int64
last_incremental_sweep ::Int64
Expand Down
97 changes: 54 additions & 43 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1518,8 +1518,11 @@ STATIC_INLINE void gc_sweep_pool_page(gc_page_profiler_serializer_t *s, jl_gc_pa
// Sweep over all memory that is in use but not held in a pool, and add the
// time this takes (as measured by jl_hrtime()) to the running total in
// gc_num.total_sweep_free_mallocd_memory_time.
//
// ptls is forwarded to sweep_big(); sweep_full is not read by this body.
static void gc_sweep_other(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT
{
    const uint64_t sweep_begin = jl_hrtime();

    sweep_malloced_arrays();
    sweep_big(ptls);

    // Accumulate rather than overwrite: the counter is a total across calls.
    const uint64_t elapsed = jl_hrtime() - sweep_begin;
    gc_num.total_sweep_free_mallocd_memory_time += elapsed;
}

static void gc_pool_sync_nfree(jl_gc_pagemeta_t *pg, jl_taggedvalue_t *last) JL_NOTSAFEPOINT
Expand Down Expand Up @@ -1776,66 +1779,74 @@ static void gc_sweep_pool(void)
}
}

// the actual sweeping
jl_gc_padded_page_stack_t *new_gc_allocd_scratch = (jl_gc_padded_page_stack_t *) calloc_s(n_threads * sizeof(jl_gc_padded_page_stack_t));
jl_ptls_t ptls = jl_current_task->ptls;
gc_sweep_wake_all(ptls, new_gc_allocd_scratch);
gc_sweep_pool_parallel(ptls);
gc_sweep_wait_for_all();

// reset half-pages pointers
for (int t_i = 0; t_i < n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
if (ptls2 != NULL) {
ptls2->gc_tls.page_metadata_allocd = new_gc_allocd_scratch[t_i].stack;
for (int i = 0; i < JL_GC_N_POOLS; i++) {
jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i];
p->newpages = NULL;
uint64_t t_page_walk_start = jl_hrtime();
{
// the actual sweeping
jl_gc_padded_page_stack_t *new_gc_allocd_scratch = (jl_gc_padded_page_stack_t *) calloc_s(n_threads * sizeof(jl_gc_padded_page_stack_t));
jl_ptls_t ptls = jl_current_task->ptls;
gc_sweep_wake_all(ptls, new_gc_allocd_scratch);
gc_sweep_pool_parallel(ptls);
gc_sweep_wait_for_all();

// reset half-pages pointers
for (int t_i = 0; t_i < n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
if (ptls2 != NULL) {
ptls2->gc_tls.page_metadata_allocd = new_gc_allocd_scratch[t_i].stack;
for (int i = 0; i < JL_GC_N_POOLS; i++) {
jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i];
p->newpages = NULL;
}
}
}
}

// merge free lists
for (int t_i = 0; t_i < n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
if (ptls2 == NULL) {
continue;
}
jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
while (pg != NULL) {
jl_gc_pagemeta_t *pg2 = pg->next;
if (pg->fl_begin_offset != UINT16_MAX) {
char *cur_pg = pg->data;
jl_taggedvalue_t *fl_beg = (jl_taggedvalue_t*)(cur_pg + pg->fl_begin_offset);
jl_taggedvalue_t *fl_end = (jl_taggedvalue_t*)(cur_pg + pg->fl_end_offset);
*pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = fl_beg;
pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = &fl_end->next;
// merge free lists
for (int t_i = 0; t_i < n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
if (ptls2 == NULL) {
continue;
}
jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
while (pg != NULL) {
jl_gc_pagemeta_t *pg2 = pg->next;
if (pg->fl_begin_offset != UINT16_MAX) {
char *cur_pg = pg->data;
jl_taggedvalue_t *fl_beg = (jl_taggedvalue_t*)(cur_pg + pg->fl_begin_offset);
jl_taggedvalue_t *fl_end = (jl_taggedvalue_t*)(cur_pg + pg->fl_end_offset);
*pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = fl_beg;
pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = &fl_end->next;
}
pg = pg2;
}
pg = pg2;
}
}

// null out terminal pointers of free lists
for (int t_i = 0; t_i < n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
if (ptls2 != NULL) {
for (int i = 0; i < JL_GC_N_POOLS; i++) {
*pfl[t_i * JL_GC_N_POOLS + i] = NULL;
// null out terminal pointers of free lists
for (int t_i = 0; t_i < n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
if (ptls2 != NULL) {
for (int i = 0; i < JL_GC_N_POOLS; i++) {
*pfl[t_i * JL_GC_N_POOLS + i] = NULL;
}
}
}
}

// cleanup
free(pfl);
free(new_gc_allocd_scratch);
// cleanup
free(pfl);
free(new_gc_allocd_scratch);
}
uint64_t t_page_walk_end = jl_hrtime();
gc_num.total_sweep_page_walk_time += t_page_walk_end - t_page_walk_start;

#ifdef _P64 // only enable concurrent sweeping on 64bit
// wake thread up to sweep concurrently
if (jl_n_sweepthreads > 0) {
uv_sem_post(&gc_sweep_assists_needed);
}
else {
uint64_t t_madvise_start = jl_hrtime();
gc_free_pages();
uint64_t t_madvise_end = jl_hrtime();
gc_num.total_sweep_madvise_time += t_madvise_end - t_madvise_start;
}
#else
gc_free_pages();
Expand Down
3 changes: 3 additions & 0 deletions src/gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ typedef struct {
uint64_t sweep_time;
uint64_t mark_time;
uint64_t total_sweep_time;
uint64_t total_sweep_page_walk_time;
uint64_t total_sweep_madvise_time;
uint64_t total_sweep_free_mallocd_memory_time;
uint64_t total_mark_time;
uint64_t last_full_sweep;
uint64_t last_incremental_sweep;
Expand Down

0 comments on commit 745139a

Please sign in to comment.