Skip to content

Commit 03a0247

Browse files
authored
gc: improve mallocarrays locality (#56801)
1 parent 309021d commit 03a0247

File tree

7 files changed

+31 -53 lines changed

src/gc-common.c

+2 -11
Original file line number | Diff line number | Diff line change
@@ -557,17 +557,8 @@ size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT
557557
// tracking Memorys with malloc'd storage
558558
void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){
559559
// This is **NOT** a GC safe point.
560-
mallocmemory_t *ma;
561-
if (ptls->gc_tls_common.heap.mafreelist == NULL) {
562-
ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t));
563-
}
564-
else {
565-
ma = ptls->gc_tls_common.heap.mafreelist;
566-
ptls->gc_tls_common.heap.mafreelist = ma->next;
567-
}
568-
ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned);
569-
ma->next = ptls->gc_tls_common.heap.mallocarrays;
570-
ptls->gc_tls_common.heap.mallocarrays = ma;
560+
void *a = (void*)((uintptr_t)m | !!isaligned);
561+
small_arraylist_push(&ptls->gc_tls_common.heap.mallocarrays, a);
571562
}
572563

573564
// =========================================================================== //

src/gc-common.h

-6
Original file line number | Diff line number | Diff line change
@@ -61,12 +61,6 @@ extern jl_gc_callback_list_t *gc_cblist_notify_gc_pressure;
6161
// malloc wrappers, aligned allocation
6262
// =========================================================================== //
6363

64-
// data structure for tracking malloc'd genericmemory.
65-
typedef struct _mallocmemory_t {
66-
jl_genericmemory_t *a; // lowest bit is tagged if this is aligned memory
67-
struct _mallocmemory_t *next;
68-
} mallocmemory_t;
69-
7064
#if defined(_OS_WINDOWS_)
7165
STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align)
7266
{

src/gc-debug.c

+4 -6
Original file line number | Diff line number | Diff line change
@@ -1025,12 +1025,11 @@ void gc_stats_big_obj(void)
10251025
v = v->next;
10261026
}
10271027

1028-
mallocmemory_t *ma = ptls2->gc_tls.heap.mallocarrays;
1029-
while (ma != NULL) {
1030-
uint8_t bits =jl_astaggedvalue(ma->a)->bits.gc;
1028+
void **lst = ptls2->gc_tls.heap.mallocarrays.items;
1029+
for (size_t i = 0, l = ptls2->gc_tls.heap.mallocarrays.len; i < l; i++) {
1030+
jl_genericmemory_t *m = (jl_genericmemory_t*)((uintptr_t)lst[i] & ~(uintptr_t)1);
1031+
uint8_t bits = jl_astaggedvalue(m)->bits.gc;
10311032
if (gc_marked(bits)) {
1032-
jl_genericmemory_t *m = (jl_genericmemory_t*)ma->a;
1033-
m = (jl_genericmemory_t*)((uintptr_t)m & ~(uintptr_t)1);
10341033
size_t sz = jl_genericmemory_nbytes(m);
10351034
if (gc_old(bits)) {
10361035
assert(bits == GC_OLD_MARKED);
@@ -1042,7 +1041,6 @@ void gc_stats_big_obj(void)
10421041
stat.nbytes_used += sz;
10431042
}
10441043
}
1045-
ma = ma->next;
10461044
}
10471045
}
10481046
jl_safe_printf("%lld kB (%lld%% old) in %lld large objects (%lld%% old)\n",

src/gc-stock.c

+16 -20
Original file line number | Diff line number | Diff line change
@@ -623,10 +623,9 @@ void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT
623623
reset_thread_gc_counts();
624624
}
625625

626-
static void jl_gc_free_memory(jl_value_t *v, int isaligned) JL_NOTSAFEPOINT
626+
static void jl_gc_free_memory(jl_genericmemory_t *m, int isaligned) JL_NOTSAFEPOINT
627627
{
628-
assert(jl_is_genericmemory(v));
629-
jl_genericmemory_t *m = (jl_genericmemory_t*)v;
628+
assert(jl_is_genericmemory(m));
630629
assert(jl_genericmemory_how(m) == 1 || jl_genericmemory_how(m) == 2);
631630
char *d = (char*)m->ptr;
632631
size_t freed_bytes = memory_block_usable_size(d, isaligned);
@@ -648,25 +647,23 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT
648647
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
649648
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
650649
if (ptls2 != NULL) {
651-
mallocmemory_t *ma = ptls2->gc_tls_common.heap.mallocarrays;
652-
mallocmemory_t **pma = &ptls2->gc_tls_common.heap.mallocarrays;
653-
while (ma != NULL) {
654-
mallocmemory_t *nxt = ma->next;
655-
jl_value_t *a = (jl_value_t*)((uintptr_t)ma->a & ~1);
656-
int bits = jl_astaggedvalue(a)->bits.gc;
657-
if (gc_marked(bits)) {
658-
pma = &ma->next;
650+
size_t n = 0;
651+
size_t l = ptls2->gc_tls_common.heap.mallocarrays.len;
652+
void **lst = ptls2->gc_tls_common.heap.mallocarrays.items;
653+
// filter without preserving order
654+
while (n < l) {
655+
jl_genericmemory_t *m = (jl_genericmemory_t*)((uintptr_t)lst[n] & ~1);
656+
if (gc_marked(jl_astaggedvalue(m)->bits.gc)) {
657+
n++;
659658
}
660659
else {
661-
*pma = nxt;
662-
int isaligned = (uintptr_t)ma->a & 1;
663-
jl_gc_free_memory(a, isaligned);
664-
ma->next = ptls2->gc_tls_common.heap.mafreelist;
665-
ptls2->gc_tls_common.heap.mafreelist = ma;
660+
int isaligned = (uintptr_t)lst[n] & 1;
661+
jl_gc_free_memory(m, isaligned);
662+
l--;
663+
lst[n] = lst[l];
666664
}
667-
gc_time_count_mallocd_memory(bits);
668-
ma = nxt;
669665
}
666+
ptls2->gc_tls_common.heap.mallocarrays.len = l;
670667
}
671668
}
672669
gc_time_mallocd_memory_end();
@@ -3433,8 +3430,7 @@ void jl_init_thread_heap(jl_ptls_t ptls)
34333430
small_arraylist_new(&common_heap->live_tasks, 0);
34343431
for (int i = 0; i < JL_N_STACK_POOLS; i++)
34353432
small_arraylist_new(&common_heap->free_stacks[i], 0);
3436-
common_heap->mallocarrays = NULL;
3437-
common_heap->mafreelist = NULL;
3433+
small_arraylist_new(&common_heap->mallocarrays, 0);
34383434
heap->young_generation_of_bigvals = (bigval_t*)calloc_s(sizeof(bigval_t)); // sentinel
34393435
assert(gc_bigval_sentinel_tag != 0); // make sure the sentinel is initialized
34403436
heap->young_generation_of_bigvals->header = gc_bigval_sentinel_tag;

src/gc-tls-common.h

+2 -3
Original file line number | Diff line number | Diff line change
@@ -21,9 +21,8 @@ typedef struct {
2121
// that are holding onto a stack from the pool
2222
small_arraylist_t live_tasks;
2323

24-
// variables for tracking malloc'd arrays
25-
struct _mallocmemory_t *mallocarrays;
26-
struct _mallocmemory_t *mafreelist;
24+
// variable for tracking malloc'd arrays
25+
small_arraylist_t mallocarrays;
2726

2827
#define JL_N_STACK_POOLS 16
2928
small_arraylist_t free_stacks[JL_N_STACK_POOLS];

src/mtarraylist.c

+2 -2
Original file line number | Diff line number | Diff line change
@@ -14,8 +14,8 @@ extern "C" {
1414
// but there can be any number of observers
1515

1616
typedef struct {
17-
_Atomic(uint32_t) len;
18-
uint32_t max;
17+
_Atomic(size_t) len;
18+
size_t max;
1919
_Atomic(_Atomic(void*)*) items;
2020
_Atomic(void*) _space[SMALL_AL_N_INLINE];
2121
} small_mtarraylist_t;

src/support/arraylist.h

+5 -5
Original file line number | Diff line number | Diff line change
@@ -5,15 +5,15 @@
55

66
#define AL_N_INLINE 29
77

8-
#define SMALL_AL_N_INLINE 6
8+
#define SMALL_AL_N_INLINE 5
99

1010
#ifdef __cplusplus
1111
extern "C" {
1212
#endif
1313

1414
#include "analyzer_annotations.h"
1515

16-
typedef struct {
16+
typedef struct { // 32 words
1717
size_t len;
1818
size_t max;
1919
void **items;
@@ -27,9 +27,9 @@ JL_DLLEXPORT void arraylist_push(arraylist_t *a, void *elt) JL_NOTSAFEPOINT;
2727
JL_DLLEXPORT void *arraylist_pop(arraylist_t *a) JL_NOTSAFEPOINT;
2828
JL_DLLEXPORT void arraylist_grow(arraylist_t *a, size_t n) JL_NOTSAFEPOINT;
2929

30-
typedef struct {
31-
uint32_t len;
32-
uint32_t max;
30+
typedef struct { // 8 words
31+
size_t len;
32+
size_t max;
3333
void **items;
3434
void *_space[SMALL_AL_N_INLINE];
3535
} small_arraylist_t;

0 commit comments

Comments
 (0)