Skip to content

Commit 6e2c0cb

Browse files
committed
gc: improve mallocarrays locality
small_arraylist_t has much better memory locality and space utilization than a linked list with individually malloc'd elements. However, it seemed that this required adjusting the alignment of small_arraylist_t, both so that it can hold all the elements the user might create and to avoid gcc bugs.
1 parent 9118ea7 commit 6e2c0cb

File tree

7 files changed

+31
-53
lines changed

7 files changed

+31
-53
lines changed

src/gc-common.c

+2-11
Original file line numberDiff line numberDiff line change
@@ -557,17 +557,8 @@ size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT
557557
// tracking Memorys with malloc'd storage
558558
void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){
559559
// This is **NOT** a GC safe point.
560-
mallocmemory_t *ma;
561-
if (ptls->gc_tls_common.heap.mafreelist == NULL) {
562-
ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t));
563-
}
564-
else {
565-
ma = ptls->gc_tls_common.heap.mafreelist;
566-
ptls->gc_tls_common.heap.mafreelist = ma->next;
567-
}
568-
ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned);
569-
ma->next = ptls->gc_tls_common.heap.mallocarrays;
570-
ptls->gc_tls_common.heap.mallocarrays = ma;
560+
void *a = (void*)((uintptr_t)m | !!isaligned);
561+
small_arraylist_push(&ptls->gc_tls_common.heap.mallocarrays, a);
571562
}
572563

573564
// =========================================================================== //

src/gc-common.h

-6
Original file line numberDiff line numberDiff line change
@@ -61,12 +61,6 @@ extern jl_gc_callback_list_t *gc_cblist_notify_gc_pressure;
6161
// malloc wrappers, aligned allocation
6262
// =========================================================================== //
6363

64-
// data structure for tracking malloc'd genericmemory.
65-
typedef struct _mallocmemory_t {
66-
jl_genericmemory_t *a; // lowest bit is tagged if this is aligned memory
67-
struct _mallocmemory_t *next;
68-
} mallocmemory_t;
69-
7064
#if defined(_OS_WINDOWS_)
7165
STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align)
7266
{

src/gc-debug.c

+4-6
Original file line numberDiff line numberDiff line change
@@ -1025,12 +1025,11 @@ void gc_stats_big_obj(void)
10251025
v = v->next;
10261026
}
10271027

1028-
mallocmemory_t *ma = ptls2->gc_tls.heap.mallocarrays;
1029-
while (ma != NULL) {
1030-
uint8_t bits =jl_astaggedvalue(ma->a)->bits.gc;
1028+
void **lst = ptls2->gc_tls.heap.mallocarrays.items;
1029+
for (size_t i = 0, l = ptls2->gc_tls.heap.mallocarrays.len; i < l; i++) {
1030+
jl_genericmemory_t *m = (jl_genericmemory_t*)((uintptr_t)lst[i] & ~(uintptr_t)1);
1031+
uint8_t bits = jl_astaggedvalue(m)->bits.gc;
10311032
if (gc_marked(bits)) {
1032-
jl_genericmemory_t *m = (jl_genericmemory_t*)ma->a;
1033-
m = (jl_genericmemory_t*)((uintptr_t)m & ~(uintptr_t)1);
10341033
size_t sz = jl_genericmemory_nbytes(m);
10351034
if (gc_old(bits)) {
10361035
assert(bits == GC_OLD_MARKED);
@@ -1042,7 +1041,6 @@ void gc_stats_big_obj(void)
10421041
stat.nbytes_used += sz;
10431042
}
10441043
}
1045-
ma = ma->next;
10461044
}
10471045
}
10481046
jl_safe_printf("%lld kB (%lld%% old) in %lld large objects (%lld%% old)\n",

src/gc-stock.c

+16-20
Original file line numberDiff line numberDiff line change
@@ -629,10 +629,9 @@ void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT
629629
reset_thread_gc_counts();
630630
}
631631

632-
static void jl_gc_free_memory(jl_value_t *v, int isaligned) JL_NOTSAFEPOINT
632+
static void jl_gc_free_memory(jl_genericmemory_t *m, int isaligned) JL_NOTSAFEPOINT
633633
{
634-
assert(jl_is_genericmemory(v));
635-
jl_genericmemory_t *m = (jl_genericmemory_t*)v;
634+
assert(jl_is_genericmemory(m));
636635
assert(jl_genericmemory_how(m) == 1 || jl_genericmemory_how(m) == 2);
637636
char *d = (char*)m->ptr;
638637
size_t freed_bytes = memory_block_usable_size(d, isaligned);
@@ -654,25 +653,23 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT
654653
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
655654
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
656655
if (ptls2 != NULL) {
657-
mallocmemory_t *ma = ptls2->gc_tls_common.heap.mallocarrays;
658-
mallocmemory_t **pma = &ptls2->gc_tls_common.heap.mallocarrays;
659-
while (ma != NULL) {
660-
mallocmemory_t *nxt = ma->next;
661-
jl_value_t *a = (jl_value_t*)((uintptr_t)ma->a & ~1);
662-
int bits = jl_astaggedvalue(a)->bits.gc;
663-
if (gc_marked(bits)) {
664-
pma = &ma->next;
656+
size_t n = 0;
657+
size_t l = ptls2->gc_tls_common.heap.mallocarrays.len;
658+
void **lst = ptls2->gc_tls_common.heap.mallocarrays.items;
659+
// filter without preserving order
660+
while (n < l) {
661+
jl_genericmemory_t *m = (jl_genericmemory_t*)((uintptr_t)lst[n] & ~1);
662+
if (gc_marked(jl_astaggedvalue(m)->bits.gc)) {
663+
n++;
665664
}
666665
else {
667-
*pma = nxt;
668-
int isaligned = (uintptr_t)ma->a & 1;
669-
jl_gc_free_memory(a, isaligned);
670-
ma->next = ptls2->gc_tls_common.heap.mafreelist;
671-
ptls2->gc_tls_common.heap.mafreelist = ma;
666+
int isaligned = (uintptr_t)lst[n] & 1;
667+
jl_gc_free_memory(m, isaligned);
668+
l--;
669+
lst[n] = lst[l];
672670
}
673-
gc_time_count_mallocd_memory(bits);
674-
ma = nxt;
675671
}
672+
ptls2->gc_tls_common.heap.mallocarrays.len = l;
676673
}
677674
}
678675
gc_time_mallocd_memory_end();
@@ -3439,8 +3436,7 @@ void jl_init_thread_heap(jl_ptls_t ptls)
34393436
small_arraylist_new(&common_heap->live_tasks, 0);
34403437
for (int i = 0; i < JL_N_STACK_POOLS; i++)
34413438
small_arraylist_new(&common_heap->free_stacks[i], 0);
3442-
common_heap->mallocarrays = NULL;
3443-
common_heap->mafreelist = NULL;
3439+
small_arraylist_new(&common_heap->mallocarrays, 0);
34443440
heap->young_generation_of_bigvals = (bigval_t*)calloc_s(sizeof(bigval_t)); // sentinel
34453441
assert(gc_bigval_sentinel_tag != 0); // make sure the sentinel is initialized
34463442
heap->young_generation_of_bigvals->header = gc_bigval_sentinel_tag;

src/gc-tls-common.h

+2-3
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,8 @@ typedef struct {
2121
// that are holding onto a stack from the pool
2222
small_arraylist_t live_tasks;
2323

24-
// variables for tracking malloc'd arrays
25-
struct _mallocmemory_t *mallocarrays;
26-
struct _mallocmemory_t *mafreelist;
24+
// variable for tracking malloc'd arrays
25+
small_arraylist_t mallocarrays;
2726

2827
#define JL_N_STACK_POOLS 16
2928
small_arraylist_t free_stacks[JL_N_STACK_POOLS];

src/mtarraylist.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ extern "C" {
1414
// but there can be any number of observers
1515

1616
typedef struct {
17-
_Atomic(uint32_t) len;
18-
uint32_t max;
17+
_Atomic(size_t) len;
18+
size_t max;
1919
_Atomic(_Atomic(void*)*) items;
2020
_Atomic(void*) _space[SMALL_AL_N_INLINE];
2121
} small_mtarraylist_t;

src/support/arraylist.h

+5-5
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,15 @@
55

66
#define AL_N_INLINE 29
77

8-
#define SMALL_AL_N_INLINE 6
8+
#define SMALL_AL_N_INLINE 5
99

1010
#ifdef __cplusplus
1111
extern "C" {
1212
#endif
1313

1414
#include "analyzer_annotations.h"
1515

16-
typedef struct {
16+
typedef struct { // 32 words
1717
size_t len;
1818
size_t max;
1919
void **items;
@@ -27,9 +27,9 @@ JL_DLLEXPORT void arraylist_push(arraylist_t *a, void *elt) JL_NOTSAFEPOINT;
2727
JL_DLLEXPORT void *arraylist_pop(arraylist_t *a) JL_NOTSAFEPOINT;
2828
JL_DLLEXPORT void arraylist_grow(arraylist_t *a, size_t n) JL_NOTSAFEPOINT;
2929

30-
typedef struct {
31-
uint32_t len;
32-
uint32_t max;
30+
typedef struct { // 8 words
31+
size_t len;
32+
size_t max;
3333
void **items;
3434
void *_space[SMALL_AL_N_INLINE];
3535
} small_arraylist_t;

0 commit comments

Comments
 (0)