Add malloc fixes to 1.11 branch #57880

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged: 2 commits, Mar 25, 2025
11 changes: 6 additions & 5 deletions src/gc-debug.c
@@ -1100,13 +1100,14 @@ void gc_stats_big_obj(void)
             v = v->next;
         }
 
-        mallocarray_t *ma = ptls2->heap.mallocarrays;
-        while (ma != NULL) {
-            if (gc_marked(jl_astaggedvalue(ma->a)->bits.gc)) {
+        void **lst = ptls2->gc_tls.heap.mallocarrays.items;
+        for (size_t i = 0, l = ptls2->gc_tls.heap.mallocarrays.len; i < l; i++) {
+            jl_genericmemory_t *m = (jl_genericmemory_t*)((uintptr_t)lst[i] & ~(uintptr_t)1);
+            uint8_t bits = jl_astaggedvalue(m)->bits.gc;
+            if (gc_marked(bits)) {
                 nused++;
-                nbytes += jl_genericmemory_nbytes((jl_genericmemory_t*)ma->a);
+                nbytes += jl_genericmemory_nbytes(m);
             }
-            ma = ma->next;
         }
     }
235 changes: 115 additions & 120 deletions src/gc.c
@@ -6,7 +6,11 @@
 #include "julia_atomics.h"
 #include "julia_gcext.h"
 #include "julia_assert.h"
-#ifdef __GLIBC__
+#include <stdlib.h>
+
+#if defined(_OS_DARWIN_)
+#include <malloc/malloc.h>
+#else
 #include <malloc.h> // for malloc_trim
 #endif

@@ -1121,17 +1125,8 @@ static void sweep_big(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT
 
 void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){
     // This is **NOT** a GC safe point.
-    mallocarray_t *ma;
-    if (ptls->heap.mafreelist == NULL) {
-        ma = (mallocarray_t*)malloc_s(sizeof(mallocarray_t));
-    }
-    else {
-        ma = ptls->heap.mafreelist;
-        ptls->heap.mafreelist = ma->next;
-    }
-    ma->a = (jl_value_t*)((uintptr_t)m | !!isaligned);
-    ma->next = ptls->heap.mallocarrays;
-    ptls->heap.mallocarrays = ma;
+    void *a = (void*)((uintptr_t)m | !!isaligned);
+    small_arraylist_push(&ptls->heap.mallocarrays, a);
 }
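The freelist bookkeeping removed above is absorbed by a growable array: tracking a block becomes one amortized O(1) push instead of a node allocation plus two pointer writes. Rough shape of the structure, sketched from src/support/arraylist.h (treat field names as approximate):

// Sketch only -- see src/support/arraylist.h for the real definition.
typedef struct {
    size_t len;   // number of tracked blocks
    size_t max;   // current capacity of items
    void **items; // each entry: genericmemory pointer | isaligned bit
} small_arraylist_sketch_t;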


@@ -1143,10 +1138,6 @@ void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT
     jl_batch_accum_heap_size(ptls, sz);
 }
 
-void jl_gc_count_freed(size_t sz) JL_NOTSAFEPOINT
-{
-    jl_batch_accum_free_size(jl_current_task->ptls, sz);
-}
 
 // Only safe to update the heap inside the GC
 static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTSAFEPOINT
@@ -1222,19 +1213,21 @@ size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT
 }
 
 
-static void jl_gc_free_memory(jl_value_t *v, int isaligned) JL_NOTSAFEPOINT
+static void jl_gc_free_memory(jl_genericmemory_t *v, int isaligned) JL_NOTSAFEPOINT
 {
     assert(jl_is_genericmemory(v));
     jl_genericmemory_t *m = (jl_genericmemory_t*)v;
     assert(jl_genericmemory_how(m) == 1 || jl_genericmemory_how(m) == 2);
     char *d = (char*)m->ptr;
+    size_t freed_bytes = memory_block_usable_size(d, isaligned);
+    assert(freed_bytes != 0);
     if (isaligned)
         jl_free_aligned(d);
     else
         free(d);
     jl_atomic_store_relaxed(&gc_heap_stats.heap_size,
-        jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - jl_genericmemory_nbytes(m));
-    gc_num.freed += jl_genericmemory_nbytes(m);
+        jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - freed_bytes);
+    gc_num.freed += freed_bytes;
     gc_num.freecall++;
 }
@@ -1245,24 +1238,23 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT
     for (int t_i = 0; t_i < gc_n_threads; t_i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
         if (ptls2 != NULL) {
-            mallocarray_t *ma = ptls2->heap.mallocarrays;
-            mallocarray_t **pma = &ptls2->heap.mallocarrays;
-            while (ma != NULL) {
-                mallocarray_t *nxt = ma->next;
-                jl_value_t *a = (jl_value_t*)((uintptr_t)ma->a & ~1);
-                int bits = jl_astaggedvalue(a)->bits.gc;
-                if (gc_marked(bits)) {
-                    pma = &ma->next;
+            size_t n = 0;
+            size_t l = ptls2->heap.mallocarrays.len;
+            void **lst = ptls2->heap.mallocarrays.items;
+            // filter without preserving order
+            while (n < l) {
+                jl_genericmemory_t *m = (jl_genericmemory_t*)((uintptr_t)lst[n] & ~1);
+                if (gc_marked(jl_astaggedvalue(m)->bits.gc)) {
+                    n++;
                 }
                 else {
-                    *pma = nxt;
-                    int isaligned = (uintptr_t)ma->a & 1;
-                    jl_gc_free_memory(a, isaligned);
-                    free(ma);
+                    int isaligned = (uintptr_t)lst[n] & 1;
+                    jl_gc_free_memory(m, isaligned);
+                    l--;
+                    lst[n] = lst[l];
                 }
-                gc_time_count_mallocd_memory(bits);
-                ma = nxt;
             }
+            ptls2->heap.mallocarrays.len = l;
         }
     }
     gc_time_mallocd_memory_end();
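The rewritten sweep is the classic unordered swap-remove filter: survivors advance the cursor, a dead entry is overwritten by the current last element, and the length shrinks, so the whole pass stays O(n) with no node frees. A self-contained sketch of the idiom (function name hypothetical):

#include <stddef.h>

// Keep entries for which keep() returns nonzero; order is not preserved.
static size_t filter_unordered(void **items, size_t len, int (*keep)(void *))
{
    size_t n = 0;
    while (n < len) {
        if (keep(items[n]))
            n++;                     // survivor: advance
        else
            items[n] = items[--len]; // dead: swap in the tail entry
    }
    return len; // new length
}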
@@ -3968,8 +3960,7 @@ void jl_init_thread_heap(jl_ptls_t ptls)
     small_arraylist_new(&heap->live_tasks, 0);
     for (int i = 0; i < JL_N_STACK_POOLS; i++)
         small_arraylist_new(&heap->free_stacks[i], 0);
-    heap->mallocarrays = NULL;
-    heap->mafreelist = NULL;
+    small_arraylist_new(&heap->mallocarrays, 0);
     heap->big_objects = NULL;
     heap->remset = &heap->_remset[0];
     heap->last_remset = &heap->_remset[1];
@@ -4069,58 +4060,44 @@ JL_DLLEXPORT void jl_throw_out_of_memory_error(void)
     jl_throw(jl_memory_exception);
 }
 
-// allocation wrappers that track allocation and let collection run
+// allocation wrappers that add to gc pressure
 
-JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
+JL_DLLEXPORT void *jl_malloc(size_t sz)
 {
-    jl_gcframe_t **pgcstack = jl_get_pgcstack();
-    jl_task_t *ct = jl_current_task;
-    void *data = malloc(sz);
-    if (data != NULL && pgcstack != NULL && ct->world_age) {
-        jl_ptls_t ptls = ct->ptls;
-        maybe_collect(ptls);
-        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
-        jl_atomic_store_relaxed(&ptls->gc_num.malloc,
-            jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
-        jl_batch_accum_heap_size(ptls, sz);
-    }
-    return data;
+    return jl_gc_counted_malloc(sz);
 }
 
-JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
+//_unchecked_calloc does not check for potential overflow of nm*sz
+STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) {
+    size_t nmsz = nm*sz;
+    return jl_gc_counted_calloc(nmsz, 1);
+}
+
+JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz)
 {
-    jl_gcframe_t **pgcstack = jl_get_pgcstack();
-    jl_task_t *ct = jl_current_task;
-    void *data = calloc(nm, sz);
-    if (data != NULL && pgcstack != NULL && ct->world_age) {
-        jl_ptls_t ptls = ct->ptls;
-        maybe_collect(ptls);
-        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz);
-        jl_atomic_store_relaxed(&ptls->gc_num.malloc,
-            jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
-        jl_batch_accum_heap_size(ptls, sz * nm);
-    }
-    return data;
+    if (nm > SSIZE_MAX/sz)
+        return NULL;
+    return _unchecked_calloc(nm, sz);
 }
 
-JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
+JL_DLLEXPORT void jl_free(void *p)
 {
-    jl_gcframe_t **pgcstack = jl_get_pgcstack();
-    jl_task_t *ct = jl_current_task;
-    free(p);
-    if (pgcstack != NULL && ct->world_age) {
-        jl_batch_accum_free_size(ct->ptls, sz);
+    if (p != NULL) {
+        size_t sz = memory_block_usable_size(p, 0);
+        free(p);
+        jl_task_t *ct = jl_get_current_task();
+        if (ct != NULL)
+            jl_batch_accum_free_size(ct->ptls, sz);
     }
 }
 
-JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz)
+JL_DLLEXPORT void *jl_realloc(void *p, size_t sz)
 {
-    jl_gcframe_t **pgcstack = jl_get_pgcstack();
-    jl_task_t *ct = jl_current_task;
+    size_t old = p ? memory_block_usable_size(p, 0) : 0;
     void *data = realloc(p, sz);
-    if (data != NULL && pgcstack != NULL && ct->world_age) {
+    jl_task_t *ct = jl_get_current_task();
+    if (data != NULL && ct != NULL) {
+        sz = memory_block_usable_size(data, 0);
         jl_ptls_t ptls = ct->ptls;
         maybe_collect(ptls);
         if (!(sz < old))
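Worth pausing at the jl_calloc guard above: it divides instead of multiplying because nm*sz can wrap silently in size_t, turning a huge logical request into a tiny allocation. A standalone illustration, assuming a 64-bit size_t (not part of the patch):

#include <stdio.h>
#include <limits.h>

int main(void)
{
    size_t nm = (size_t)1 << 33, sz = (size_t)1 << 33;
    // nm*sz == 2^66, which wraps to 0 in a 64-bit size_t
    if (sz != 0 && nm > (size_t)SSIZE_MAX / sz)
        puts("rejected: nm*sz would overflow");
    else
        printf("nm*sz = %zu\n", nm * sz);
    return 0;
}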
@@ -4140,63 +4117,80 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz)
     return data;
 }
 
-// allocation wrappers that save the size of allocations, to allow using
-// jl_gc_counted_* functions with a libc-compatible API.
-
-JL_DLLEXPORT void *jl_malloc(size_t sz)
+JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
 {
-    int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT);
-    if (p == NULL)
-        return NULL;
-    p[0] = sz;
-    return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
+    jl_task_t *ct = jl_current_task;
+    void *data = malloc(sz);
+    if (data != NULL && ct != NULL && ct->world_age) {
+        sz = memory_block_usable_size(data, 0);
+        jl_ptls_t ptls = ct->ptls;
+        maybe_collect(ptls);
+        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
+        jl_atomic_store_relaxed(&ptls->gc_num.malloc,
+            jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
+        jl_batch_accum_heap_size(ptls, sz);
+    }
+    return data;
 }
 
-//_unchecked_calloc does not check for potential overflow of nm*sz
-STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) {
-    size_t nmsz = nm*sz;
-    int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1);
-    if (p == NULL)
-        return NULL;
-    p[0] = nmsz;
-    return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
-}
-
-JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz)
+JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
 {
-    if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT)
-        return NULL;
-    return _unchecked_calloc(nm, sz);
+    jl_task_t *ct = jl_current_task;
+    void *data = calloc(nm, sz);
+    if (data != NULL && ct != NULL && ct->world_age) {
+        sz = memory_block_usable_size(data, 0);
+        jl_ptls_t ptls = ct->ptls;
+        maybe_collect(ptls);
+        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
+        jl_atomic_store_relaxed(&ptls->gc_num.malloc,
+            jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
+        jl_batch_accum_heap_size(ptls, sz);
+    }
+    return data;
 }
 
-JL_DLLEXPORT void jl_free(void *p)
+JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
 {
-    if (p != NULL) {
-        int64_t *pp = (int64_t *)p - 2;
-        size_t sz = pp[0];
-        jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT);
-    }
+    jl_free(p);
 }
 
-JL_DLLEXPORT void *jl_realloc(void *p, size_t sz)
+JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz)
 {
-    int64_t *pp;
-    size_t szold;
-    if (p == NULL) {
-        pp = NULL;
-        szold = 0;
-    }
-    else {
-        pp = (int64_t *)p - 2;
-        szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT;
-    }
-    int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT);
-    if (pnew == NULL)
-        return NULL;
-    pnew[0] = sz;
-    return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
+    return jl_realloc(p, sz);
 }
 
+// =========================================================================== //
+// malloc wrappers, aligned allocation
+// =========================================================================== //
+
+#if defined(_OS_WINDOWS_)
+// helper function based partly on wine msvcrt80+ heap.c
+// but with several fixes to improve the correctness of the computation and remove unnecessary parameters
+#define SAVED_PTR(x) ((void *)((DWORD_PTR)((char *)x - sizeof(void *)) & \
+                               ~(sizeof(void *) - 1)))
+static size_t _aligned_msize(void *p)
+{
+    void *alloc_ptr = *(void**)SAVED_PTR(p);
+    return _msize(alloc_ptr) - ((char*)p - (char*)alloc_ptr);
+}
+#undef SAVED_PTR
+#endif
+
+size_t memory_block_usable_size(void *p, int isaligned) JL_NOTSAFEPOINT
+{
+#if defined(_OS_WINDOWS_)
+    if (isaligned)
+        return _aligned_msize(p);
+    else
+        return _msize(p);
+#elif defined(_OS_DARWIN_)
+    return malloc_size(p);
+#else
+    return malloc_usable_size(p);
+#endif
+}
+
 // allocating blocks for Arrays and Strings
 
 JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
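For the Windows branch above: _aligned_malloc stores the pointer returned by the underlying allocation in the word immediately before the aligned block, and SAVED_PTR recovers that slot. A comment-diagram of the assumed layout (a sketch of the MSVC CRT convention, not taken from the patch):

/*
 *   raw = underlying malloc(size + alignment + sizeof(void*))
 *
 *   +---------+---------+------------------------+
 *   | padding | raw ptr | aligned user block ... |
 *   +---------+---------+------------------------+
 *   ^ raw     ^ SAVED_PTR(p)
 *                       ^ p (aligned)
 *
 *   _aligned_msize(p) = _msize(raw) - ((char*)p - (char*)raw)
 */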
@@ -4214,12 +4208,13 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
     void *b = malloc_cache_align(allocsz);
     if (b == NULL)
         jl_throw(jl_memory_exception);
-
+    size_t allocated_bytes = memory_block_usable_size(b, 1);
+    assert(allocated_bytes >= allocsz);
     jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-        jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz);
+        jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocated_bytes);
     jl_atomic_store_relaxed(&ptls->gc_num.malloc,
         jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
-    jl_batch_accum_heap_size(ptls, allocsz);
+    jl_batch_accum_heap_size(ptls, allocated_bytes);
 #ifdef _OS_WINDOWS_
     SetLastError(last_error);
 #endif
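The jl_gc_managed_malloc change above completes the theme of this PR: accounting now counts what the allocator actually handed out rather than what was requested, since allocators round requests up. A quick standalone check, assuming glibc's malloc_usable_size:

#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>

int main(void)
{
    void *p = malloc(13);
    if (p == NULL)
        return 1;
    // On glibc this typically prints 24, not 13: the block was rounded up.
    printf("requested 13, usable %zu\n", malloc_usable_size(p));
    free(p); // free-side accounting would see the same usable size
    return 0;
}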
5 changes: 0 additions & 5 deletions src/gc.h
@@ -143,11 +143,6 @@ JL_EXTENSION typedef struct _bigval_t {
 
 // data structure for tracking malloc'd arrays and genericmemory.
 
-typedef struct _mallocarray_t {
-    jl_value_t *a;
-    struct _mallocarray_t *next;
-} mallocarray_t;
-
 // pool page metadata
 typedef struct _jl_gc_pagemeta_t {
     // next metadata structure in per-thread list
5 changes: 2 additions & 3 deletions src/genericmemory.c
@@ -165,7 +165,8 @@ JL_DLLEXPORT jl_genericmemory_t *jl_ptr_to_genericmemory(jl_value_t *mtype, void
     if (own_buffer) {
         int isaligned = 0; // TODO: allow passing memalign'd buffers
         jl_gc_track_malloced_genericmemory(ct->ptls, m, isaligned);
-        jl_gc_count_allocd(nel*elsz);
+        size_t allocated_bytes = memory_block_usable_size(data, isaligned);
+        jl_gc_count_allocd(allocated_bytes);
     }
     return m;
 }
@@ -208,8 +209,6 @@ JL_DLLEXPORT jl_value_t *jl_genericmemory_to_string(jl_genericmemory_t *m, size_
     JL_GC_PUSH1(&o);
     jl_value_t *str = jl_pchar_to_string((const char*)m->ptr, len);
     JL_GC_POP();
-    if (how == 1) // TODO: we might like to early-call jl_gc_free_memory here instead actually, but hopefully `m` will die soon
-        jl_gc_count_freed(mlength);
     return str;
 }
 // n.b. how == 0 is always pool-allocated, so the freed bytes are computed from the pool not the object