Skip to content

Commit 2ccc91a

Browse files
gbaraldi authored and KristofferC committed
Add malloc fixes to 1.11 branch (#57880)
Contains #55223 and #56801
1 parent 7c7c325 commit 2ccc91a

File tree

8 files changed

+133
-142
lines changed

8 files changed

+133
-142
lines changed

src/gc-debug.c

+6-5
Original file line number · Diff line number · Diff line change
@@ -1100,13 +1100,14 @@ void gc_stats_big_obj(void)
11001100
v = v->next;
11011101
}
11021102

1103-
mallocarray_t *ma = ptls2->heap.mallocarrays;
1104-
while (ma != NULL) {
1105-
if (gc_marked(jl_astaggedvalue(ma->a)->bits.gc)) {
1103+
void **lst = ptls2->gc_tls.heap.mallocarrays.items;
1104+
for (size_t i = 0, l = ptls2->gc_tls.heap.mallocarrays.len; i < l; i++) {
1105+
jl_genericmemory_t *m = (jl_genericmemory_t*)((uintptr_t)lst[i] & ~(uintptr_t)1);
1106+
uint8_t bits = jl_astaggedvalue(m)->bits.gc;
1107+
if (gc_marked(bits)) {
11061108
nused++;
1107-
nbytes += jl_genericmemory_nbytes((jl_genericmemory_t*)ma->a);
1109+
nbytes += jl_genericmemory_nbytes(m);
11081110
}
1109-
ma = ma->next;
11101111
}
11111112
}
11121113

src/gc.c

+115-120
Original file line number · Diff line number · Diff line change
@@ -6,7 +6,11 @@
66
#include "julia_atomics.h"
77
#include "julia_gcext.h"
88
#include "julia_assert.h"
9-
#ifdef __GLIBC__
9+
#include <stdlib.h>
10+
11+
#if defined(_OS_DARWIN_)
12+
#include <malloc/malloc.h>
13+
#else
1014
#include <malloc.h> // for malloc_trim
1115
#endif
1216

@@ -1121,17 +1125,8 @@ static void sweep_big(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT
11211125

11221126
void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){
11231127
// This is **NOT** a GC safe point.
1124-
mallocarray_t *ma;
1125-
if (ptls->heap.mafreelist == NULL) {
1126-
ma = (mallocarray_t*)malloc_s(sizeof(mallocarray_t));
1127-
}
1128-
else {
1129-
ma = ptls->heap.mafreelist;
1130-
ptls->heap.mafreelist = ma->next;
1131-
}
1132-
ma->a = (jl_value_t*)((uintptr_t)m | !!isaligned);
1133-
ma->next = ptls->heap.mallocarrays;
1134-
ptls->heap.mallocarrays = ma;
1128+
void *a = (void*)((uintptr_t)m | !!isaligned);
1129+
small_arraylist_push(&ptls->heap.mallocarrays, a);
11351130
}
11361131

11371132

@@ -1143,10 +1138,6 @@ void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT
11431138
jl_batch_accum_heap_size(ptls, sz);
11441139
}
11451140

1146-
void jl_gc_count_freed(size_t sz) JL_NOTSAFEPOINT
1147-
{
1148-
jl_batch_accum_free_size(jl_current_task->ptls, sz);
1149-
}
11501141

11511142
// Only safe to update the heap inside the GC
11521143
static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTSAFEPOINT
@@ -1222,19 +1213,21 @@ size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT
12221213
}
12231214

12241215

1225-
static void jl_gc_free_memory(jl_value_t *v, int isaligned) JL_NOTSAFEPOINT
1216+
static void jl_gc_free_memory(jl_genericmemory_t *v, int isaligned) JL_NOTSAFEPOINT
12261217
{
12271218
assert(jl_is_genericmemory(v));
12281219
jl_genericmemory_t *m = (jl_genericmemory_t*)v;
12291220
assert(jl_genericmemory_how(m) == 1 || jl_genericmemory_how(m) == 2);
12301221
char *d = (char*)m->ptr;
1222+
size_t freed_bytes = memory_block_usable_size(d, isaligned);
1223+
assert(freed_bytes != 0);
12311224
if (isaligned)
12321225
jl_free_aligned(d);
12331226
else
12341227
free(d);
12351228
jl_atomic_store_relaxed(&gc_heap_stats.heap_size,
1236-
jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - jl_genericmemory_nbytes(m));
1237-
gc_num.freed += jl_genericmemory_nbytes(m);
1229+
jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - freed_bytes);
1230+
gc_num.freed += freed_bytes;
12381231
gc_num.freecall++;
12391232
}
12401233

@@ -1245,24 +1238,23 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT
12451238
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
12461239
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
12471240
if (ptls2 != NULL) {
1248-
mallocarray_t *ma = ptls2->heap.mallocarrays;
1249-
mallocarray_t **pma = &ptls2->heap.mallocarrays;
1250-
while (ma != NULL) {
1251-
mallocarray_t *nxt = ma->next;
1252-
jl_value_t *a = (jl_value_t*)((uintptr_t)ma->a & ~1);
1253-
int bits = jl_astaggedvalue(a)->bits.gc;
1254-
if (gc_marked(bits)) {
1255-
pma = &ma->next;
1241+
size_t n = 0;
1242+
size_t l = ptls2->heap.mallocarrays.len;
1243+
void **lst = ptls2->heap.mallocarrays.items;
1244+
// filter without preserving order
1245+
while (n < l) {
1246+
jl_genericmemory_t *m = (jl_genericmemory_t*)((uintptr_t)lst[n] & ~1);
1247+
if (gc_marked(jl_astaggedvalue(m)->bits.gc)) {
1248+
n++;
12561249
}
12571250
else {
1258-
*pma = nxt;
1259-
int isaligned = (uintptr_t)ma->a & 1;
1260-
jl_gc_free_memory(a, isaligned);
1261-
free(ma);
1251+
int isaligned = (uintptr_t)lst[n] & 1;
1252+
jl_gc_free_memory(m, isaligned);
1253+
l--;
1254+
lst[n] = lst[l];
12621255
}
1263-
gc_time_count_mallocd_memory(bits);
1264-
ma = nxt;
12651256
}
1257+
ptls2->heap.mallocarrays.len = l;
12661258
}
12671259
}
12681260
gc_time_mallocd_memory_end();
@@ -3968,8 +3960,7 @@ void jl_init_thread_heap(jl_ptls_t ptls)
39683960
small_arraylist_new(&heap->live_tasks, 0);
39693961
for (int i = 0; i < JL_N_STACK_POOLS; i++)
39703962
small_arraylist_new(&heap->free_stacks[i], 0);
3971-
heap->mallocarrays = NULL;
3972-
heap->mafreelist = NULL;
3963+
small_arraylist_new(&heap->mallocarrays, 0);
39733964
heap->big_objects = NULL;
39743965
heap->remset = &heap->_remset[0];
39753966
heap->last_remset = &heap->_remset[1];
@@ -4069,58 +4060,44 @@ JL_DLLEXPORT void jl_throw_out_of_memory_error(void)
40694060
jl_throw(jl_memory_exception);
40704061
}
40714062

4072-
// allocation wrappers that track allocation and let collection run
4063+
// allocation wrappers that add to gc pressure
40734064

4074-
JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
4065+
JL_DLLEXPORT void *jl_malloc(size_t sz)
40754066
{
4076-
jl_gcframe_t **pgcstack = jl_get_pgcstack();
4077-
jl_task_t *ct = jl_current_task;
4078-
void *data = malloc(sz);
4079-
if (data != NULL && pgcstack != NULL && ct->world_age) {
4080-
jl_ptls_t ptls = ct->ptls;
4081-
maybe_collect(ptls);
4082-
jl_atomic_store_relaxed(&ptls->gc_num.allocd,
4083-
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
4084-
jl_atomic_store_relaxed(&ptls->gc_num.malloc,
4085-
jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
4086-
jl_batch_accum_heap_size(ptls, sz);
4087-
}
4088-
return data;
4067+
return jl_gc_counted_malloc(sz);
40894068
}
40904069

4091-
JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
4070+
//_unchecked_calloc does not check for potential overflow of nm*sz
4071+
STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) {
4072+
size_t nmsz = nm*sz;
4073+
return jl_gc_counted_calloc(nmsz, 1);
4074+
}
4075+
4076+
JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz)
40924077
{
4093-
jl_gcframe_t **pgcstack = jl_get_pgcstack();
4094-
jl_task_t *ct = jl_current_task;
4095-
void *data = calloc(nm, sz);
4096-
if (data != NULL && pgcstack != NULL && ct->world_age) {
4097-
jl_ptls_t ptls = ct->ptls;
4098-
maybe_collect(ptls);
4099-
jl_atomic_store_relaxed(&ptls->gc_num.allocd,
4100-
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz);
4101-
jl_atomic_store_relaxed(&ptls->gc_num.malloc,
4102-
jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
4103-
jl_batch_accum_heap_size(ptls, sz * nm);
4104-
}
4105-
return data;
4078+
if (nm > SSIZE_MAX/sz)
4079+
return NULL;
4080+
return _unchecked_calloc(nm, sz);
41064081
}
41074082

4108-
JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
4083+
JL_DLLEXPORT void jl_free(void *p)
41094084
{
4110-
jl_gcframe_t **pgcstack = jl_get_pgcstack();
4111-
jl_task_t *ct = jl_current_task;
4112-
free(p);
4113-
if (pgcstack != NULL && ct->world_age) {
4114-
jl_batch_accum_free_size(ct->ptls, sz);
4085+
if (p != NULL) {
4086+
size_t sz = memory_block_usable_size(p, 0);
4087+
free(p);
4088+
jl_task_t *ct = jl_get_current_task();
4089+
if (ct != NULL)
4090+
jl_batch_accum_free_size(ct->ptls, sz);
41154091
}
41164092
}
41174093

4118-
JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz)
4094+
JL_DLLEXPORT void *jl_realloc(void *p, size_t sz)
41194095
{
4120-
jl_gcframe_t **pgcstack = jl_get_pgcstack();
4121-
jl_task_t *ct = jl_current_task;
4096+
size_t old = p ? memory_block_usable_size(p, 0) : 0;
41224097
void *data = realloc(p, sz);
4123-
if (data != NULL && pgcstack != NULL && ct->world_age) {
4098+
jl_task_t *ct = jl_get_current_task();
4099+
if (data != NULL && ct != NULL) {
4100+
sz = memory_block_usable_size(data, 0);
41244101
jl_ptls_t ptls = ct->ptls;
41254102
maybe_collect(ptls);
41264103
if (!(sz < old))
@@ -4140,63 +4117,80 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size
41404117
return data;
41414118
}
41424119

4143-
// allocation wrappers that save the size of allocations, to allow using
4144-
// jl_gc_counted_* functions with a libc-compatible API.
4145-
4146-
JL_DLLEXPORT void *jl_malloc(size_t sz)
4120+
JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
41474121
{
4148-
int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT);
4149-
if (p == NULL)
4150-
return NULL;
4151-
p[0] = sz;
4152-
return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
4122+
jl_task_t *ct = jl_current_task;
4123+
void *data = malloc(sz);
4124+
if (data != NULL && ct != NULL && ct->world_age) {
4125+
sz = memory_block_usable_size(data, 0);
4126+
jl_ptls_t ptls = ct->ptls;
4127+
maybe_collect(ptls);
4128+
jl_atomic_store_relaxed(&ptls->gc_num.allocd,
4129+
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
4130+
jl_atomic_store_relaxed(&ptls->gc_num.malloc,
4131+
jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
4132+
jl_batch_accum_heap_size(ptls, sz);
4133+
}
4134+
return data;
41534135
}
41544136

4155-
//_unchecked_calloc does not check for potential overflow of nm*sz
4156-
STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) {
4157-
size_t nmsz = nm*sz;
4158-
int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1);
4159-
if (p == NULL)
4160-
return NULL;
4161-
p[0] = nmsz;
4162-
return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
4137+
JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
4138+
{
4139+
jl_task_t *ct = jl_current_task;
4140+
void *data = calloc(nm, sz);
4141+
if (data != NULL && ct != NULL && ct->world_age) {
4142+
sz = memory_block_usable_size(data, 0);
4143+
jl_ptls_t ptls = ct->ptls;
4144+
maybe_collect(ptls);
4145+
jl_atomic_store_relaxed(&ptls->gc_num.allocd,
4146+
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
4147+
jl_atomic_store_relaxed(&ptls->gc_num.malloc,
4148+
jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
4149+
jl_batch_accum_heap_size(ptls, sz);
4150+
}
4151+
return data;
41634152
}
41644153

4165-
JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz)
4154+
JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
41664155
{
4167-
if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT)
4168-
return NULL;
4169-
return _unchecked_calloc(nm, sz);
4156+
jl_free(p);
41704157
}
41714158

4172-
JL_DLLEXPORT void jl_free(void *p)
4159+
JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz)
41734160
{
4174-
if (p != NULL) {
4175-
int64_t *pp = (int64_t *)p - 2;
4176-
size_t sz = pp[0];
4177-
jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT);
4178-
}
4161+
return jl_realloc(p, sz);
41794162
}
41804163

4181-
JL_DLLEXPORT void *jl_realloc(void *p, size_t sz)
4164+
// =========================================================================== //
4165+
// malloc wrappers, aligned allocation
4166+
// =========================================================================== //
4167+
4168+
#if defined(_OS_WINDOWS_)
4169+
// helper function based partly on wine msvcrt80+ heap.c
4170+
// but with several fixes to improve the correctness of the computation and remove unnecessary parameters
4171+
#define SAVED_PTR(x) ((void *)((DWORD_PTR)((char *)x - sizeof(void *)) & \
4172+
~(sizeof(void *) - 1)))
4173+
static size_t _aligned_msize(void *p)
41824174
{
4183-
int64_t *pp;
4184-
size_t szold;
4185-
if (p == NULL) {
4186-
pp = NULL;
4187-
szold = 0;
4188-
}
4189-
else {
4190-
pp = (int64_t *)p - 2;
4191-
szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT;
4192-
}
4193-
int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT);
4194-
if (pnew == NULL)
4195-
return NULL;
4196-
pnew[0] = sz;
4197-
return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
4175+
void *alloc_ptr = *(void**)SAVED_PTR(p);
4176+
return _msize(alloc_ptr) - ((char*)p - (char*)alloc_ptr);
41984177
}
4178+
#undef SAVED_PTR
4179+
#endif
41994180

4181+
size_t memory_block_usable_size(void *p, int isaligned) JL_NOTSAFEPOINT
4182+
{
4183+
#if defined(_OS_WINDOWS_)
4184+
if (isaligned)
4185+
return _aligned_msize(p);
4186+
else
4187+
return _msize(p);
4188+
#elif defined(_OS_DARWIN_)
4189+
return malloc_size(p);
4190+
#else
4191+
return malloc_usable_size(p);
4192+
#endif
4193+
}
42004194
// allocating blocks for Arrays and Strings
42014195

42024196
JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
@@ -4214,12 +4208,13 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
42144208
void *b = malloc_cache_align(allocsz);
42154209
if (b == NULL)
42164210
jl_throw(jl_memory_exception);
4217-
4211+
size_t allocated_bytes = memory_block_usable_size(b, 1);
4212+
assert(allocated_bytes >= allocsz);
42184213
jl_atomic_store_relaxed(&ptls->gc_num.allocd,
4219-
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz);
4214+
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocated_bytes);
42204215
jl_atomic_store_relaxed(&ptls->gc_num.malloc,
42214216
jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
4222-
jl_batch_accum_heap_size(ptls, allocsz);
4217+
jl_batch_accum_heap_size(ptls, allocated_bytes);
42234218
#ifdef _OS_WINDOWS_
42244219
SetLastError(last_error);
42254220
#endif

src/gc.h

-5
Original file line number · Diff line number · Diff line change
@@ -143,11 +143,6 @@ JL_EXTENSION typedef struct _bigval_t {
143143

144144
// data structure for tracking malloc'd arrays and genericmemory.
145145

146-
typedef struct _mallocarray_t {
147-
jl_value_t *a;
148-
struct _mallocarray_t *next;
149-
} mallocarray_t;
150-
151146
// pool page metadata
152147
typedef struct _jl_gc_pagemeta_t {
153148
// next metadata structure in per-thread list

src/genericmemory.c

+2-3
Original file line number · Diff line number · Diff line change
@@ -165,7 +165,8 @@ JL_DLLEXPORT jl_genericmemory_t *jl_ptr_to_genericmemory(jl_value_t *mtype, void
165165
if (own_buffer) {
166166
int isaligned = 0; // TODO: allow passing memalign'd buffers
167167
jl_gc_track_malloced_genericmemory(ct->ptls, m, isaligned);
168-
jl_gc_count_allocd(nel*elsz);
168+
size_t allocated_bytes = memory_block_usable_size(data, isaligned);
169+
jl_gc_count_allocd(allocated_bytes);
169170
}
170171
return m;
171172
}
@@ -208,8 +209,6 @@ JL_DLLEXPORT jl_value_t *jl_genericmemory_to_string(jl_genericmemory_t *m, size_
208209
JL_GC_PUSH1(&o);
209210
jl_value_t *str = jl_pchar_to_string((const char*)m->ptr, len);
210211
JL_GC_POP();
211-
if (how == 1) // TODO: we might like to early-call jl_gc_free_memory here instead actually, but hopefully `m` will die soon
212-
jl_gc_count_freed(mlength);
213212
return str;
214213
}
215214
// n.b. how == 0 is always pool-allocated, so the freed bytes are computed from the pool not the object

0 commit comments

Comments (0)