@@ -6,7 +6,11 @@
 #include "julia_atomics.h"
 #include "julia_gcext.h"
 #include "julia_assert.h"
-#ifdef __GLIBC__
+#include <stdlib.h>
+
+#if defined(_OS_DARWIN_)
+#include <malloc/malloc.h>
+#else
 #include <malloc.h> // for malloc_trim
 #endif

@@ -1121,17 +1125,8 @@ static void sweep_big(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT

 void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){
     // This is **NOT** a GC safe point.
-    mallocarray_t *ma;
-    if (ptls->heap.mafreelist == NULL) {
-        ma = (mallocarray_t*)malloc_s(sizeof(mallocarray_t));
-    }
-    else {
-        ma = ptls->heap.mafreelist;
-        ptls->heap.mafreelist = ma->next;
-    }
-    ma->a = (jl_value_t*)((uintptr_t)m | !!isaligned);
-    ma->next = ptls->heap.mallocarrays;
-    ptls->heap.mallocarrays = ma;
+    void *a = (void*)((uintptr_t)m | !!isaligned);
+    small_arraylist_push(&ptls->heap.mallocarrays, a);
 }
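
The tracked pointer and its `isaligned` flag now share one word: the flag rides in the low bit, which is always clear on `jl_genericmemory_t` pointers. A minimal standalone sketch of that tag/untag pattern (function names are illustrative, not from the patch):

    #include <stdint.h>

    // Sketch of the tag/untag pattern; the low bit is free on aligned pointers.
    static inline void *tag_ptr(void *p, int flag) {
        return (void*)((uintptr_t)p | (flag ? 1 : 0)); // stash a 1-bit flag
    }
    static inline void *untag_ptr(void *p) {
        return (void*)((uintptr_t)p & ~(uintptr_t)1);  // strip the flag bit
    }
    static inline int ptr_tag(void *p) {
        return (int)((uintptr_t)p & 1);                // recover the flag (e.g. isaligned)
    }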
@@ -1143,10 +1138,6 @@ void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT
     jl_batch_accum_heap_size(ptls, sz);
 }

-void jl_gc_count_freed(size_t sz) JL_NOTSAFEPOINT
-{
-    jl_batch_accum_free_size(jl_current_task->ptls, sz);
-}

 // Only safe to update the heap inside the GC
 static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTSAFEPOINT
@@ -1222,19 +1213,21 @@ size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT
 }

-static void jl_gc_free_memory(jl_value_t *v, int isaligned) JL_NOTSAFEPOINT
+static void jl_gc_free_memory(jl_genericmemory_t *v, int isaligned) JL_NOTSAFEPOINT
 {
     assert(jl_is_genericmemory(v));
     jl_genericmemory_t *m = (jl_genericmemory_t*)v;
     assert(jl_genericmemory_how(m) == 1 || jl_genericmemory_how(m) == 2);
     char *d = (char*)m->ptr;
+    size_t freed_bytes = memory_block_usable_size(d, isaligned);
+    assert(freed_bytes != 0);
     if (isaligned)
         jl_free_aligned(d);
     else
         free(d);
     jl_atomic_store_relaxed(&gc_heap_stats.heap_size,
-        jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - jl_genericmemory_nbytes(m));
-    gc_num.freed += jl_genericmemory_nbytes(m);
+        jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - freed_bytes);
+    gc_num.freed += freed_bytes;
     gc_num.freecall++;
 }
@@ -1245,24 +1238,23 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT
     for (int t_i = 0; t_i < gc_n_threads; t_i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
         if (ptls2 != NULL) {
-            mallocarray_t *ma = ptls2->heap.mallocarrays;
-            mallocarray_t **pma = &ptls2->heap.mallocarrays;
-            while (ma != NULL) {
-                mallocarray_t *nxt = ma->next;
-                jl_value_t *a = (jl_value_t*)((uintptr_t)ma->a & ~1);
-                int bits = jl_astaggedvalue(a)->bits.gc;
-                if (gc_marked(bits)) {
-                    pma = &ma->next;
+            size_t n = 0;
+            size_t l = ptls2->heap.mallocarrays.len;
+            void **lst = ptls2->heap.mallocarrays.items;
+            // filter without preserving order
+            while (n < l) {
+                jl_genericmemory_t *m = (jl_genericmemory_t*)((uintptr_t)lst[n] & ~1);
+                if (gc_marked(jl_astaggedvalue(m)->bits.gc)) {
+                    n++;
                 }
                 else {
-                    *pma = nxt;
-                    int isaligned = (uintptr_t)ma->a & 1;
-                    jl_gc_free_memory(a, isaligned);
-                    free(ma);
+                    int isaligned = (uintptr_t)lst[n] & 1;
+                    jl_gc_free_memory(m, isaligned);
+                    l--;
+                    lst[n] = lst[l];
                 }
-                gc_time_count_mallocd_memory(bits);
-                ma = nxt;
             }
+            ptls2->heap.mallocarrays.len = l;
         }
     }
     gc_time_mallocd_memory_end();
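
The sweep replaces the linked-list walk with an in-place, order-agnostic filter over the arraylist: survivors advance the cursor, dead entries are overwritten by the last element and the length shrinks, so no per-entry cells are allocated or freed. The same pattern in isolation (hypothetical `keep` predicate):

    #include <stddef.h>

    // Compact items[0..len) in place, dropping entries that fail `keep`.
    static size_t filter_unordered(void **items, size_t len, int (*keep)(void *)) {
        size_t n = 0;
        while (n < len) {
            if (keep(items[n])) {
                n++;                   // survivor: leave it and advance
            }
            else {
                len--;                 // shrink the live region...
                items[n] = items[len]; // ...and refill the hole from the end
            }
        }
        return len; // new length; order is not preserved
    }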
@@ -3968,8 +3960,7 @@ void jl_init_thread_heap(jl_ptls_t ptls)
     small_arraylist_new(&heap->live_tasks, 0);
     for (int i = 0; i < JL_N_STACK_POOLS; i++)
         small_arraylist_new(&heap->free_stacks[i], 0);
-    heap->mallocarrays = NULL;
-    heap->mafreelist = NULL;
+    small_arraylist_new(&heap->mallocarrays, 0);
     heap->big_objects = NULL;
     heap->remset = &heap->_remset[0];
     heap->last_remset = &heap->_remset[1];
@@ -4069,58 +4060,44 @@ JL_DLLEXPORT void jl_throw_out_of_memory_error(void)
     jl_throw(jl_memory_exception);
 }

-// allocation wrappers that track allocation and let collection run
+// allocation wrappers that add to gc pressure

-JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
+JL_DLLEXPORT void *jl_malloc(size_t sz)
 {
-    jl_gcframe_t **pgcstack = jl_get_pgcstack();
-    jl_task_t *ct = jl_current_task;
-    void *data = malloc(sz);
-    if (data != NULL && pgcstack != NULL && ct->world_age) {
-        jl_ptls_t ptls = ct->ptls;
-        maybe_collect(ptls);
-        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
-        jl_atomic_store_relaxed(&ptls->gc_num.malloc,
-            jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
-        jl_batch_accum_heap_size(ptls, sz);
-    }
-    return data;
+    return jl_gc_counted_malloc(sz);
 }

-JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
+//_unchecked_calloc does not check for potential overflow of nm*sz
+STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) {
+    size_t nmsz = nm * sz;
+    return jl_gc_counted_calloc(nmsz, 1);
+}
+
+JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz)
 {
-    jl_gcframe_t **pgcstack = jl_get_pgcstack();
-    jl_task_t *ct = jl_current_task;
-    void *data = calloc(nm, sz);
-    if (data != NULL && pgcstack != NULL && ct->world_age) {
-        jl_ptls_t ptls = ct->ptls;
-        maybe_collect(ptls);
-        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz);
-        jl_atomic_store_relaxed(&ptls->gc_num.malloc,
-            jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
-        jl_batch_accum_heap_size(ptls, sz * nm);
-    }
-    return data;
+    if (nm > SSIZE_MAX/sz)
+        return NULL;
+    return _unchecked_calloc(nm, sz);
 }
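
The simplified guard is sound because integer division floors: for nonzero `sz`, `nm <= SSIZE_MAX / sz` implies `nm * sz <= SSIZE_MAX`, so the product formed in `_unchecked_calloc` stays in range. A toy check of the boundary (values are illustrative):

    #include <stdio.h>
    #include <limits.h>   // POSIX defines SSIZE_MAX here

    int main(void) {
        size_t sz = 4096;
        size_t nm_ok  = SSIZE_MAX / sz; // largest count the guard accepts
        size_t nm_bad = nm_ok + 1;      // one more would overflow the budget
        printf("%d %d\n", nm_ok > SSIZE_MAX / sz, nm_bad > SSIZE_MAX / sz); // prints: 0 1
        return 0;
    }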

-JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
+JL_DLLEXPORT void jl_free(void *p)
 {
-    jl_gcframe_t **pgcstack = jl_get_pgcstack();
-    jl_task_t *ct = jl_current_task;
-    free(p);
-    if (pgcstack != NULL && ct->world_age) {
-        jl_batch_accum_free_size(ct->ptls, sz);
+    if (p != NULL) {
+        size_t sz = memory_block_usable_size(p, 0);
+        free(p);
+        jl_task_t *ct = jl_get_current_task();
+        if (ct != NULL)
+            jl_batch_accum_free_size(ct->ptls, sz);
     }
 }

-JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz)
+JL_DLLEXPORT void *jl_realloc(void *p, size_t sz)
 {
-    jl_gcframe_t **pgcstack = jl_get_pgcstack();
-    jl_task_t *ct = jl_current_task;
+    size_t old = p ? memory_block_usable_size(p, 0) : 0;
     void *data = realloc(p, sz);
-    if (data != NULL && pgcstack != NULL && ct->world_age) {
+    jl_task_t *ct = jl_get_current_task();
+    if (data != NULL && ct != NULL) {
+        sz = memory_block_usable_size(data, 0);
         jl_ptls_t ptls = ct->ptls;
         maybe_collect(ptls);
         if (!(sz < old))
@@ -4140,63 +4117,80 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size
     return data;
 }
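
`jl_realloc` no longer trusts a caller-supplied old size; both sides of the accounting come from the allocator itself. A sketch of that delta-accounting idea on the glibc path (the `live_bytes` counter is a hypothetical stand-in for the GC's statistics):

    #include <stdlib.h>
    #include <malloc.h> // malloc_usable_size on glibc; other platforms differ, as above

    static long long live_bytes; // toy stand-in for the GC's heap-size counter

    static void *counted_realloc(void *p, size_t sz) {
        size_t old = p ? malloc_usable_size(p) : 0;    // what was credited before
        void *q = realloc(p, sz);
        if (q != NULL) {
            size_t new_usable = malloc_usable_size(q); // what is credited now
            live_bytes += (long long)new_usable - (long long)old; // signed delta
        }
        return q;
    }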

-// allocation wrappers that save the size of allocations, to allow using
-// jl_gc_counted_* functions with a libc-compatible API.
-
-JL_DLLEXPORT void *jl_malloc(size_t sz)
+JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
 {
-    int64_t *p = (int64_t*)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT);
-    if (p == NULL)
-        return NULL;
-    p[0] = sz;
-    return (void*)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
+    jl_task_t *ct = jl_current_task;
+    void *data = malloc(sz);
+    if (data != NULL && ct != NULL && ct->world_age) {
+        sz = memory_block_usable_size(data, 0);
+        jl_ptls_t ptls = ct->ptls;
+        maybe_collect(ptls);
+        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
+        jl_atomic_store_relaxed(&ptls->gc_num.malloc,
+            jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
+        jl_batch_accum_heap_size(ptls, sz);
+    }
+    return data;
 }

-//_unchecked_calloc does not check for potential overflow of nm*sz
-STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) {
-    size_t nmsz = nm * sz;
-    int64_t *p = (int64_t*)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1);
-    if (p == NULL)
-        return NULL;
-    p[0] = nmsz;
-    return (void*)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
+JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
+{
+    jl_task_t *ct = jl_current_task;
+    void *data = calloc(nm, sz);
+    if (data != NULL && ct != NULL && ct->world_age) {
+        sz = memory_block_usable_size(data, 0);
+        jl_ptls_t ptls = ct->ptls;
+        maybe_collect(ptls);
+        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
+        jl_atomic_store_relaxed(&ptls->gc_num.malloc,
+            jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
+        jl_batch_accum_heap_size(ptls, sz);
+    }
+    return data;
 }

-JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz)
+JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
 {
-    if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT)
-        return NULL;
-    return _unchecked_calloc(nm, sz);
+    jl_free(p);
 }

-JL_DLLEXPORT void jl_free(void *p)
+JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz)
 {
-    if (p != NULL) {
-        int64_t *pp = (int64_t*)p - 2;
-        size_t sz = pp[0];
-        jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT);
-    }
+    return jl_realloc(p, sz);
 }
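
With sizes now queried from the allocator, the legacy counted entry points survive only as ABI-compatibility shims and their size hints are ignored. A hypothetical legacy caller keeps working unchanged:

    #include <stddef.h>

    extern void *jl_gc_counted_malloc(size_t sz);
    extern void jl_gc_counted_free_with_size(void *p, size_t sz);

    // Code written against the old counted API still links and behaves the same.
    static void legacy_caller(void) {
        void *buf = jl_gc_counted_malloc(256);
        if (buf != NULL)
            jl_gc_counted_free_with_size(buf, 256); // size hint ignored; forwards to jl_free
    }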

-JL_DLLEXPORT void *jl_realloc(void *p, size_t sz)
+// =========================================================================== //
+// malloc wrappers, aligned allocation
+// =========================================================================== //
+
+#if defined(_OS_WINDOWS_)
+// helper function based partly on wine msvcrt80+ heap.c
+// but with several fixes to improve the correctness of the computation and remove unnecessary parameters
+#define SAVED_PTR(x) ((void*)((DWORD_PTR)((char*)x - sizeof(void*)) & \
+                              ~(sizeof(void*) - 1)))
+static size_t _aligned_msize(void *p)
 {
-    int64_t *pp;
-    size_t szold;
-    if (p == NULL) {
-        pp = NULL;
-        szold = 0;
-    }
-    else {
-        pp = (int64_t*)p - 2;
-        szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT;
-    }
-    int64_t *pnew = (int64_t*)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT);
-    if (pnew == NULL)
-        return NULL;
-    pnew[0] = sz;
-    return (void*)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
+    void *alloc_ptr = *(void**)SAVED_PTR(p);
+    return _msize(alloc_ptr) - ((char*)p - (char*)alloc_ptr);
 }
+#undef SAVED_PTR
+#endif
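
`SAVED_PTR` depends on the msvcrt `_aligned_malloc` layout: the base pointer returned by the underlying allocation is stored in the word just below the aligned block, so the usable size is what `_msize` reports minus the alignment padding. A portable toy allocator with the same layout (illustrative only, not the CRT's implementation):

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    // Toy aligned allocator mimicking the layout SAVED_PTR assumes:
    // the malloc'd base pointer is stashed in the word just below the
    // aligned user block, so free/introspection can recover it.
    static void *toy_aligned_alloc(size_t align, size_t sz) { // align: power of two
        void *base = malloc(sz + align + sizeof(void*));
        if (base == NULL)
            return NULL;
        uintptr_t user = ((uintptr_t)base + sizeof(void*) + align - 1) & ~(uintptr_t)(align - 1);
        memcpy((void*)(user - sizeof(void*)), &base, sizeof(void*)); // save base below block
        return (void*)user;
    }

    static void toy_aligned_free(void *p) {
        void *base;
        memcpy(&base, (char*)p - sizeof(void*), sizeof(void*)); // recover the saved base
        free(base);
    }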

+size_t memory_block_usable_size(void *p, int isaligned) JL_NOTSAFEPOINT
+{
+#if defined(_OS_WINDOWS_)
+    if (isaligned)
+        return _aligned_msize(p);
+    else
+        return _msize(p);
+#elif defined(_OS_DARWIN_)
+    return malloc_size(p);
+#else
+    return malloc_usable_size(p);
+#endif
+}
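
The invariant behind this patch is that allocation credits and free debits both come from this same query, so `heap_size` cannot drift even though allocators round requests up. A quick sanity check on the glibc path (Windows and macOS use `_msize`/`malloc_size` as above):

    #include <assert.h>
    #include <stdlib.h>
    #include <malloc.h> // malloc_usable_size on glibc

    int main(void) {
        void *p = malloc(100);
        size_t usable = malloc_usable_size(p); // what the GC would credit
        assert(usable >= 100);                 // allocators round up, never down
        // debit the same `usable` value at free time and the books balance
        free(p);
        return 0;
    }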
 // allocating blocks for Arrays and Strings

 JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
@@ -4214,12 +4208,13 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
     void *b = malloc_cache_align(allocsz);
     if (b == NULL)
         jl_throw(jl_memory_exception);
-
+    size_t allocated_bytes = memory_block_usable_size(b, 1);
+    assert(allocated_bytes >= allocsz);
     jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-        jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz);
+        jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocated_bytes);
     jl_atomic_store_relaxed(&ptls->gc_num.malloc,
         jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
-    jl_batch_accum_heap_size(ptls, allocsz);
+    jl_batch_accum_heap_size(ptls, allocated_bytes);
 #ifdef _OS_WINDOWS_
     SetLastError(last_error);
 #endif