Skip to content

Commit 9682d72

Browse files
committed
pass alignment through to the determination of szclass
1 parent 298fffb commit 9682d72

File tree

8 files changed

+40
-66
lines changed

8 files changed

+40
-66
lines changed

base/atomics.jl

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
using Core.Intrinsics: llvmcall
44

5-
import Base: setindex!, getindex, unsafe_convert
5+
import Base: setindex!, getindex, unsafe_convert, datatype_alignment
66
import Base.Sys: ARCH, WORD_SIZE
77

88
export
@@ -321,9 +321,6 @@ inttype(::Type{Float16}) = Int16
321321
inttype(::Type{Float32}) = Int32
322322
inttype(::Type{Float64}) = Int64
323323

324-
325-
alignment(::Type{T}) where {T} = ccall(:jl_alignment, Cint, (Csize_t,), sizeof(T))
326-
327324
# All atomic operations have acquire and/or release semantics, depending on
328325
# whether they load or store values. Most of the time, this is what one wants
329326
# anyway, and it's only moderately expensive on most hardware.
@@ -335,39 +332,39 @@ for typ in atomictypes
335332
if VersionNumber(Base.libllvm_version) >= v"3.8"
336333
@eval getindex(x::Atomic{$typ}) =
337334
llvmcall($"""
338-
%rv = load atomic $rt %0 acquire, align $(alignment(typ))
335+
%rv = load atomic $rt %0 acquire, align $(datatype_alignment(typ))
339336
ret $lt %rv
340337
""", $typ, Tuple{Ptr{$typ}}, unsafe_convert(Ptr{$typ}, x))
341338
@eval setindex!(x::Atomic{$typ}, v::$typ) =
342339
llvmcall($"""
343-
store atomic $lt %1, $lt* %0 release, align $(alignment(typ))
340+
store atomic $lt %1, $lt* %0 release, align $(datatype_alignment(typ))
344341
ret void
345342
""", Void, Tuple{Ptr{$typ},$typ}, unsafe_convert(Ptr{$typ}, x), v)
346343
else
347344
if typ <: Integer
348345
@eval getindex(x::Atomic{$typ}) =
349346
llvmcall($"""
350-
%rv = load atomic $rt %0 acquire, align $(alignment(typ))
347+
%rv = load atomic $rt %0 acquire, align $(datatype_alignment(typ))
351348
ret $lt %rv
352349
""", $typ, Tuple{Ptr{$typ}}, unsafe_convert(Ptr{$typ}, x))
353350
@eval setindex!(x::Atomic{$typ}, v::$typ) =
354351
llvmcall($"""
355-
store atomic $lt %1, $lt* %0 release, align $(alignment(typ))
352+
store atomic $lt %1, $lt* %0 release, align $(datatype_alignment(typ))
356353
ret void
357354
""", Void, Tuple{Ptr{$typ},$typ}, unsafe_convert(Ptr{$typ}, x), v)
358355
else
359356
@eval getindex(x::Atomic{$typ}) =
360357
llvmcall($"""
361358
%iptr = bitcast $lt* %0 to $ilt*
362-
%irv = load atomic $irt %iptr acquire, align $(alignment(typ))
359+
%irv = load atomic $irt %iptr acquire, align $(datatype_alignment(typ))
363360
%rv = bitcast $ilt %irv to $lt
364361
ret $lt %rv
365362
""", $typ, Tuple{Ptr{$typ}}, unsafe_convert(Ptr{$typ}, x))
366363
@eval setindex!(x::Atomic{$typ}, v::$typ) =
367364
llvmcall($"""
368365
%iptr = bitcast $lt* %0 to $ilt*
369366
%ival = bitcast $lt %1 to $ilt
370-
store atomic $ilt %ival, $ilt* %iptr release, align $(alignment(typ))
367+
store atomic $ilt %ival, $ilt* %iptr release, align $(datatype_alignment(typ))
371368
ret void
372369
""", Void, Tuple{Ptr{$typ},$typ}, unsafe_convert(Ptr{$typ}, x), v)
373370
end

src/ccall.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1214,8 +1214,9 @@ static jl_cgval_t mark_or_box_ccall_result(Value *result, bool isboxed, jl_value
12141214
const DataLayout &DL = *jl_ExecutionEngine->getDataLayout();
12151215
#endif
12161216
unsigned nb = DL.getTypeStoreSize(result->getType());
1217+
unsigned alignment = DL.getPrefTypeAlignment(result->getType());
12171218
MDNode *tbaa = jl_is_mutable(rt) ? tbaa_mutab : tbaa_immut;
1218-
Value *strct = emit_allocobj(ctx, nb, runtime_dt);
1219+
Value *strct = emit_allocobj(ctx, nb, alignment, runtime_dt);
12191220
init_bits_value(strct, result, tbaa);
12201221
return mark_julia_type(strct, true, rt, ctx);
12211222
}
@@ -2001,7 +2002,7 @@ jl_cgval_t function_sig_t::emit_a_ccall(
20012002
else {
20022003
// XXX: result needs to be zero'd and given a GC root here
20032004
assert(jl_datatype_size(rt) > 0 && "sret shouldn't be a singleton instance");
2004-
result = emit_allocobj(ctx, jl_datatype_size(rt),
2005+
result = emit_allocobj(ctx, jl_datatype_size(rt), jl_datatype_align(rt),
20052006
literal_pointer_val((jl_value_t*)rt));
20062007
sretboxed = true;
20072008
}
@@ -2148,9 +2149,9 @@ jl_cgval_t function_sig_t::emit_a_ccall(
21482149
if (static_rt) {
21492150
Value *runtime_bt = literal_pointer_val(rt);
21502151
size_t rtsz = jl_datatype_size(rt);
2152+
size_t alignment = jl_datatype_align(rt);
21512153
assert(rtsz > 0);
2152-
Value *strct = emit_allocobj(ctx, rtsz, runtime_bt);
2153-
int boxalign = jl_gc_alignment(rtsz);
2154+
Value *strct = emit_allocobj(ctx, rtsz, alignment, runtime_bt);
21542155
#ifndef JL_NDEBUG
21552156
#if JL_LLVM_VERSION >= 30600
21562157
const DataLayout &DL = jl_ExecutionEngine->getDataLayout();
@@ -2160,11 +2161,11 @@ jl_cgval_t function_sig_t::emit_a_ccall(
21602161
// ARM and AArch64 can use an LLVM type larger than the julia
21612162
// type. However, the LLVM type size should be no larger than
21622163
// the GC allocation size. (multiple of `sizeof(void*)`)
2163-
assert(DL.getTypeStoreSize(lrt) <= LLT_ALIGN(rtsz, boxalign));
2164+
assert(DL.getTypeStoreSize(lrt) <= LLT_ALIGN(rtsz, alignment));
21642165
#endif
21652166
// copy the data from the return value to the new struct
21662167
MDNode *tbaa = jl_is_mutable(rt) ? tbaa_mutab : tbaa_immut;
2167-
init_bits_value(strct, result, tbaa, boxalign);
2168+
init_bits_value(strct, result, tbaa, alignment);
21682169
return mark_julia_type(strct, true, rt, ctx);
21692170
}
21702171
jlretboxed = false; // trigger mark_or_box_ccall_result to build the runtime box

src/cgutils.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1781,7 +1781,7 @@ static Value *emit_array_nd_index(const jl_cgval_t &ainfo, jl_value_t *ex, ssize
17811781

17821782
// --- boxing ---
17831783

1784-
static Value *emit_allocobj(jl_codectx_t *ctx, size_t static_size, Value *jt);
1784+
static Value *emit_allocobj(jl_codectx_t *ctx, size_t static_size, size_t alignment, Value *jt);
17851785

17861786
static void init_bits_value(Value *newv, Value *v, MDNode *tbaa, unsigned alignment = sizeof(void*)) // min alignment in julia's gc is pointer-aligned
17871787
{
@@ -1989,7 +1989,7 @@ static Value *box_union(const jl_cgval_t &vinfo, jl_codectx_t *ctx, const SmallB
19891989
jl_cgval_t vinfo_r = jl_cgval_t(vinfo, (jl_value_t*)jt, NULL);
19901990
box = _boxed_special(vinfo_r, t, ctx);
19911991
if (!box) {
1992-
box = emit_allocobj(ctx, jl_datatype_size(jt), literal_pointer_val((jl_value_t*)jt));
1992+
box = emit_allocobj(ctx, jl_datatype_size(jt), jl_datatype_align(jt), literal_pointer_val((jl_value_t*)jt));
19931993
init_bits_cgval(box, vinfo_r, jl_is_mutable(jt) ? tbaa_mutab : tbaa_immut, ctx);
19941994
}
19951995
}
@@ -2052,7 +2052,7 @@ static Value *boxed(const jl_cgval_t &vinfo, jl_codectx_t *ctx, bool gcrooted)
20522052
assert(!type_is_ghost(t)); // ghost values should have been handled by vinfo.constant above!
20532053
box = _boxed_special(vinfo, t, ctx);
20542054
if (!box) {
2055-
box = emit_allocobj(ctx, jl_datatype_size(jt), literal_pointer_val((jl_value_t*)jt));
2055+
box = emit_allocobj(ctx, jl_datatype_size(jt), jl_datatype_align(jt), literal_pointer_val((jl_value_t*)jt));
20562056
init_bits_cgval(box, vinfo, jl_is_mutable(jt) ? tbaa_mutab : tbaa_immut, ctx);
20572057
}
20582058
}
@@ -2165,13 +2165,13 @@ static void emit_cpointercheck(const jl_cgval_t &x, const std::string &msg, jl_c
21652165
}
21662166

21672167
// allocation for known size object
2168-
static Value *emit_allocobj(jl_codectx_t *ctx, size_t static_size, Value *jt)
2168+
static Value *emit_allocobj(jl_codectx_t *ctx, size_t static_size, size_t alignment, Value *jt)
21692169
{
21702170
JL_FEAT_REQUIRE(ctx, dynamic_alloc);
21712171
JL_FEAT_REQUIRE(ctx, runtime);
21722172

21732173
int osize;
2174-
int offset = jl_gc_classify_pools(static_size, &osize);
2174+
int offset = jl_gc_classify_pools(static_size, alignment, &osize);
21752175
Value *ptls_ptr = emit_bitcast(ctx->ptlsStates, T_pint8);
21762176
Value *v;
21772177
if (offset < 0) {
@@ -2325,7 +2325,7 @@ static jl_cgval_t emit_new_struct(jl_value_t *ty, size_t nargs, jl_value_t **arg
23252325
else
23262326
return mark_julia_slot(strct, ty, NULL, tbaa_stack);
23272327
}
2328-
Value *strct = emit_allocobj(ctx, jl_datatype_size(sty),
2328+
Value *strct = emit_allocobj(ctx, jl_datatype_size(sty), jl_datatype_align(sty),
23292329
literal_pointer_val((jl_value_t*)ty));
23302330
jl_cgval_t strctinfo = mark_julia_type(strct, true, ty, ctx);
23312331
for (size_t i = 0; i < nf; i++) {

src/codegen.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4523,7 +4523,7 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t
45234523
(void)julia_type_to_llvm(jargty, &isboxed);
45244524
if (isboxed) {
45254525
// passed an unboxed T, but want something boxed
4526-
Value *mem = emit_allocobj(&ctx, jl_datatype_size(jargty),
4526+
Value *mem = emit_allocobj(&ctx, jl_datatype_size(jargty), jl_datatype_align(jargty),
45274527
literal_pointer_val((jl_value_t*)jargty));
45284528
tbaa_decorate(jl_is_mutable(jargty) ? tbaa_mutab : tbaa_immut,
45294529
builder.CreateAlignedStore(val,
@@ -4785,7 +4785,8 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t
47854785
}
47864786
case jl_returninfo_t::SRet: {
47874787
unsigned sret_nbytes = jl_datatype_size(astrt);
4788-
builder.CreateMemCpy(&*gf_thunk->arg_begin(), gf_ret, sret_nbytes, jl_alignment(sret_nbytes));
4788+
unsigned alignment = jl_datatype_align(astrt);
4789+
builder.CreateMemCpy(&*gf_thunk->arg_begin(), gf_ret, sret_nbytes, alignment);
47894790
builder.CreateRetVoid();
47904791
break;
47914792
}

src/gc.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -965,12 +965,12 @@ JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset,
965965
return jl_valueof(v);
966966
}
967967

968-
int jl_gc_classify_pools(size_t sz, int *osize)
968+
int jl_gc_classify_pools(size_t sz, size_t alignment, int *osize)
969969
{
970970
if (sz > GC_MAX_SZCLASS)
971971
return -1;
972972
size_t allocsz = sz + sizeof(jl_taggedvalue_t);
973-
int klass = jl_gc_szclass(allocsz);
973+
int klass = jl_gc_szclass(allocsz, alignment);
974974
*osize = jl_gc_sizeclasses[klass];
975975
return (int)(intptr_t)(&((jl_ptls_t)0)->heap.norm_pools[klass]);
976976
}

src/intrinsics.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,7 @@ static jl_cgval_t generic_bitcast(const jl_cgval_t *argv, jl_codectx_t *ctx)
464464
return mark_julia_type(vx, false, bt, ctx);
465465
}
466466
else {
467-
Value *box = emit_allocobj(ctx, nb, boxed(bt_value, ctx));
467+
Value *box = emit_allocobj(ctx, nb, jl_datatype_align(bt), boxed(bt_value, ctx));
468468
init_bits_value(box, vx, tbaa_immut);
469469
return mark_julia_type(box, true, bt, ctx);
470470
}
@@ -612,8 +612,9 @@ static jl_cgval_t emit_pointerref(jl_cgval_t *argv, jl_codectx_t *ctx)
612612
return jl_cgval_t();
613613
}
614614
assert(jl_is_datatype(ety));
615-
uint64_t size = jl_datatype_size(ety);
616-
Value *strct = emit_allocobj(ctx, size,
615+
size_t size = jl_datatype_size(ety);
616+
size_t alignment = jl_datatype_align(ety);
617+
Value *strct = emit_allocobj(ctx, size, alignment,
617618
literal_pointer_val((jl_value_t*)ety));
618619
im1 = builder.CreateMul(im1, ConstantInt::get(T_size,
619620
LLT_ALIGN(size, jl_datatype_align(ety))));

src/julia_internal.h

Lines changed: 11 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -127,11 +127,17 @@ JL_DLLEXPORT extern const char *jl_filename;
127127
JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset,
128128
int osize);
129129
JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t allocsz);
130-
int jl_gc_classify_pools(size_t sz, int *osize);
130+
int jl_gc_classify_pools(size_t sz, size_t alignment, int *osize);
131131
extern jl_mutex_t gc_perm_lock;
132132
void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset);
133133
void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset);
134134

135+
#define JL_SMALL_BYTE_ALIGNMENT 16
136+
#define JL_CACHE_BYTE_ALIGNMENT 64
137+
// JL_HEAP_ALIGNMENT is the maximum alignment that the GC can provide
138+
#define JL_HEAP_ALIGNMENT JL_SMALL_BYTE_ALIGNMENT
139+
#define GC_MAX_SZCLASS (2032-sizeof(void*))
140+
135141
// pools are 16376 bytes large (GC_POOL_SZ - GC_PAGE_OFFSET)
136142
static const int jl_gc_sizeclasses[JL_GC_N_POOLS] = {
137143
#ifdef _P64
@@ -170,29 +176,9 @@ static const int jl_gc_sizeclasses[JL_GC_N_POOLS] = {
170176
// 64, 32, 160, 64, 16, 64, 112, 128, bytes lost
171177
};
172178

173-
STATIC_INLINE int jl_gc_alignment(size_t sz)
174-
{
175-
if (sz == 0)
176-
return sizeof(void*);
177-
#ifdef _P64
178-
(void)sz;
179-
return 16;
180-
#elif defined(_CPU_ARM_) || defined(_CPU_PPC_) || defined(_CPU_X86_)
181-
return sz <= 4 ? 8 : 16;
182-
#else
183-
// szclass 8
184-
if (sz <= 4)
185-
return 8;
186-
// szclass 12
187-
if (sz <= 8)
188-
return 4;
189-
// szclass 16+
190-
return 16;
191-
#endif
192-
}
193-
JL_DLLEXPORT int jl_alignment(size_t sz);
179+
JL_DLLEXPORT int jl_alignment(void* ty);
194180

195-
STATIC_INLINE int JL_CONST_FUNC jl_gc_szclass(size_t sz)
181+
STATIC_INLINE int JL_CONST_FUNC jl_gc_szclass(size_t sz, size_t alignment)
196182
{
197183
#ifdef _P64
198184
if (sz <= 8)
@@ -221,20 +207,16 @@ STATIC_INLINE int JL_CONST_FUNC jl_gc_szclass(size_t sz)
221207
#else
222208
# define jl_is_constexpr(e) (0)
223209
#endif
224-
#define JL_SMALL_BYTE_ALIGNMENT 16
225-
#define JL_CACHE_BYTE_ALIGNMENT 64
226-
// JL_HEAP_ALIGNMENT is the maximum alignment that the GC can provide
227-
#define JL_HEAP_ALIGNMENT JL_SMALL_BYTE_ALIGNMENT
228-
#define GC_MAX_SZCLASS (2032-sizeof(void*))
229210

230211
STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty)
231212
{
232213
const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
214+
const size_t alignment = jl_datatype_align(ty);
233215
if (allocsz < sz) // overflow in adding offs, size was "negative"
234216
jl_throw(jl_memory_exception);
235217
jl_value_t *v;
236218
if (allocsz <= GC_MAX_SZCLASS + sizeof(jl_taggedvalue_t)) {
237-
int pool_id = jl_gc_szclass(allocsz);
219+
int pool_id = jl_gc_szclass(allocsz, alignment);
238220
jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id];
239221
int osize;
240222
if (jl_is_constexpr(allocsz)) {

src/threading.c

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -832,14 +832,6 @@ void jl_init_threading(void)
832832
void jl_start_threads(void) { }
833833

834834
#endif // !JULIA_ENABLE_THREADING
835-
836-
// Make gc alignment available for threading
837-
// see threads.jl alignment
838-
JL_DLLEXPORT int jl_alignment(size_t sz)
839-
{
840-
return jl_gc_alignment(sz);
841-
}
842-
843835
#ifdef __cplusplus
844836
}
845837
#endif

0 commit comments

Comments
 (0)