Skip to content

Allocate objects in pools with correct alignment #21959

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions base/boot.jl
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,7 @@ Array{T}(A::AbstractArray{S,N}) where {T,N,S} = Array{T,N}(A)
AbstractArray{T}(A::AbstractArray{S,N}) where {T,S,N} = AbstractArray{T,N}(A)

# primitive Symbol constructors
+# XXX: these use unrooted, invalid GC pointers
eval(Core, :(function Symbol(s::String)
$(Expr(:meta, :pure))
return ccall(:jl_symbol_n, Ref{Symbol}, (Ptr{UInt8}, Int),
Expand Down
38 changes: 20 additions & 18 deletions src/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,15 @@ size_t jl_arr_xtralloc_limit = 0;
#define MAXINTVAL (((size_t)-1)>>1)

static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims,
-int isunboxed, int hasptr, int isunion, int elsz)
+int isunboxed, int hasptr, int isunion, int elsz, int elalign)
{
jl_ptls_t ptls = jl_get_ptls_states();
-size_t i, tot, nel=1;
+size_t i, tot, nel = 1;
void *data;
jl_array_t *a;
+assert(elalign);

-for(i=0; i < ndims; i++) {
+for (i = 0; i < ndims; i++) {
size_t di = dims[i];
wideint_t prod = (wideint_t)nel * (wideint_t)di;
if (prod > (wideint_t) MAXINTVAL || di > MAXINTVAL)
Expand Down Expand Up @@ -115,23 +116,23 @@ static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims,
int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT);
if (tot <= ARRAY_INLINE_NBYTES) {
if (isunboxed && elsz >= 4)
-tsz = JL_ARRAY_ALIGN(tsz, JL_SMALL_BYTE_ALIGNMENT); // align data area
+tsz = JL_ARRAY_ALIGN(tsz, elalign); // align data area
size_t doffs = tsz;
tsz += tot;
tsz = JL_ARRAY_ALIGN(tsz, JL_SMALL_BYTE_ALIGNMENT); // align whole object
-a = (jl_array_t*)jl_gc_alloc(ptls, tsz, atype);
+a = (jl_array_t*)jl_gc_alloc(ptls, tsz, elalign, atype);
tsz = JL_ARRAY_ALIGN(tsz + sizeof(void*), JL_SMALL_BYTE_ALIGNMENT) - sizeof(void*); // XXX: predict possible gc behavior
// No allocation or safepoint allowed after this
a->flags.how = 0;
data = (char*)a + doffs;
if (tot > 0 && (!isunboxed || hasptr || isunion)) // TODO: check for zeroinit
memset(data, 0, tot);
}
else {
tsz = JL_ARRAY_ALIGN(tsz, JL_CACHE_BYTE_ALIGNMENT); // align whole object
data = jl_gc_managed_malloc(tot);
// Allocate the Array **after** allocating the data
// to make sure the array is still young
-a = (jl_array_t*)jl_gc_alloc(ptls, tsz, atype);
+a = (jl_array_t*)jl_gc_alloc(ptls, tsz, JL_SMALL_BYTE_ALIGNMENT, atype);
tsz = JL_ARRAY_ALIGN(tsz + sizeof(void*), JL_SMALL_BYTE_ALIGNMENT) - sizeof(void*); // XXX: predict possible gc behavior
// No allocation or safepoint allowed after this
a->flags.how = 2;
jl_gc_track_malloced_array(ptls, a);
Expand Down Expand Up @@ -187,13 +188,13 @@ static inline jl_array_t *_new_array(jl_value_t *atype, uint32_t ndims, size_t *
elsz = LLT_ALIGN(elsz, al);
}

-return _new_array_(atype, ndims, dims, isunboxed, hasptr, isunion, elsz);
+return _new_array_(atype, ndims, dims, isunboxed, hasptr, isunion, elsz, al);
}

jl_array_t *jl_new_array_for_deserialization(jl_value_t *atype, uint32_t ndims, size_t *dims,
-int isunboxed, int hasptr, int isunion, int elsz)
+int isunboxed, int hasptr, int isunion, int elsz, int elalign)
{
-return _new_array_(atype, ndims, dims, isunboxed, hasptr, isunion, elsz);
+return _new_array_(atype, ndims, dims, isunboxed, hasptr, isunion, elsz, elalign);
}

#ifndef JL_NDEBUG
Expand Down Expand Up @@ -224,7 +225,7 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data,

int ndimwords = jl_array_ndimwords(ndims);
int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords * sizeof(size_t) + sizeof(void*), JL_SMALL_BYTE_ALIGNMENT);
-a = (jl_array_t*)jl_gc_alloc(ptls, tsz, atype);
+a = (jl_array_t*)jl_gc_alloc(ptls, tsz, JL_SMALL_BYTE_ALIGNMENT, atype);
// No allocation or safepoint allowed after this
a->flags.pooled = tsz <= GC_MAX_SZCLASS;
a->flags.ndims = ndims;
Expand Down Expand Up @@ -305,7 +306,7 @@ JL_DLLEXPORT jl_array_t *jl_string_to_array(jl_value_t *str)

int ndimwords = jl_array_ndimwords(1);
int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t) + sizeof(void*), JL_SMALL_BYTE_ALIGNMENT);
-a = (jl_array_t*)jl_gc_alloc(ptls, tsz, jl_array_uint8_type);
+a = (jl_array_t*)jl_gc_alloc(ptls, tsz, JL_SMALL_BYTE_ALIGNMENT, jl_array_uint8_type);
a->flags.pooled = tsz <= GC_MAX_SZCLASS;
a->flags.ndims = 1;
a->offset = 0;
Expand Down Expand Up @@ -352,7 +353,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data,

int ndimwords = jl_array_ndimwords(1);
int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT);
-a = (jl_array_t*)jl_gc_alloc(ptls, tsz, atype);
+a = (jl_array_t*)jl_gc_alloc(ptls, tsz, align, atype);
// No allocation or safepoint allowed after this
a->flags.pooled = tsz <= GC_MAX_SZCLASS;
a->data = data;
Expand Down Expand Up @@ -419,7 +420,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data,

int ndimwords = jl_array_ndimwords(ndims);
int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT);
-a = (jl_array_t*)jl_gc_alloc(ptls, tsz, atype);
+a = (jl_array_t*)jl_gc_alloc(ptls, tsz, align, atype);
// No allocation or safepoint allowed after this
a->flags.pooled = tsz <= GC_MAX_SZCLASS;
a->data = data;
Expand Down Expand Up @@ -515,7 +516,7 @@ JL_DLLEXPORT jl_value_t *jl_pchar_to_string(const char *str, size_t len)
jl_throw(jl_memory_exception);
if (len == 0)
return jl_an_empty_string;
-jl_value_t *s = jl_gc_alloc_(jl_get_ptls_states(), sz, jl_string_type); // force inlining
+jl_value_t *s = jl_gc_alloc_(jl_get_ptls_states(), sz, /*align*/ 0, jl_string_type); // force inlining
*(size_t*)s = len;
memcpy((char*)s + sizeof(size_t), str, len);
((char*)s + sizeof(size_t))[len] = 0;
Expand All @@ -529,7 +530,7 @@ JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len)
jl_throw(jl_memory_exception);
if (len == 0)
return jl_an_empty_string;
-jl_value_t *s = jl_gc_alloc_(jl_get_ptls_states(), sz, jl_string_type); // force inlining
+jl_value_t *s = jl_gc_alloc_(jl_get_ptls_states(), sz, /*align*/ 0, jl_string_type); // force inlining
*(size_t*)s = len;
((char*)s + sizeof(size_t))[len] = 0;
return s;
Expand Down Expand Up @@ -1197,11 +1198,12 @@ JL_DLLEXPORT void jl_array_sizehint(jl_array_t *a, size_t sz)
JL_DLLEXPORT jl_array_t *jl_array_copy(jl_array_t *ary)
{
size_t elsz = ary->elsize;
+size_t elalign = ary->flags.ptrarray ? sizeof(void*) : jl_datatype_align(jl_tparam0(jl_typeof(ary)));
size_t len = jl_array_len(ary);
int isunion = jl_is_uniontype(jl_tparam0(jl_typeof(ary)));
jl_array_t *new_ary = _new_array_(jl_typeof(ary), jl_array_ndims(ary),
&ary->nrows, !ary->flags.ptrarray,
-ary->flags.hasptr, isunion, elsz);
+ary->flags.hasptr, isunion, elsz, elalign);
memcpy(new_ary->data, ary->data, len * elsz);
// ensure isbits union arrays copy their selector bytes correctly
if (jl_array_isbitsunion(ary))
Expand Down
10 changes: 4 additions & 6 deletions src/builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -754,7 +754,7 @@ JL_CALLABLE(jl_f_tuple)
if (tt->instance != NULL)
return tt->instance;
jl_ptls_t ptls = jl_get_ptls_states();
-jl_value_t *jv = jl_gc_alloc(ptls, jl_datatype_size(tt), tt);
+jl_value_t *jv = jl_gc_alloc(ptls, jl_datatype_size(tt), jl_datatype_align(tt), tt);
for (i = 0; i < nargs; i++)
set_nth_field(tt, (void*)jv, i, args[i]);
return jv;
Expand Down Expand Up @@ -1091,8 +1091,7 @@ jl_expr_t *jl_exprn(jl_sym_t *head, size_t n)
jl_ptls_t ptls = jl_get_ptls_states();
jl_array_t *ar = jl_alloc_vec_any(n);
JL_GC_PUSH1(&ar);
-jl_expr_t *ex = (jl_expr_t*)jl_gc_alloc(ptls, sizeof(jl_expr_t),
-jl_expr_type);
+jl_expr_t *ex = (jl_expr_t*)jl_gc_alloc(ptls, sizeof(jl_expr_t), /*align*/ 0, jl_expr_type);
ex->head = head;
ex->args = ar;
JL_GC_POP();
Expand All @@ -1108,8 +1107,7 @@ JL_CALLABLE(jl_f__expr)
JL_GC_PUSH1(&ar);
for(size_t i=0; i < nargs-1; i++)
jl_array_ptr_set(ar, i, args[i+1]);
-jl_expr_t *ex = (jl_expr_t*)jl_gc_alloc(ptls, sizeof(jl_expr_t),
-jl_expr_type);
+jl_expr_t *ex = (jl_expr_t*)jl_gc_alloc(ptls, sizeof(jl_expr_t), /*align*/ 0, jl_expr_type);
ex->head = (jl_sym_t*)args[0];
ex->args = ar;
JL_GC_POP();
Expand All @@ -1124,7 +1122,7 @@ JL_DLLEXPORT jl_tvar_t *jl_new_typevar(jl_sym_t *name, jl_value_t *lb, jl_value_
if ((ub != (jl_value_t *)jl_any_type && !jl_is_type(ub) && !jl_is_typevar(ub)) || jl_is_vararg_type(ub))
jl_type_error_rt("TypeVar", "upper bound", (jl_value_t *)jl_type_type, ub);
jl_ptls_t ptls = jl_get_ptls_states();
-jl_tvar_t *tv = (jl_tvar_t *)jl_gc_alloc(ptls, sizeof(jl_tvar_t), jl_tvar_type);
+jl_tvar_t *tv = (jl_tvar_t *)jl_gc_alloc(ptls, sizeof(jl_tvar_t), 0, jl_tvar_type);
tv->name = name;
tv->lb = lb;
tv->ub = ub;
Expand Down
17 changes: 9 additions & 8 deletions src/ccall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -924,8 +924,9 @@ static Value *box_ccall_result(jl_codectx_t &ctx, Value *result, Value *runtime_
// XXX: need to handle parameterized zero-byte types (singleton)
const DataLayout &DL = jl_data_layout;
unsigned nb = DL.getTypeStoreSize(result->getType());
+unsigned alignment = DL.getPrefTypeAlignment(result->getType());
MDNode *tbaa = jl_is_mutable(rt) ? tbaa_mutab : tbaa_immut;
-Value *strct = emit_allocobj(ctx, nb, runtime_dt);
+Value *strct = emit_allocobj(ctx, nb, alignment, runtime_dt);
init_bits_value(ctx, strct, result, tbaa);
return strct;
}
Expand Down Expand Up @@ -1802,7 +1803,7 @@ jl_cgval_t function_sig_t::emit_a_ccall(
// and has incorrect write barriers.
// instead this code path should behave like `unsafe_load`
assert(jl_datatype_size(rt) > 0 && "sret shouldn't be a singleton instance");
-result = emit_allocobj(ctx, jl_datatype_size(rt),
+result = emit_allocobj(ctx, jl_datatype_size(rt), jl_datatype_align(rt),
literal_pointer_val(ctx, (jl_value_t*)rt));
sretboxed = true;
gc_uses.push_back(result);
Expand Down Expand Up @@ -1933,23 +1934,23 @@ jl_cgval_t function_sig_t::emit_a_ccall(
if (static_rt) {
Value *runtime_bt = literal_pointer_val(ctx, rt);
size_t rtsz = jl_datatype_size(rt);
+size_t rtal = julia_alignment(rt);
assert(rtsz > 0);
-Value *strct = emit_allocobj(ctx, rtsz, runtime_bt);
+Value *strct = emit_allocobj(ctx, rtsz, rtal, runtime_bt);
MDNode *tbaa = jl_is_mutable(rt) ? tbaa_mutab : tbaa_immut;
-int boxalign = julia_alignment(rt);
// copy the data from the return value to the new struct
const DataLayout &DL = jl_data_layout;
auto resultTy = result->getType();
if (DL.getTypeStoreSize(resultTy) > rtsz) {
// ARM and AArch64 can use a LLVM type larger than the julia type.
// When this happens, cast through memory.
auto slot = emit_static_alloca(ctx, resultTy);
-slot->setAlignment(Align(boxalign));
-ctx.builder.CreateAlignedStore(result, slot, Align(boxalign));
-emit_memcpy(ctx, strct, tbaa, slot, tbaa, rtsz, boxalign);
+slot->setAlignment(Align(rtal));
+ctx.builder.CreateAlignedStore(result, slot, Align(rtal));
+emit_memcpy(ctx, strct, tbaa, slot, tbaa, rtsz, rtal);
}
else {
-init_bits_value(ctx, strct, result, tbaa, boxalign);
+init_bits_value(ctx, strct, result, tbaa, rtal);
}
return mark_julia_type(ctx, strct, true, rt);
}
Expand Down
17 changes: 11 additions & 6 deletions src/cgutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2271,7 +2271,7 @@ static Value *emit_array_nd_index(

// --- boxing ---

-static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt);
+static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, size_t alignment, Value *jt);

static void init_bits_value(jl_codectx_t &ctx, Value *newv, Value *v, MDNode *tbaa,
unsigned alignment = sizeof(void*)) // min alignment in julia's gc is pointer-aligned
Expand Down Expand Up @@ -2573,7 +2573,7 @@ static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallB
jl_cgval_t vinfo_r = jl_cgval_t(vinfo, (jl_value_t*)jt, NULL);
box = _boxed_special(ctx, vinfo_r, t);
if (!box) {
-box = emit_allocobj(ctx, jl_datatype_size(jt), literal_pointer_val(ctx, (jl_value_t*)jt));
+box = emit_allocobj(ctx, jl_datatype_size(jt), jl_datatype_align(jt), literal_pointer_val(ctx, (jl_value_t*)jt));
init_bits_cgval(ctx, box, vinfo_r, jl_is_mutable(jt) ? tbaa_mutab : tbaa_immut);
}
}
Expand Down Expand Up @@ -2637,7 +2637,7 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo)
assert(!type_is_ghost(t)); // ghost values should have been handled by vinfo.constant above!
box = _boxed_special(ctx, vinfo, t);
if (!box) {
-box = emit_allocobj(ctx, jl_datatype_size(jt), literal_pointer_val(ctx, (jl_value_t*)jt));
+box = emit_allocobj(ctx, jl_datatype_size(jt), jl_datatype_align(jt), literal_pointer_val(ctx, (jl_value_t*)jt));
init_bits_cgval(ctx, box, vinfo, jl_is_mutable(jt) ? tbaa_mutab : tbaa_immut);
}
}
Expand Down Expand Up @@ -2759,11 +2759,16 @@ static void emit_cpointercheck(jl_codectx_t &ctx, const jl_cgval_t &x, const std
}

// allocation for known size object
-static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt)
+static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, size_t alignment, Value *jt)
{
Value *ptls_ptr = emit_bitcast(ctx, ctx.ptlsStates, T_pint8);
Function *F = prepare_call(jl_alloc_obj_func);
-auto call = ctx.builder.CreateCall(F, {ptls_ptr, ConstantInt::get(T_size, static_size), maybe_decay_untracked(ctx, jt)});
+auto call = ctx.builder.CreateCall(F, {
+ptls_ptr,
+ConstantInt::get(T_size, static_size),
+ConstantInt::get(T_size, alignment),
+maybe_decay_untracked(ctx, jt)
+});
call->setAttributes(F->getAttributes());
return call;
}
Expand Down Expand Up @@ -3033,7 +3038,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
else
return mark_julia_slot(strct, ty, NULL, tbaa_stack);
}
-Value *strct = emit_allocobj(ctx, jl_datatype_size(sty),
+Value *strct = emit_allocobj(ctx, jl_datatype_size(sty), jl_datatype_align(sty),
literal_pointer_val(ctx, (jl_value_t*)ty));
jl_cgval_t strctinfo = mark_julia_type(ctx, strct, true, ty);
strct = decay_derived(ctx, strct);
Expand Down
4 changes: 2 additions & 2 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -611,7 +611,7 @@ static const auto jlegal_func = new JuliaFunction{
static const auto jl_alloc_obj_func = new JuliaFunction{
"julia.gc_alloc_obj",
[](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
-{T_pint8, T_size, T_prjlvalue}, false); },
+{T_pint8, T_size, T_size, T_prjlvalue}, false); },
[](LLVMContext &C) { return AttributeList::get(C,
AttributeSet::get(C, makeArrayRef({Attribute::getWithAllocSizeArgs(C, 1, None)})), // returns %1 bytes
Attributes(C, {Attribute::NoAlias, Attribute::NonNull}),
Expand Down Expand Up @@ -5368,7 +5368,7 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con
outboxed = (output_type != (jl_value_t*)jl_voidpointer_type);
if (outboxed) {
assert(jl_datatype_size(output_type) == sizeof(void*) * 4);
-Value *strct = emit_allocobj(ctx, jl_datatype_size(output_type),
+Value *strct = emit_allocobj(ctx, jl_datatype_size(output_type), jl_datatype_align(output_type),
literal_pointer_val(ctx, (jl_value_t*)output_type));
Value *derived_strct = emit_bitcast(ctx, decay_derived(ctx, strct), T_psize);
MDNode *tbaa = best_tbaa(output_type);
Expand Down
Loading