Skip to content

Commit 82d933d

Browse files
committed
Add LLVM level allocation optimization pass
This pass can obtain escape information with much higher precision than what we can currently do in typeinf. However, it does not replace the alloc_elim_pass! in type inference, since this pass cannot handle objects with reference fields. Fixes #20452.
1 parent 17496a4 commit 82d933d

12 files changed

+1071
-64
lines changed

src/Makefile

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,9 @@ endif
5050
LLVMLINK :=
5151

5252
ifeq ($(JULIACODEGEN),LLVM)
53-
SRCS += codegen jitlayers disasm debuginfo llvm-simdloop llvm-ptls llvm-muladd llvm-late-gc-lowering llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces cgmemmgr
53+
SRCS += codegen jitlayers disasm debuginfo llvm-simdloop llvm-ptls llvm-muladd \
54+
llvm-late-gc-lowering llvm-lower-handlers llvm-gc-invariant-verifier \
55+
llvm-propagate-addrspaces llvm-alloc-opt cgmemmgr
5456
FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir)
5557
LLVM_LIBS := all
5658
ifeq ($(USE_POLLY),1)

src/ccall.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2106,7 +2106,7 @@ jl_cgval_t function_sig_t::emit_a_ccall(
21062106
size_t rtsz = jl_datatype_size(rt);
21072107
assert(rtsz > 0);
21082108
Value *strct = emit_allocobj(ctx, rtsz, runtime_bt);
2109-
int boxalign = jl_gc_alignment(rtsz);
2109+
int boxalign = jl_datatype_align(rt);
21102110
#ifndef JL_NDEBUG
21112111
#if JL_LLVM_VERSION >= 40000
21122112
const DataLayout &DL = jl_data_layout;

src/cgutils.cpp

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2097,25 +2097,12 @@ static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt)
20972097
{
20982098
JL_FEAT_REQUIRE(ctx, dynamic_alloc);
20992099
JL_FEAT_REQUIRE(ctx, runtime);
2100-
2101-
int osize;
2102-
int offset = jl_gc_classify_pools(static_size, &osize);
21032100
Value *ptls_ptr = emit_bitcast(ctx, ctx.ptlsStates, T_pint8);
2104-
Value *v;
2105-
if (offset < 0) {
2106-
Value *args[] = {ptls_ptr,
2107-
ConstantInt::get(T_size, static_size + sizeof(void*))};
2108-
v = ctx.builder.CreateCall(prepare_call(jlalloc_big_func),
2109-
ArrayRef<Value*>(args, 2));
2110-
}
2111-
else {
2112-
Value *pool_offs = ConstantInt::get(T_int32, offset);
2113-
Value *args[] = {ptls_ptr, pool_offs, ConstantInt::get(T_int32, osize)};
2114-
v = ctx.builder.CreateCall(prepare_call(jlalloc_pool_func),
2115-
ArrayRef<Value*>(args, 3));
2116-
}
2117-
tbaa_decorate(tbaa_tag, ctx.builder.CreateStore(maybe_decay_untracked(jt), emit_typeptr_addr(ctx, v)));
2118-
return v;
2101+
auto call = ctx.builder.CreateCall(prepare_call(jl_alloc_obj_func),
2102+
{ptls_ptr, ConstantInt::get(T_size, static_size),
2103+
maybe_decay_untracked(jt)});
2104+
call->setAttributes(jl_alloc_obj_func->getAttributes());
2105+
return call;
21192106
}
21202107

21212108
// if ptr is NULL this emits a write barrier _back_

src/codegen.cpp

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -314,8 +314,7 @@ static Function *jlgenericfunction_func;
314314
static Function *jlenter_func;
315315
static Function *jlleave_func;
316316
static Function *jlegal_func;
317-
static Function *jlalloc_pool_func;
318-
static Function *jlalloc_big_func;
317+
static Function *jl_alloc_obj_func;
319318
static Function *jlisa_func;
320319
static Function *jlsubtype_func;
321320
static Function *jlapplytype_func;
@@ -6517,24 +6516,19 @@ static void init_julia_llvm_env(Module *m)
65176516
"jl_instantiate_type_in_env", m);
65186517
add_named_global(jlapplytype_func, &jl_instantiate_type_in_env);
65196518

6520-
std::vector<Type*> alloc_pool_args(0);
6521-
alloc_pool_args.push_back(T_pint8);
6522-
alloc_pool_args.push_back(T_int32);
6523-
alloc_pool_args.push_back(T_int32);
6524-
jlalloc_pool_func =
6525-
Function::Create(FunctionType::get(T_prjlvalue, alloc_pool_args, false),
6526-
Function::ExternalLinkage,
6527-
"jl_gc_pool_alloc", m);
6528-
add_named_global(jlalloc_pool_func, &jl_gc_pool_alloc);
6529-
6530-
std::vector<Type*> alloc_big_args(0);
6531-
alloc_big_args.push_back(T_pint8);
6532-
alloc_big_args.push_back(T_size);
6533-
jlalloc_big_func =
6534-
Function::Create(FunctionType::get(T_prjlvalue, alloc_big_args, false),
6535-
Function::ExternalLinkage,
6536-
"jl_gc_big_alloc", m);
6537-
add_named_global(jlalloc_big_func, &jl_gc_big_alloc);
6519+
std::vector<Type*> gc_alloc_args(0);
6520+
gc_alloc_args.push_back(T_pint8);
6521+
gc_alloc_args.push_back(T_size);
6522+
gc_alloc_args.push_back(T_prjlvalue);
6523+
jl_alloc_obj_func = Function::Create(FunctionType::get(T_prjlvalue, gc_alloc_args, false),
6524+
Function::ExternalLinkage,
6525+
"julia.gc_alloc_obj");
6526+
#if JL_LLVM_VERSION >= 50000
6527+
jl_alloc_obj_func->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
6528+
#else
6529+
jl_alloc_obj_func->addAttribute(AttributeSet::ReturnIndex, Attribute::NoAlias);
6530+
#endif
6531+
add_named_global(jl_alloc_obj_func, (void*)NULL, /*dllimport*/false);
65386532

65396533
std::vector<Type *> dlsym_args(0);
65406534
dlsym_args.push_back(T_pint8);

src/intrinsics.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -325,11 +325,7 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va
325325
}
326326

327327
int alignment;
328-
if (x.isboxed) {
329-
// julia's gc gives 16-byte aligned addresses
330-
alignment = 16;
331-
}
332-
else if (jt) {
328+
if (jt) {
333329
alignment = julia_alignment(p, jt, 0);
334330
}
335331
else {

src/jitlayers.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level)
147147
// effectiveness of the optimization, but should retain correctness.
148148
#if JL_LLVM_VERSION < 50000
149149
PM->add(createLowerExcHandlersPass());
150+
PM->add(createAllocOptPass());
150151
PM->add(createLateLowerGCFramePass());
151152
// Remove dead use of ptls
152153
PM->add(createDeadCodeEliminationPass());
@@ -161,6 +162,12 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level)
161162
PM->add(createAlwaysInlinerPass()); // Respect always_inline
162163
#endif
163164

165+
#if JL_LLVM_VERSION >= 50000
166+
// Running `memcpyopt` between this and `sroa` seems to give `sroa` a hard time
167+
// merging the `alloca` for the unboxed data and the `alloca` created by the `alloc_opt`
168+
// pass.
169+
PM->add(createAllocOptPass());
170+
#endif
164171
PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl.
165172
PM->add(createSROAPass()); // Break up aggregate allocas
166173
PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl.

src/jitlayers.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ Pass *createLateLowerGCFramePass();
175175
Pass *createLowerExcHandlersPass();
176176
Pass *createGCInvariantVerifierPass(bool Strong);
177177
Pass *createPropagateJuliaAddrspaces();
178+
Pass *createAllocOptPass();
178179
// Whether the Function is an llvm or julia intrinsic.
179180
static inline bool isIntrinsicFunction(Function *F)
180181
{

0 commit comments

Comments
 (0)