Skip to content

Commit 94d2f89

Browse files
committed
Add LLVM-level allocation optimization pass
This pass can obtain escape information with much higher precision than what we can currently do in typeinf. However, it does not replace `alloc_elim_pass!` in type inference, since it cannot handle objects with reference fields. Fixes #20452.
1 parent 29c78b7 commit 94d2f89

12 files changed

+1071
-64
lines changed

src/Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,9 @@ endif
5050
LLVMLINK :=
5151

5252
ifeq ($(JULIACODEGEN),LLVM)
53-
SRCS += codegen jitlayers disasm debuginfo llvm-simdloop llvm-ptls llvm-muladd llvm-late-gc-lowering llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces cgmemmgr
53+
SRCS += codegen jitlayers disasm debuginfo llvm-simdloop llvm-ptls llvm-muladd \
54+
llvm-late-gc-lowering llvm-lower-handlers llvm-gc-invariant-verifier \
55+
llvm-propagate-addrspaces llvm-alloc-opt cgmemmgr
5456
FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir)
5557
LLVM_LIBS := all
5658
ifeq ($(USE_POLLY),1)

src/ccall.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2106,7 +2106,7 @@ jl_cgval_t function_sig_t::emit_a_ccall(
21062106
size_t rtsz = jl_datatype_size(rt);
21072107
assert(rtsz > 0);
21082108
Value *strct = emit_allocobj(ctx, rtsz, runtime_bt);
2109-
int boxalign = jl_gc_alignment(rtsz);
2109+
int boxalign = jl_datatype_align(rt);
21102110
#ifndef JL_NDEBUG
21112111
#if JL_LLVM_VERSION >= 40000
21122112
const DataLayout &DL = jl_data_layout;

src/cgutils.cpp

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2097,25 +2097,12 @@ static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt)
20972097
{
20982098
JL_FEAT_REQUIRE(ctx, dynamic_alloc);
20992099
JL_FEAT_REQUIRE(ctx, runtime);
2100-
2101-
int osize;
2102-
int offset = jl_gc_classify_pools(static_size, &osize);
21032100
Value *ptls_ptr = emit_bitcast(ctx, ctx.ptlsStates, T_pint8);
2104-
Value *v;
2105-
if (offset < 0) {
2106-
Value *args[] = {ptls_ptr,
2107-
ConstantInt::get(T_size, static_size + sizeof(void*))};
2108-
v = ctx.builder.CreateCall(prepare_call(jlalloc_big_func),
2109-
ArrayRef<Value*>(args, 2));
2110-
}
2111-
else {
2112-
Value *pool_offs = ConstantInt::get(T_int32, offset);
2113-
Value *args[] = {ptls_ptr, pool_offs, ConstantInt::get(T_int32, osize)};
2114-
v = ctx.builder.CreateCall(prepare_call(jlalloc_pool_func),
2115-
ArrayRef<Value*>(args, 3));
2116-
}
2117-
tbaa_decorate(tbaa_tag, ctx.builder.CreateStore(maybe_decay_untracked(jt), emit_typeptr_addr(ctx, v)));
2118-
return v;
2101+
auto call = ctx.builder.CreateCall(prepare_call(jl_alloc_obj_func),
2102+
{ptls_ptr, ConstantInt::get(T_size, static_size),
2103+
maybe_decay_untracked(jt)});
2104+
call->setAttributes(jl_alloc_obj_func->getAttributes());
2105+
return call;
21192106
}
21202107

21212108
// if ptr is NULL this emits a write barrier _back_

src/codegen.cpp

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -314,8 +314,7 @@ static Function *jlgenericfunction_func;
314314
static Function *jlenter_func;
315315
static Function *jlleave_func;
316316
static Function *jlegal_func;
317-
static Function *jlalloc_pool_func;
318-
static Function *jlalloc_big_func;
317+
static Function *jl_alloc_obj_func;
319318
static Function *jlisa_func;
320319
static Function *jlsubtype_func;
321320
static Function *jlapplytype_func;
@@ -6372,24 +6371,19 @@ static void init_julia_llvm_env(Module *m)
63726371
"jl_instantiate_type_in_env", m);
63736372
add_named_global(jlapplytype_func, &jl_instantiate_type_in_env);
63746373

6375-
std::vector<Type*> alloc_pool_args(0);
6376-
alloc_pool_args.push_back(T_pint8);
6377-
alloc_pool_args.push_back(T_int32);
6378-
alloc_pool_args.push_back(T_int32);
6379-
jlalloc_pool_func =
6380-
Function::Create(FunctionType::get(T_prjlvalue, alloc_pool_args, false),
6381-
Function::ExternalLinkage,
6382-
"jl_gc_pool_alloc", m);
6383-
add_named_global(jlalloc_pool_func, &jl_gc_pool_alloc);
6384-
6385-
std::vector<Type*> alloc_big_args(0);
6386-
alloc_big_args.push_back(T_pint8);
6387-
alloc_big_args.push_back(T_size);
6388-
jlalloc_big_func =
6389-
Function::Create(FunctionType::get(T_prjlvalue, alloc_big_args, false),
6390-
Function::ExternalLinkage,
6391-
"jl_gc_big_alloc", m);
6392-
add_named_global(jlalloc_big_func, &jl_gc_big_alloc);
6374+
std::vector<Type*> gc_alloc_args(0);
6375+
gc_alloc_args.push_back(T_pint8);
6376+
gc_alloc_args.push_back(T_size);
6377+
gc_alloc_args.push_back(T_prjlvalue);
6378+
jl_alloc_obj_func = Function::Create(FunctionType::get(T_prjlvalue, gc_alloc_args, false),
6379+
Function::ExternalLinkage,
6380+
"julia.gc_alloc_obj");
6381+
#if JL_LLVM_VERSION >= 50000
6382+
jl_alloc_obj_func->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
6383+
#else
6384+
jl_alloc_obj_func->addAttribute(AttributeSet::ReturnIndex, Attribute::NoAlias);
6385+
#endif
6386+
add_named_global(jl_alloc_obj_func, (void*)NULL, /*dllimport*/false);
63936387

63946388
std::vector<Type *> dlsym_args(0);
63956389
dlsym_args.push_back(T_pint8);

src/intrinsics.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -325,11 +325,7 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va
325325
}
326326

327327
int alignment;
328-
if (x.isboxed) {
329-
// julia's gc gives 16-byte aligned addresses
330-
alignment = 16;
331-
}
332-
else if (jt) {
328+
if (jt) {
333329
alignment = julia_alignment(p, jt, 0);
334330
}
335331
else {

src/jitlayers.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level)
147147
// effectiveness of the optimization, but should retain correctness.
148148
#if JL_LLVM_VERSION < 50000
149149
PM->add(createLowerExcHandlersPass());
150+
PM->add(createAllocOptPass());
150151
PM->add(createLateLowerGCFramePass());
151152
// Remove dead use of ptls
152153
PM->add(createDeadCodeEliminationPass());
@@ -161,6 +162,12 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level)
161162
PM->add(createAlwaysInlinerPass()); // Respect always_inline
162163
#endif
163164

165+
#if JL_LLVM_VERSION >= 50000
166+
// Running `memcpyopt` between this and `sroa` seems to give `sroa` a hard time
167+
// merging the `alloca` for the unboxed data and the `alloca` created by the `alloc_opt`
168+
// pass.
169+
PM->add(createAllocOptPass());
170+
#endif
164171
PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl.
165172
PM->add(createSROAPass()); // Break up aggregate allocas
166173
PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl.

src/jitlayers.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ Pass *createLateLowerGCFramePass();
175175
Pass *createLowerExcHandlersPass();
176176
Pass *createGCInvariantVerifierPass(bool Strong);
177177
Pass *createPropagateJuliaAddrspaces();
178+
Pass *createAllocOptPass();
178179
// Whether the Function is an llvm or julia intrinsic.
179180
static inline bool isIntrinsicFunction(Function *F)
180181
{

0 commit comments

Comments
 (0)