Skip to content

Commit 9496fce

Browse files
committed
Add LLVM-level allocation optimization pass
This pass can obtain escape information with much higher precision than what we can currently do in type inference (typeinf). However, it does not replace the alloc_elim_pass! in type inference, since this pass cannot handle objects with reference fields. Fixes #20452
1 parent e4bd638 commit 9496fce

11 files changed

+1007
-45
lines changed

src/Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,9 @@ endif
5050
LLVMLINK :=
5151

5252
ifeq ($(JULIACODEGEN),LLVM)
53-
SRCS += codegen jitlayers disasm debuginfo llvm-simdloop llvm-ptls llvm-muladd llvm-late-gc-lowering llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces cgmemmgr
53+
SRCS += codegen jitlayers disasm debuginfo llvm-simdloop llvm-ptls llvm-muladd \
54+
llvm-late-gc-lowering llvm-lower-handlers llvm-gc-invariant-verifier \
55+
llvm-propagate-addrspaces llvm-alloc-opt cgmemmgr
5456
FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir)
5557
LLVM_LIBS := all
5658
ifeq ($(USE_POLLY),1)

src/ccall.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2146,7 +2146,7 @@ jl_cgval_t function_sig_t::emit_a_ccall(
21462146
size_t rtsz = jl_datatype_size(rt);
21472147
assert(rtsz > 0);
21482148
Value *strct = emit_allocobj(ctx, rtsz, runtime_bt);
2149-
int boxalign = jl_gc_alignment(rtsz);
2149+
int boxalign = jl_datatype_align(rt);
21502150
#ifndef JL_NDEBUG
21512151
#if JL_LLVM_VERSION >= 40000
21522152
const DataLayout &DL = jl_data_layout;

src/cgutils.cpp

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2182,25 +2182,12 @@ static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt)
21822182
{
21832183
JL_FEAT_REQUIRE(ctx, dynamic_alloc);
21842184
JL_FEAT_REQUIRE(ctx, runtime);
2185-
2186-
int osize;
2187-
int offset = jl_gc_classify_pools(static_size, &osize);
21882185
Value *ptls_ptr = emit_bitcast(ctx, ctx.ptlsStates, T_pint8);
2189-
Value *v;
2190-
if (offset < 0) {
2191-
Value *args[] = {ptls_ptr,
2192-
ConstantInt::get(T_size, static_size + sizeof(void*))};
2193-
v = ctx.builder.CreateCall(prepare_call(jlalloc_big_func),
2194-
ArrayRef<Value*>(args, 2));
2195-
}
2196-
else {
2197-
Value *pool_offs = ConstantInt::get(T_int32, offset);
2198-
Value *args[] = {ptls_ptr, pool_offs, ConstantInt::get(T_int32, osize)};
2199-
v = ctx.builder.CreateCall(prepare_call(jlalloc_pool_func),
2200-
ArrayRef<Value*>(args, 3));
2201-
}
2202-
tbaa_decorate(tbaa_tag, ctx.builder.CreateStore(maybe_decay_untracked(jt), emit_typeptr_addr(ctx, v)));
2203-
return v;
2186+
auto call = ctx.builder.CreateCall(prepare_call(jl_alloc_obj_func),
2187+
{ptls_ptr, ConstantInt::get(T_size, static_size),
2188+
maybe_decay_untracked(jt)});
2189+
call->setAttributes(jl_alloc_obj_func->getAttributes());
2190+
return call;
22042191
}
22052192

22062193
// if ptr is NULL this emits a write barrier _back_

src/codegen.cpp

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -314,8 +314,7 @@ static Function *jlgenericfunction_func;
314314
static Function *jlenter_func;
315315
static Function *jlleave_func;
316316
static Function *jlegal_func;
317-
static Function *jlalloc_pool_func;
318-
static Function *jlalloc_big_func;
317+
static Function *jl_alloc_obj_func;
319318
static Function *jlisa_func;
320319
static Function *jlsubtype_func;
321320
static Function *jlapplytype_func;
@@ -6614,24 +6613,19 @@ static void init_julia_llvm_env(Module *m)
66146613
"jl_instantiate_type_in_env", m);
66156614
add_named_global(jlapplytype_func, &jl_instantiate_type_in_env);
66166615

6617-
std::vector<Type*> alloc_pool_args(0);
6618-
alloc_pool_args.push_back(T_pint8);
6619-
alloc_pool_args.push_back(T_int32);
6620-
alloc_pool_args.push_back(T_int32);
6621-
jlalloc_pool_func =
6622-
Function::Create(FunctionType::get(T_prjlvalue, alloc_pool_args, false),
6623-
Function::ExternalLinkage,
6624-
"jl_gc_pool_alloc", m);
6625-
add_named_global(jlalloc_pool_func, &jl_gc_pool_alloc);
6626-
6627-
std::vector<Type*> alloc_big_args(0);
6628-
alloc_big_args.push_back(T_pint8);
6629-
alloc_big_args.push_back(T_size);
6630-
jlalloc_big_func =
6631-
Function::Create(FunctionType::get(T_prjlvalue, alloc_big_args, false),
6632-
Function::ExternalLinkage,
6633-
"jl_gc_big_alloc", m);
6634-
add_named_global(jlalloc_big_func, &jl_gc_big_alloc);
6616+
std::vector<Type*> gc_alloc_args(0);
6617+
gc_alloc_args.push_back(T_pint8);
6618+
gc_alloc_args.push_back(T_size);
6619+
gc_alloc_args.push_back(T_prjlvalue);
6620+
jl_alloc_obj_func = Function::Create(FunctionType::get(T_prjlvalue, gc_alloc_args, false),
6621+
Function::ExternalLinkage,
6622+
"julia.gc_alloc_obj");
6623+
#if JL_LLVM_VERSION >= 50000
6624+
jl_alloc_obj_func->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
6625+
#else
6626+
jl_alloc_obj_func->addAttribute(AttributeSet::ReturnIndex, Attribute::NoAlias);
6627+
#endif
6628+
add_named_global(jl_alloc_obj_func, (void*)NULL, /*dllimport*/false);
66356629

66366630
std::vector<Type *> dlsym_args(0);
66376631
dlsym_args.push_back(T_pint8);

src/intrinsics.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -325,11 +325,7 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va
325325
}
326326

327327
int alignment;
328-
if (x.isboxed) {
329-
// julia's gc gives 16-byte aligned addresses
330-
alignment = 16;
331-
}
332-
else if (jt) {
328+
if (jt) {
333329
alignment = julia_alignment(p, jt, 0);
334330
}
335331
else {

src/jitlayers.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level)
108108
#endif
109109
if (opt_level == 0) {
110110
PM->add(createCFGSimplificationPass()); // Clean up disgusting code
111+
PM->add(createAllocOptPass(false));
111112
#if JL_LLVM_VERSION < 50000
112113
PM->add(createBarrierNoopPass());
113114
PM->add(createLowerExcHandlersPass());
@@ -147,6 +148,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level)
147148
// effectiveness of the optimization, but should retain correctness.
148149
#if JL_LLVM_VERSION < 50000
149150
PM->add(createLowerExcHandlersPass());
151+
PM->add(createAllocOptPass(true));
150152
PM->add(createLateLowerGCFramePass());
151153
// Remove dead use of ptls
152154
PM->add(createDeadCodeEliminationPass());
@@ -161,6 +163,12 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level)
161163
PM->add(createAlwaysInlinerPass()); // Respect always_inline
162164
#endif
163165

166+
#if JL_LLVM_VERSION >= 50000
167+
// Running `memcpyopt` between this and `sroa` seems to give `sroa` a hard time
168+
// merging the `alloca` for the unboxed data and the `alloca` created by the `alloc_opt`
169+
// pass.
170+
PM->add(createAllocOptPass(true));
171+
#endif
164172
PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl.
165173
PM->add(createSROAPass()); // Break up aggregate allocas
166174
PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl.

src/jitlayers.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ Pass *createLateLowerGCFramePass();
175175
Pass *createLowerExcHandlersPass();
176176
Pass *createGCInvariantVerifierPass(bool Strong);
177177
Pass *createPropagateJuliaAddrspaces();
178+
Pass *createAllocOptPass(bool);
178179
// Whether the Function is an llvm or julia intrinsic.
179180
static inline bool isIntrinsicFunction(Function *F)
180181
{

0 commit comments

Comments
 (0)