Skip to content

Commit 6ce15fb

Browse files
committed
Add LLVM level allocation optimization pass
This pass can obtain escape information with much higher precision than what we can currently do in typeinf. However, it does not replace the `alloc_elim_pass!` in type inference, since it cannot handle objects with reference fields. Fix #20452
1 parent 4bbde8b commit 6ce15fb

9 files changed

+710
-45
lines changed

src/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ endif
5050
LLVMLINK :=
5151

5252
ifeq ($(JULIACODEGEN),LLVM)
53-
SRCS += codegen jitlayers disasm debuginfo llvm-simdloop llvm-ptls llvm-late-gc-lowering llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces cgmemmgr
53+
SRCS += codegen jitlayers disasm debuginfo llvm-simdloop llvm-ptls llvm-late-gc-lowering llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces llvm-alloc-opt cgmemmgr
5454
FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir)
5555
LLVM_LIBS := all
5656
ifeq ($(USE_POLLY),1)

src/ccall.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2143,7 +2143,7 @@ jl_cgval_t function_sig_t::emit_a_ccall(
21432143
size_t rtsz = jl_datatype_size(rt);
21442144
assert(rtsz > 0);
21452145
Value *strct = emit_allocobj(ctx, rtsz, runtime_bt);
2146-
int boxalign = jl_gc_alignment(rtsz);
2146+
int boxalign = jl_datatype_align(rt);
21472147
#ifndef JL_NDEBUG
21482148
#if JL_LLVM_VERSION >= 40000
21492149
const DataLayout &DL = jl_data_layout;

src/cgutils.cpp

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2149,25 +2149,10 @@ static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt)
21492149
{
21502150
JL_FEAT_REQUIRE(ctx, dynamic_alloc);
21512151
JL_FEAT_REQUIRE(ctx, runtime);
2152-
2153-
int osize;
2154-
int offset = jl_gc_classify_pools(static_size, &osize);
21552152
Value *ptls_ptr = emit_bitcast(ctx, ctx.ptlsStates, T_pint8);
2156-
Value *v;
2157-
if (offset < 0) {
2158-
Value *args[] = {ptls_ptr,
2159-
ConstantInt::get(T_size, static_size + sizeof(void*))};
2160-
v = ctx.builder.CreateCall(prepare_call(jlalloc_big_func),
2161-
ArrayRef<Value*>(args, 2));
2162-
}
2163-
else {
2164-
Value *pool_offs = ConstantInt::get(T_int32, offset);
2165-
Value *args[] = {ptls_ptr, pool_offs, ConstantInt::get(T_int32, osize)};
2166-
v = ctx.builder.CreateCall(prepare_call(jlalloc_pool_func),
2167-
ArrayRef<Value*>(args, 3));
2168-
}
2169-
tbaa_decorate(tbaa_tag, ctx.builder.CreateStore(maybe_decay_untracked(jt), emit_typeptr_addr(ctx, v)));
2170-
return v;
2153+
return ctx.builder.CreateCall(prepare_call(jl_alloc_obj_func),
2154+
{ptls_ptr, ConstantInt::get(T_size, static_size),
2155+
maybe_decay_untracked(jt)});
21712156
}
21722157

21732158
// if ptr is NULL this emits a write barrier _back_

src/codegen.cpp

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -315,8 +315,7 @@ static Function *jlgenericfunction_func;
315315
static Function *jlenter_func;
316316
static Function *jlleave_func;
317317
static Function *jlegal_func;
318-
static Function *jlalloc_pool_func;
319-
static Function *jlalloc_big_func;
318+
static Function *jl_alloc_obj_func;
320319
static Function *jlisa_func;
321320
static Function *jlsubtype_func;
322321
static Function *jlapplytype_func;
@@ -6635,24 +6634,19 @@ static void init_julia_llvm_env(Module *m)
66356634
"jl_instantiate_type_in_env", m);
66366635
add_named_global(jlapplytype_func, &jl_instantiate_type_in_env);
66376636

6638-
std::vector<Type*> alloc_pool_args(0);
6639-
alloc_pool_args.push_back(T_pint8);
6640-
alloc_pool_args.push_back(T_int32);
6641-
alloc_pool_args.push_back(T_int32);
6642-
jlalloc_pool_func =
6643-
Function::Create(FunctionType::get(T_prjlvalue, alloc_pool_args, false),
6644-
Function::ExternalLinkage,
6645-
"jl_gc_pool_alloc", m);
6646-
add_named_global(jlalloc_pool_func, &jl_gc_pool_alloc);
6647-
6648-
std::vector<Type*> alloc_big_args(0);
6649-
alloc_big_args.push_back(T_pint8);
6650-
alloc_big_args.push_back(T_size);
6651-
jlalloc_big_func =
6652-
Function::Create(FunctionType::get(T_prjlvalue, alloc_big_args, false),
6653-
Function::ExternalLinkage,
6654-
"jl_gc_big_alloc", m);
6655-
add_named_global(jlalloc_big_func, &jl_gc_big_alloc);
6637+
std::vector<Type*> gc_alloc_args(0);
6638+
gc_alloc_args.push_back(T_pint8);
6639+
gc_alloc_args.push_back(T_size);
6640+
gc_alloc_args.push_back(T_prjlvalue);
6641+
jl_alloc_obj_func = Function::Create(FunctionType::get(T_prjlvalue, gc_alloc_args, false),
6642+
Function::ExternalLinkage,
6643+
"julia.gc_alloc_obj");
6644+
#if JL_LLVM_VERSION >= 50000
6645+
jl_alloc_obj_func->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
6646+
#else
6647+
jl_alloc_obj_func->addAttribute(AttributeSet::ReturnIndex, Attribute::NoAlias);
6648+
#endif
6649+
add_named_global(jl_alloc_obj_func, (void*)NULL, /*dllimport*/false);
66566650

66576651
std::vector<Type *> dlsym_args(0);
66586652
dlsym_args.push_back(T_pint8);

src/intrinsics.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -325,11 +325,7 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va
325325
}
326326

327327
int alignment;
328-
if (x.isboxed) {
329-
// julia's gc gives 16-byte aligned addresses
330-
alignment = 16;
331-
}
332-
else if (jt) {
328+
if (jt) {
333329
alignment = julia_alignment(p, jt, 0);
334330
}
335331
else {

src/jitlayers.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level)
108108
#endif
109109
if (opt_level == 0) {
110110
PM->add(createCFGSimplificationPass()); // Clean up disgusting code
111+
PM->add(createAllocOptPass(false));
111112
#if JL_LLVM_VERSION < 50000
112113
PM->add(createBarrierNoopPass());
113114
PM->add(createLowerExcHandlersPass());
@@ -147,6 +148,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level)
147148
// effectiveness of the optimization, but should retain correctness.
148149
#if JL_LLVM_VERSION < 50000
149150
PM->add(createLowerExcHandlersPass());
151+
PM->add(createAllocOptPass(true));
150152
PM->add(createLateLowerGCFramePass());
151153
// Remove dead use of ptls
152154
PM->add(createDeadCodeEliminationPass());
@@ -161,6 +163,12 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level)
161163
PM->add(createAlwaysInlinerPass()); // Respect always_inline
162164
#endif
163165

166+
#if JL_LLVM_VERSION >= 50000
167+
// Running `memcpyopt` between this and `sroa` seems to give `sroa` a hard time
168+
// merging the `alloca` for the unboxed data and the `alloca` created by the `alloc_opt`
169+
// pass.
170+
PM->add(createAllocOptPass(true));
171+
#endif
164172
PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl.
165173
PM->add(createSROAPass()); // Break up aggregate allocas
166174
PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl.

src/jitlayers.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ Pass *createLateLowerGCFramePass();
206206
Pass *createLowerExcHandlersPass();
207207
Pass *createGCInvariantVerifierPass(bool Strong);
208208
Pass *createPropagateJuliaAddrspaces();
209+
Pass *createAllocOptPass(bool);
209210
// Whether the Function is an llvm or julia intrinsic.
210211
static inline bool isIntrinsicFunction(Function *F)
211212
{

0 commit comments

Comments
 (0)