Skip to content

Commit 90494c2

Browse files
authored
Fix Expr(:loopinfo) codegen (#50663)
We used a loop-marker intrinsic because the LoopID used to be dropped by optimization passes (this seems no longer true). #50660 is an example of a miscompilation where a loop of length 1 got optimized by simplifycfg to the point where the loop-marker is now attached to the wrong back-edge. This PR drops the loop-marker and uses the LoopID metadata node directly.
1 parent 91093fe commit 90494c2

11 files changed

+210
-204
lines changed

doc/src/devdocs/llvm-passes.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,9 @@ This pass removes the non-integral address spaces from the module's datalayout s
5858

5959
* Filename: `llvm-simdloop.cpp`
6060
* Class Name: `LowerSIMDLoopPass`
61-
* Opt Name: `module(LowerSIMDLoop)`
61+
* Opt Name: `loop(LowerSIMDLoop)`
6262

63-
This pass acts as the main driver of the `@simd` annotation. Codegen inserts a call to a marker intrinsic (`julia.simdloop`), which this pass uses to identify loops that were originally marked with `@simd`. Then, this pass looks for a chain of floating point operations that form a reduce and adds the `contract` and `reassoc` fast math flags to allow reassociation (and thus vectorization). This pass does not preserve either loop information nor inference correctness, so it may violate Julia semantics in surprising ways. If the loop was annotated with `ivdep` as well, then the pass marks the loop as having no loop-carried dependencies (the resulting behavior is undefined if the user annotation was incorrect or gets applied to the wrong loop).
63+
This pass acts as the main driver of the `@simd` annotation. Codegen inserts a `!llvm.loopid` marker at the back branch of a loop, which this pass uses to identify loops that were originally marked with `@simd`. Then, this pass looks for a chain of floating point operations that form a reduce and adds the `contract` and `reassoc` fast math flags to allow reassociation (and thus vectorization). This pass does not preserve either loop information nor inference correctness, so it may violate Julia semantics in surprising ways. If the loop was annotated with `ivdep` as well, then the pass marks the loop as having no loop-carried dependencies (the resulting behavior is undefined if the user annotation was incorrect or gets applied to the wrong loop).
6464

6565
### LowerPTLS
6666

src/codegen.cpp

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -992,14 +992,7 @@ static const auto jl_typeof_func = new JuliaFunction<>{
992992
Attributes(C, {Attribute::NonNull}),
993993
None); },
994994
};
995-
static const auto jl_loopinfo_marker_func = new JuliaFunction<>{
996-
"julia.loopinfo_marker",
997-
[](LLVMContext &C) { return FunctionType::get(getVoidTy(C), false); },
998-
[](LLVMContext &C) { return AttributeList::get(C,
999-
Attributes(C, {Attribute::ReadOnly, Attribute::NoRecurse, Attribute::InaccessibleMemOnly}),
1000-
AttributeSet(),
1001-
None); },
1002-
};
995+
1003996
static const auto jl_write_barrier_func = new JuliaFunction<>{
1004997
"julia.write_barrier",
1005998
[](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
@@ -1609,6 +1602,7 @@ class jl_codectx_t {
16091602
jl_codegen_params_t &emission_context;
16101603
llvm::MapVector<jl_code_instance_t*, jl_codegen_call_target_t> call_targets;
16111604
Function *f = NULL;
1605+
MDNode* LoopID = NULL;
16121606
// local var info. globals are not in here.
16131607
std::vector<jl_varinfo_t> slots;
16141608
std::map<int, jl_varinfo_t> phic_slots;
@@ -5773,16 +5767,22 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
57735767
}
57745768
else if (head == jl_loopinfo_sym) {
57755769
// parse Expr(:loopinfo, "julia.simdloop", ("llvm.loop.vectorize.width", 4))
5770+
// to LLVM LoopID
57765771
SmallVector<Metadata *, 8> MDs;
5772+
5773+
// Reserve first location for self reference to the LoopID metadata node.
5774+
TempMDTuple TempNode = MDNode::getTemporary(ctx.builder.getContext(), None);
5775+
MDs.push_back(TempNode.get());
5776+
57775777
for (int i = 0, ie = nargs; i < ie; ++i) {
57785778
Metadata *MD = to_md_tree(args[i], ctx.builder.getContext());
57795779
if (MD)
57805780
MDs.push_back(MD);
57815781
}
57825782

5783-
MDNode* MD = MDNode::get(ctx.builder.getContext(), MDs);
5784-
CallInst *I = ctx.builder.CreateCall(prepare_call(jl_loopinfo_marker_func));
5785-
I->setMetadata("julia.loopinfo", MD);
5783+
ctx.LoopID = MDNode::getDistinct(ctx.builder.getContext(), MDs);
5784+
// Replace the temporary node with a self-reference.
5785+
ctx.LoopID->replaceOperandWith(0, ctx.LoopID);
57865786
return jl_cgval_t();
57875787
}
57885788
else if (head == jl_leave_sym || head == jl_coverageeffect_sym
@@ -8125,6 +8125,7 @@ static jl_llvm_functions_t
81258125
std::map<int, BasicBlock*> BB;
81268126
std::map<size_t, BasicBlock*> come_from_bb;
81278127
int cursor = 0;
8128+
int current_label = 0;
81288129
auto find_next_stmt = [&] (int seq_next) {
81298130
// new style ir is always in dominance order, but frontend IR might not be
81308131
// `seq_next` is the next statement we want to emit
@@ -8141,6 +8142,7 @@ static jl_llvm_functions_t
81418142
workstack.pop_back();
81428143
auto nextbb = BB.find(item + 1);
81438144
if (nextbb == BB.end()) {
8145+
// Not a BB
81448146
cursor = item;
81458147
return;
81468148
}
@@ -8151,8 +8153,10 @@ static jl_llvm_functions_t
81518153
seq_next = -1;
81528154
// if this BB is non-empty, we've visited it before so skip it
81538155
if (!nextbb->second->getTerminator()) {
8156+
// New BB
81548157
ctx.builder.SetInsertPoint(nextbb->second);
81558158
cursor = item;
8159+
current_label = item;
81568160
return;
81578161
}
81588162
}
@@ -8399,7 +8403,12 @@ static jl_llvm_functions_t
83998403
if (jl_is_gotonode(stmt)) {
84008404
int lname = jl_gotonode_label(stmt);
84018405
come_from_bb[cursor+1] = ctx.builder.GetInsertBlock();
8402-
ctx.builder.CreateBr(BB[lname]);
8406+
auto br = ctx.builder.CreateBr(BB[lname]);
8407+
// Check if backwards branch
8408+
if (ctx.LoopID && lname <= current_label) {
8409+
br->setMetadata(LLVMContext::MD_loop, ctx.LoopID);
8410+
ctx.LoopID = NULL;
8411+
}
84038412
find_next_stmt(lname - 1);
84048413
continue;
84058414
}
@@ -8417,10 +8426,17 @@ static jl_llvm_functions_t
84178426
workstack.push_back(lname - 1);
84188427
BasicBlock *ifnot = BB[lname];
84198428
BasicBlock *ifso = BB[cursor+2];
8429+
Instruction *br;
84208430
if (ifnot == ifso)
8421-
ctx.builder.CreateBr(ifnot);
8431+
br = ctx.builder.CreateBr(ifnot);
84228432
else
8423-
ctx.builder.CreateCondBr(isfalse, ifnot, ifso);
8433+
br = ctx.builder.CreateCondBr(isfalse, ifnot, ifso);
8434+
8435+
// Check if backwards branch
8436+
if (ctx.LoopID && lname <= current_label) {
8437+
br->setMetadata(LLVMContext::MD_loop, ctx.LoopID);
8438+
ctx.LoopID = NULL;
8439+
}
84248440
find_next_stmt(cursor + 1);
84258441
continue;
84268442
}
@@ -9163,7 +9179,6 @@ static void init_jit_functions(void)
91639179
add_named_global(jl_object_id__func, &jl_object_id_);
91649180
add_named_global(jl_alloc_obj_func, (void*)NULL);
91659181
add_named_global(jl_newbits_func, (void*)jl_new_bits);
9166-
add_named_global(jl_loopinfo_marker_func, (void*)NULL);
91679182
add_named_global(jl_typeof_func, (void*)NULL);
91689183
add_named_global(jl_write_barrier_func, (void*)NULL);
91699184
add_named_global(jldlsym_func, &jl_load_and_lookup);

src/jl_exported_funcs.inc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -582,7 +582,6 @@
582582
YY(LLVMExtraAddCPUFeaturesPass) \
583583
YY(LLVMExtraMPMAddCPUFeaturesPass) \
584584
YY(LLVMExtraMPMAddRemoveNIPass) \
585-
YY(LLVMExtraMPMAddLowerSIMDLoopPass) \
586585
YY(LLVMExtraMPMAddMultiVersioningPass) \
587586
YY(LLVMExtraMPMAddRemoveJuliaAddrspacesPass) \
588587
YY(LLVMExtraMPMAddRemoveAddrspacesPass) \
@@ -596,6 +595,7 @@
596595
YY(LLVMExtraFPMAddGCInvariantVerifierPass) \
597596
YY(LLVMExtraFPMAddFinalLowerGCPass) \
598597
YY(LLVMExtraLPMAddJuliaLICMPass) \
598+
YY(LLVMExtraLPMAddLowerSIMDLoopPass) \
599599
YY(JLJITGetLLVMOrcExecutionSession) \
600600
YY(JLJITGetJuliaOJIT) \
601601
YY(JLJITGetExternalJITDylib) \

src/llvm-julia-passes.inc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
#ifdef MODULE_PASS
33
MODULE_PASS("CPUFeatures", CPUFeaturesPass, CPUFeaturesPass())
44
MODULE_PASS("RemoveNI", RemoveNIPass, RemoveNIPass())
5-
MODULE_PASS("LowerSIMDLoop", LowerSIMDLoopPass, LowerSIMDLoopPass())
65
MODULE_PASS("JuliaMultiVersioning", MultiVersioningPass, MultiVersioningPass())
76
MODULE_PASS("RemoveJuliaAddrspaces", RemoveJuliaAddrspacesPass, RemoveJuliaAddrspacesPass())
87
MODULE_PASS("RemoveAddrspaces", RemoveAddrspacesPass, RemoveAddrspacesPass())
@@ -24,4 +23,5 @@ FUNCTION_PASS("FinalLowerGC", FinalLowerGCPass, FinalLowerGCPass())
2423
//Loop passes
2524
#ifdef LOOP_PASS
2625
LOOP_PASS("JuliaLICM", JuliaLICMPass, JuliaLICMPass())
26+
LOOP_PASS("LowerSIMDLoop", LowerSIMDLoopPass, LowerSIMDLoopPass())
2727
#endif

0 commit comments

Comments
 (0)