Skip to content

Commit 0b95a5d

Browse files
authored
Merge pull request #22912 from JuliaLang/kf/fixciregresssion
Fix recent CI compile time perf regression
2 parents 7181300 + 2d6a589 commit 0b95a5d

File tree

8 files changed

+81
-19
lines changed

8 files changed

+81
-19
lines changed

base/options.jl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,18 @@ struct JLOptions
3434
bindto::Ptr{UInt8}
3535
outputbc::Ptr{UInt8}
3636
outputunoptbc::Ptr{UInt8}
37+
outputjitbc::Ptr{UInt8}
3738
outputo::Ptr{UInt8}
3839
outputji::Ptr{UInt8}
3940
incremental::Int8
4041
end
4142

43+
# This runs early in the sysimage, before `!=` is defined, hence the `===` check with an empty `if` branch
44+
if sizeof(JLOptions) === ccall(:jl_sizeof_jl_options, Int, ())
45+
else
46+
ccall(:jl_throw, Void, (Any,), "Option structure mismatch")
47+
end
48+
4249
JLOptions() = unsafe_load(cglobal(:jl_options, JLOptions))
4350

4451
function show(io::IO, opt::JLOptions)

doc/src/devdocs/llvm.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,12 @@ cc -shared -o sys.so sys.o
8080
```
8181
This system image can then be loaded by `julia` as usual.
8282

83+
Alternatively, you can
84+
use `--output-jit-bc jit.bc` to obtain a trace of all IR passed to the JIT.
85+
This is useful for code that cannot be run as part of the sysimg generation
86+
process (e.g. because it creates unserializable state). However, the resulting
87+
`jit.bc` does not include sysimage data, and thus cannot be used as a system image.
88+
8389
It is also possible to dump an LLVM IR module for just one Julia function,
8490
using:
8591
```julia

src/jitlayers.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -947,7 +947,7 @@ void* jl_get_globalvar(GlobalVariable *gv)
947947
void jl_add_to_shadow(Module *m)
948948
{
949949
#ifndef KEEP_BODIES
950-
if (!imaging_mode)
950+
if (!imaging_mode && !jl_options.outputjitbc)
951951
return;
952952
#endif
953953
ValueToValueMapTy VMap;
@@ -1064,7 +1064,6 @@ extern "C"
10641064
void jl_dump_native(const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *sysimg_data, size_t sysimg_len)
10651065
{
10661066
JL_TIMING(NATIVE_DUMP);
1067-
assert(imaging_mode);
10681067
// We don't want to use MCJIT's target machine because
10691068
// it uses the large code model and we may potentially
10701069
// want less optimizations there.
@@ -1161,7 +1160,8 @@ void jl_dump_native(const char *bc_fname, const char *unopt_bc_fname, const char
11611160
#endif
11621161

11631162
// add metadata information
1164-
jl_gen_llvm_globaldata(shadow_output, sysimg_data, sysimg_len);
1163+
if (imaging_mode)
1164+
jl_gen_llvm_globaldata(shadow_output, sysimg_data, sysimg_len);
11651165

11661166
// do the actual work
11671167
PM.run(*shadow_output);

src/jloptions.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,11 @@ static const char opts[] =
127127
// compiler output options
128128
" --output-o name Generate an object file (including system image data)\n"
129129
" --output-ji name Generate a system image data file (.ji)\n"
130-
" --output-unopt-bc name Generate unoptimized LLVM bitcode (.bc)\n"
130+
// These are for compiler debugging purposes only and should not be otherwise
131+
// used, so don't show them here. See the devdocs for tips on using these
132+
// options for debugging the compiler.
133+
// " --output-unopt-bc name Generate unoptimized LLVM bitcode (.bc)\n"
134+
// " --output-jit-bc name Dump all IR generated by the frontend (not including system image)\n"
131135
" --output-bc name Generate LLVM bitcode (.bc)\n"
132136
" --output-incremental=no Generate an incremental output file (rather than complete)\n\n"
133137

@@ -148,6 +152,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
148152
opt_code_coverage,
149153
opt_track_allocation,
150154
opt_check_bounds,
155+
opt_output_jit_bc,
151156
opt_output_unopt_bc,
152157
opt_output_bc,
153158
opt_depwarn,
@@ -191,6 +196,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
191196
{ "check-bounds", required_argument, 0, opt_check_bounds },
192197
{ "output-bc", required_argument, 0, opt_output_bc },
193198
{ "output-unopt-bc", required_argument, 0, opt_output_unopt_bc },
199+
{ "output-jit-bc", required_argument, 0, opt_output_jit_bc },
194200
{ "output-o", required_argument, 0, opt_output_o },
195201
{ "output-ji", required_argument, 0, opt_output_ji },
196202
{ "output-incremental",required_argument, 0, opt_incremental },
@@ -439,6 +445,9 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
439445
jl_options.outputbc = optarg;
440446
if (!jl_options.image_file_specified) jl_options.image_file = NULL;
441447
break;
448+
case opt_output_jit_bc:
449+
jl_options.outputjitbc = optarg;
450+
break;
442451
case opt_output_unopt_bc:
443452
jl_options.outputunoptbc = optarg;
444453
if (!jl_options.image_file_specified) jl_options.image_file = NULL;
@@ -542,3 +551,8 @@ JL_DLLEXPORT void jl_set_ARGS(int argc, char **argv)
542551
}
543552
}
544553
}
554+
555+
JL_DLLEXPORT ssize_t jl_sizeof_jl_options(void)
556+
{
557+
return sizeof(jl_options_t);
558+
}

src/julia.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1734,13 +1734,15 @@ typedef struct {
17341734
const char *bindto;
17351735
const char *outputbc;
17361736
const char *outputunoptbc;
1737+
const char *outputjitbc;
17371738
const char *outputo;
17381739
const char *outputji;
17391740
int8_t incremental;
17401741
int8_t image_file_specified;
17411742
} jl_options_t;
17421743

17431744
extern JL_DLLEXPORT jl_options_t jl_options;
1745+
JL_DLLEXPORT ssize_t jl_sizeof_jl_options(void);
17441746

17451747
// Parse an argc/argv pair to extract general julia options, passing back out
17461748
// any arguments that should be passed on to the script.

src/llvm-late-gc-lowering.cpp

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1188,7 +1188,7 @@ static void AddInPredLiveOuts(BasicBlock *BB, BitVector &LiveIn, State &S)
11881188
BB = &*WorkList.back();
11891189
WorkList.pop_back();
11901190
for (BasicBlock *Pred : predecessors(BB)) {
1191-
if (Visited.insert(Pred).second)
1191+
if (!Visited.insert(Pred).second)
11921192
continue;
11931193
if (!S.BBStates[Pred].HasSafepoint) {
11941194
WorkList.push_back(Pred);
@@ -1206,7 +1206,9 @@ static void AddInPredLiveOuts(BasicBlock *BB, BitVector &LiveIn, State &S)
12061206
}
12071207
}
12081208

1209-
void LateLowerGCFrame::PlaceGCFrameStore(State &S, unsigned R, unsigned MinColorRoot, const std::vector<int> &Colors, Value *GCFrame, Instruction *InsertionPoint) {
1209+
void LateLowerGCFrame::PlaceGCFrameStore(State &S, unsigned R, unsigned MinColorRoot,
1210+
const std::vector<int> &Colors, Value *GCFrame,
1211+
Instruction *InsertionPoint) {
12101212
Value *Val = GetPtrForNumber(S, R, InsertionPoint);
12111213
Value *args[1] = {
12121214
ConstantInt::get(T_int32, Colors[R]+MinColorRoot)
@@ -1222,26 +1224,27 @@ void LateLowerGCFrame::PlaceGCFrameStore(State &S, unsigned R, unsigned MinColor
12221224
new StoreInst(Val, gep, InsertionPoint);
12231225
}
12241226

1225-
void LateLowerGCFrame::PlaceGCFrameStores(Function &F, State &S, unsigned MinColorRoot, const std::vector<int> &Colors, Value *GCFrame)
1227+
void LateLowerGCFrame::PlaceGCFrameStores(Function &F, State &S, unsigned MinColorRoot,
1228+
const std::vector<int> &Colors, Value *GCFrame)
12261229
{
12271230
for (auto &BB : F) {
1228-
if (!S.BBStates[&BB].HasSafepoint) {
1231+
const BBState &BBS = S.BBStates[&BB];
1232+
if (!BBS.HasSafepoint) {
12291233
continue;
12301234
}
12311235
BitVector LiveIn;
12321236
AddInPredLiveOuts(&BB, LiveIn, S);
1233-
for(auto rit = S.BBStates[&BB].Safepoints.rbegin();
1234-
rit != S.BBStates[&BB].Safepoints.rend(); ++rit ) {
1235-
// Find those that become live, but were not before
1236-
BitVector NowLive = S.LiveSets[*rit];
1237-
LiveIn.resize(NowLive.size(), 0);
1238-
LiveIn.flip();
1239-
NowLive &= LiveIn;
1237+
const BitVector *LastLive = &LiveIn;
1238+
for(auto rit = BBS.Safepoints.rbegin();
1239+
rit != BBS.Safepoints.rend(); ++rit ) {
1240+
const BitVector &NowLive = S.LiveSets[*rit];
12401241
for (int Idx = NowLive.find_first(); Idx >= 0; Idx = NowLive.find_next(Idx)) {
1241-
PlaceGCFrameStore(S, Idx, MinColorRoot, Colors, GCFrame,
1242-
S.ReverseSafepointNumbering[*rit]);
1242+
if (!HasBitSet(*LastLive, Idx)) {
1243+
PlaceGCFrameStore(S, Idx, MinColorRoot, Colors, GCFrame,
1244+
S.ReverseSafepointNumbering[*rit]);
1245+
}
12431246
}
1244-
LiveIn = S.LiveSets[*rit];
1247+
LastLive = &NowLive;
12451248
}
12461249
}
12471250
}

src/precompile.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,11 @@ void jl_precompile(int all);
2424

2525
void jl_write_compiler_output(void)
2626
{
27-
if (!jl_generating_output())
27+
if (!jl_generating_output()) {
28+
if (jl_options.outputjitbc)
29+
jl_dump_native(NULL, jl_options.outputjitbc, NULL, NULL, 0);
2830
return;
31+
}
2932

3033
if (!jl_options.incremental)
3134
jl_precompile(jl_options.compile_enabled == JL_OPTIONS_COMPILE_ALL);
@@ -35,6 +38,10 @@ void jl_write_compiler_output(void)
3538
return;
3639
}
3740

41+
if (jl_options.outputjitbc) {
42+
jl_printf(JL_STDERR, "WARNING: --output-jit-bc is meaningless with options for dumping sysimage data\n");
43+
}
44+
3845
jl_array_t *worklist = jl_module_init_order;
3946
JL_GC_PUSH1(&worklist);
4047
jl_module_init_order = jl_alloc_vec_any(0);

test/llvmpasses/gcroots.ll

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
declare void @boxed_simple(%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*)
66
declare %jl_value_t addrspace(10)* @jl_box_int64(i64)
77
declare %jl_value_t*** @jl_get_ptls_states()
8+
declare void @jl_safepoint()
89
declare %jl_value_t addrspace(10)* @jl_apply_generic(%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32)
910

1011
define void @simple(i64 %a, i64 %b) {
@@ -184,3 +185,25 @@ define void @global_ref() {
184185
call void @one_arg_boxed(%jl_value_t addrspace(10)* %loaded)
185186
ret void
186187
}
188+
189+
define %jl_value_t addrspace(10)* @no_redundant_rerooting(i64 %a, i1 %cond) {
190+
; CHECK-LABEL: @no_redundant_rerooting
191+
; CHECK: %gcframe = alloca %jl_value_t addrspace(10)*, i32 3
192+
top:
193+
%ptls = call %jl_value_t*** @jl_get_ptls_states()
194+
%aboxed = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %a)
195+
; CHECK: store %jl_value_t addrspace(10)* %aboxed
196+
; CHECK-NEXT: call void @jl_safepoint()
197+
call void @jl_safepoint()
198+
br i1 %cond, label %blocka, label %blockb
199+
blocka:
200+
; CHECK-NOT: call void @jl_safepoint()
201+
; CHECK: call void @jl_safepoint()
202+
call void @jl_safepoint()
203+
ret %jl_value_t addrspace(10)* %aboxed
204+
blockb:
205+
; CHECK-NOT: call void @jl_safepoint()
206+
; CHECK: call void @jl_safepoint()
207+
call void @jl_safepoint()
208+
ret %jl_value_t addrspace(10)* %aboxed
209+
}

0 commit comments

Comments
 (0)