diff --git a/doc/src/devdocs/llvm.md b/doc/src/devdocs/llvm.md
index 35266648de520..65aa8b7ef2c88 100644
--- a/doc/src/devdocs/llvm.md
+++ b/doc/src/devdocs/llvm.md
@@ -107,3 +107,184 @@ study it and the pass of interest in isolation.
 4. Strip the debug metadata and fix up the TBAA metadata by hand.
 
 The last step is labor intensive. Suggestions on a better way would be appreciated.
+
+## The jlcall calling convention
+
+Julia has a generic calling convention for unoptimized code, which looks somewhat
+as follows:
+```
+    jl_value_t *any_unoptimized_call(jl_value_t *, jl_value_t **, int);
+```
+where the first argument is the boxed function object, the second argument is
+an on-stack array of arguments and the third is the number of arguments. Now,
+we could perform a straightforward lowering and emit an alloca for the argument
+array. However, this would betray the SSA nature of the uses at the callsite,
+making optimizations (including GC root placement) significantly harder.
+Instead, we emit it as follows:
+```
+    %bitcast = bitcast @any_unoptimized_call to %jl_value_t *(*)(%jl_value_t *, %jl_value_t *)
+    call cc 37 %jl_value_t *%bitcast(%jl_value_t *%arg1, %jl_value_t *%arg2)
+```
+The special `cc 37` annotation marks the fact that this call site is really using
+the jlcall calling convention. This allows us to retain the SSA-ness of the
+uses throughout the optimizer. GC root placement will later lower this call to
+the original C ABI. In the code, the calling convention number is represented by
+the `JLCALL_F_CC` constant. In addition, there is the `JLCALL_CC` calling
+convention, which functions similarly but omits the first argument.
+
+## GC root placement
+
+GC root placement is done by an LLVM pass late in the pass pipeline. Doing GC root
+placement this late enables LLVM to make more aggressive optimizations around
+code that requires GC roots, as well as allowing us to reduce the number of
+required GC roots and GC root store operations (since LLVM doesn't understand
+our GC, it wouldn't otherwise know what it is and is not allowed to do with
+values stored to the GC frame, so it'll conservatively do very little). As an
+example, consider an error path
+```
+    if some_condition()
+        #= Use some variables maybe =#
+        error("An error occurred")
+    end
+```
+During constant folding, LLVM may discover that the condition is always false,
+and can remove the basic block. However, if GC root lowering is done early,
+the GC root slots used in the deleted block, as well as any values kept alive
+in those slots only because they were used in the error path, would be kept
+alive by LLVM. By doing GC root lowering late, we give LLVM license to do
+any of its usual optimizations (constant folding, dead code elimination, etc.),
+without having to worry (too much) about which values may or may not be gc
+tracked.
+
+However, in order to be able to do late GC root placement, we need to be able to
+identify a) which pointers are gc tracked and b) all uses of such pointers. The
+goal of the GC placement pass is thus simple:
+
+Minimize the number of needed gc roots/stores to them subject to the constraint
+that at every safepoint, any live gc-tracked pointer (i.e. for which there is
+a path after this point that contains a use of this pointer) is in some gc slot.
+
+### Representation
+
+The primary difficulty is thus choosing an IR representation that allows us to
+identify gc-tracked pointers and their uses, even after the program has been
+run through the optimizer.
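+To make this concrete, here is a small hand-written sketch (simplified, with
+placeholder function names; not actual compiler output) of the kind of IR this
+section is concerned with. The boxed value `%obj` is identifiable as gc-tracked
+because it lives in a dedicated address space (described below), and since it is
+still live after the first call (a safepoint), the placement pass must spill it
+to a GC frame slot across that call:
+```
+    %jl_value_t = type opaque
+    declare void @a_safepoint()
+    declare void @use_value(%jl_value_t addrspace(10)*)
+
+    define void @example(%jl_value_t addrspace(10)* %obj) {
+    top:
+      ; safepoint: %obj is still live here, so it needs a GC root
+      call void @a_safepoint()
+      ; a later use of %obj keeps it live across the safepoint above
+      call void @use_value(%jl_value_t addrspace(10)* %obj)
+      ret void
+    }
+```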
+Our design makes use of three LLVM features to achieve this:
+- Custom address spaces
+- Operand Bundles
+- Non-integral pointers
+
+Custom address spaces allow us to tag every pointer with an integer that needs
+to be preserved through optimizations. The compiler may not insert casts between
+address spaces that did not exist in the original program, and it must never
+change the address space of a pointer on a load/store/etc. operation. This allows
+us to annotate which pointers are gc-tracked in an optimizer-resistant way. Note
+that metadata would not be able to achieve the same purpose. Metadata is supposed
+to always be discardable without altering the semantics of the program. However,
+failing to identify a gc-tracked pointer alters the resulting program behavior
+dramatically - it'll probably crash or return wrong results. We currently use
+three different address spaces (their numbers are defined in src/codegen_shared.cpp):
+
+- GC Tracked Pointers (currently 10): These are pointers to boxed values that may be put
+  into a GC frame. They are loosely equivalent to a `jl_value_t*` pointer on the C
+  side. N.B. It is illegal to ever have a pointer in this address space that may
+  not be stored to a GC slot.
+- Derived Pointers (currently 11): These are pointers that are derived from some GC
+  tracked pointer. Uses of these pointers generate uses of the original pointer.
+  However, they need not themselves be known to the GC. The GC root placement
+  pass MUST always find the GC tracked pointer from which this pointer is
+  derived and use that as the pointer to root.
+- Callee Rooted Pointers (currently 12): This is a utility address space to express the
+  notion of a callee rooted value. All values of this address space MUST be
+  storable to a GC root (though it is possible to relax this condition in the
+  future), but unlike the other pointers need not be rooted if passed to a
+  call (they do still need to be rooted if they are live across another safepoint
+  between the definition and the call).
+
+### Invariants
+
+The GC root placement pass makes use of several invariants, which need
+to be observed by the frontend and are preserved by the optimizer.
+
+First, only the following address space casts are allowed:
+- 0->{Tracked,Derived,CalleeRooted}: It is allowable to decay an untracked pointer to any of the
+  others. However, do note that the optimizer has broad license to not root
+  such a value. It is never safe to have a value in address space 0 in any part
+  of the program if it is (or is derived from) a value that requires a GC root.
+- Tracked->Derived: This is the standard decay route for interior values. The placement
+  pass will look for these to identify the base pointer for any use.
+- Tracked->CalleeRooted: Addrspace CalleeRooted serves merely as a hint that a GC root is not
+  required. However, do note that the Derived->CalleeRooted decay is prohibited, since
+  pointers should generally be storable to a GC slot, even in this address space.
+
+Now let us consider what constitutes a use:
+- Loads whose loaded value is in one of the address spaces
+- Stores of a value in one of the address spaces to a location
+- Stores to a pointer in one of the address spaces
+- Calls for which a value in one of the address spaces is an operand
+- Calls using the jlcall ABI, for which the argument array contains a value
+- Return instructions.
+
+We explicitly allow load/stores and simple calls in address spaces Tracked/Derived.
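+For illustration, the following hand-written sketch (simplified, with placeholder
+function names; not actual compiler output) shows the Tracked->Derived decay
+together with two of the use categories listed above - a load whose loaded value
+is gc-tracked, and a call that takes a gc-tracked value as an operand:
+```
+    %jl_value_t = type opaque
+    declare void @sink(%jl_value_t addrspace(10)*)
+
+    define void @example(%jl_value_t addrspace(10)* %obj) {
+    top:
+      ; Tracked->Derived decay: the placement pass roots %obj, not %derived,
+      ; because uses of %derived count as uses of %obj
+      %derived = addrspacecast %jl_value_t addrspace(10)* %obj to %jl_value_t addrspace(11)*
+      %slot = bitcast %jl_value_t addrspace(11)* %derived to %jl_value_t addrspace(10)* addrspace(11)*
+      ; load whose loaded value is gc-tracked: a use of %obj
+      %field = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %slot
+      ; call with a gc-tracked operand: a use (and itself a safepoint)
+      call void @sink(%jl_value_t addrspace(10)* %field)
+      ret void
+    }
+```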
+Elements of jlcall argument arrays must always be in address space Tracked (it is
+required by the ABI that they are valid `jl_value_t*` pointers). The same is true
+for return instructions (though note that struct return arguments are allowed to
+have any of the address spaces). The only allowable use of an address space
+CalleeRooted pointer is to pass it to a call (which must have an appropriately
+typed operand).
+
+Further, we disallow getelementptr in addrspace Tracked. This is because unless
+the operation is a noop, the resulting pointer will not be validly storable
+to a GC slot and may thus not be in this address space. If such a pointer
+is required, it should be decayed to addrspace Derived first.
+
+Lastly, we disallow inttoptr/ptrtoint instructions in these address spaces.
+Having these instructions would mean that some i64 values are really gc tracked.
+This is problematic, because it breaks the stated requirement that we're able
+to identify gc-relevant pointers. This invariant is accomplished using the LLVM
+"non-integral pointers" feature, which is new in LLVM 5.0. It prohibits the
+optimizer from making optimizations that would introduce these operations. Note
+that we can still insert static constants at JIT time by using inttoptr in address
+space 0 and then decaying to the appropriate address space afterwards.
+
+### Supporting ccall
+
+One important aspect missing from the discussion so far is the handling of
+`ccall`. `ccall` has the peculiar feature that the location and scope of a use
+do not coincide. As an example consider:
+```
+A = randn(1024)
+ccall(:foo, Void, (Ptr{Float64},), A)
+```
+In lowering, the compiler will insert a conversion from the array to the
+pointer which drops the reference to the array value. However, we of course
+need to make sure that the array does stay alive while we're doing the ccall.
+To understand how this is done, first recall the lowering of the above code:
+```
+    return $(Expr(:foreigncall, :(:foo), Void, svec(Ptr{Float64}), :($(Expr(:foreigncall, :(:jl_array_ptr), Ptr{Float64}, svec(Any), :(A), 0))), :(A)))
+```
+The last `:(A)` is an extra argument list inserted during lowering that informs
+the code generator which Julia-level values need to be kept alive for the
+duration of this ccall. We then take this information and represent it in an
+"operand bundle" at the IR level. An operand bundle is essentially a fake use
+that is attached to the call site. At the IR level, this looks like so:
+```
+    call void inttoptr (i64 ... to void (double*)*)(double* %5) [ "jl_roots"(%jl_value_t addrspace(10)* %A) ]
+```
+The GC root placement pass will treat the jl_roots operand bundle as if it were
+a regular operand. However, as a final step, after the gc roots are inserted,
+it will drop the operand bundle to avoid confusing instruction selection.
+
+### Supporting pointer_from_objref
+
+`pointer_from_objref` is special because it requires the user to take explicit
+control of GC rooting. By our above invariants, this function is illegal,
+because it performs an address space cast from 10 to 0. However, it can be useful
+in certain situations, so we provide a special intrinsic:
+```
+declare %jl_value_t *julia.pointer_from_objref(%jl_value_t addrspace(10)*)
+```
+which is lowered to the corresponding address space cast after gc root lowering.
+Do note however that by using this intrinsic, the caller assumes all responsibility
+for making sure that the value in question is rooted. Further, this intrinsic is
+not considered a use, so the GC root placement pass will not provide a GC root
+for the function. As a result, the external rooting must be arranged while the
+value is still tracked by the system. I.e. it is not valid to attempt to use the
+result of this operation to establish a global root - the optimizer may have
+already dropped the value.
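+To make the caveat concrete, here is a minimal hand-written sketch (simplified,
+with placeholder names; not actual compiler output). The intrinsic call is not a
+use of `%obj`, so nothing in this function keeps `%obj` alive - the caller must
+ensure the value stays rooted elsewhere for as long as the raw pointer is used:
+```
+    %jl_value_t = type opaque
+    declare %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(10)*)
+
+    define i8* @raw_pointer(%jl_value_t addrspace(10)* %obj) {
+    top:
+      ; not considered a use: no GC root will be provided for %obj
+      %raw = call %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(10)* %obj)
+      %p = bitcast %jl_value_t* %raw to i8*
+      ret i8* %p
+    }
+```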
diff --git a/src/Makefile b/src/Makefile index ce5e8b63a5679..0140eb90f50e9 100644 --- a/src/Makefile +++ b/src/Makefile @@ -53,7 +53,7 @@ endif LLVMLINK := ifeq ($(JULIACODEGEN),LLVM) -SRCS += codegen jitlayers disasm debuginfo llvm-simdloop llvm-ptls llvm-gcroot llvm-lower-handlers cgmemmgr +SRCS += codegen jitlayers disasm debuginfo llvm-simdloop llvm-ptls llvm-late-gc-lowering llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces cgmemmgr FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir) LLVM_LIBS := all ifeq ($(USE_POLLY),1) diff --git a/src/ccall.cpp b/src/ccall.cpp index c50f0bb20e8e8..1fb38faf1dadc 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -459,7 +459,9 @@ static Value *llvm_type_rewrite( // sizes. Value *from; Value *to; -#if JL_LLVM_VERSION >= 30600 +#if JL_LLVM_VERSION >= 40000 + const DataLayout &DL = jl_data_layout; +#elif JL_LLVM_VERSION >= 30600 const DataLayout &DL = jl_ExecutionEngine->getDataLayout(); #else const DataLayout &DL = *jl_ExecutionEngine->getDataLayout(); @@ -485,8 +487,8 @@ static Value *runtime_apply_type(jl_value_t *ty, jl_unionall_t *unionall, jl_cod args[0] = literal_pointer_val(ty); args[1] = literal_pointer_val((jl_value_t*)ctx->linfo->def.method->sig); args[2] = builder.CreateInBoundsGEP( - LLVM37_param(T_pjlvalue) - emit_bitcast(ctx->spvals_ptr, T_ppjlvalue), + LLVM37_param(T_prjlvalue) + emit_bitcast(decay_derived(ctx->spvals_ptr), T_pprjlvalue), ConstantInt::get(T_size, sizeof(jl_svec_t) / sizeof(jl_value_t*))); return builder.CreateCall(prepare_call(jlapplytype_func), makeArrayRef(args)); } @@ -639,7 +641,7 @@ static Value *julia_to_native(Type *to, bool toboxed, jl_value_t *jlto, jl_union // We're passing Any if (toboxed) { assert(!byRef); // don't expect any ABI to pass pointers by pointer - return boxed(jvinfo, ctx); + return maybe_decay_untracked(boxed(jvinfo, ctx)); } assert(jl_is_datatype(jlto) && julia_struct_has_layout((jl_datatype_t*)jlto, jlto_env)); @@ -1208,7 +1210,9 @@ static jl_cgval_t mark_or_box_ccall_result(Value *result, bool isboxed, jl_value Value *runtime_dt = runtime_apply_type(rt, unionall, ctx); // TODO: is this leaf check actually necessary, or is it structurally guaranteed?
emit_leafcheck(runtime_dt, "ccall: return type must be a leaf DataType", ctx); -#if JL_LLVM_VERSION >= 30600 +#if JL_LLVM_VERSION >= 40000 + const DataLayout &DL = jl_data_layout; +#elif JL_LLVM_VERSION >= 30600 const DataLayout &DL = jl_ExecutionEngine->getDataLayout(); #else const DataLayout &DL = *jl_ExecutionEngine->getDataLayout(); @@ -1306,7 +1310,7 @@ std::string generate_func_sig() #else paramattrs.push_back(AttributeSet::get(jl_LLVMContext, 1, retattrs)); #endif - fargt_sig.push_back(PointerType::get(lrt, 0)); + fargt_sig.push_back(PointerType::get(lrt, AddressSpace::Derived)); sret = 1; prt = lrt; } @@ -1349,6 +1353,8 @@ std::string generate_func_sig() } t = julia_struct_to_llvm(tti, unionall_env, &isboxed); + if (isboxed) + t = T_prjlvalue; if (t == NULL || t == T_void) { std::stringstream msg; msg << "ccall: the type of argument "; @@ -1369,7 +1375,7 @@ std::string generate_func_sig() pat = t; } else if (byRef) { - pat = PointerType::get(t, 0); + pat = PointerType::get(t, AddressSpace::Derived); } else { pat = abi->preferred_llvm_type((jl_datatype_t*)tti, false); @@ -1459,6 +1465,8 @@ static const std::string verify_ccall_sig(size_t nargs, jl_value_t *&rt, jl_valu lrt = julia_struct_to_llvm(rt, unionall_env, &retboxed); if (lrt == NULL) return "ccall: return type doesn't correspond to a C type"; + else if (retboxed) + lrt = T_prjlvalue; // is return type fully statically known? if (unionall_env == NULL) { @@ -1646,8 +1654,16 @@ static jl_cgval_t emit_ccall(jl_value_t **args, size_t nargs, jl_codectx_t *ctx) ary = emit_unbox(largty, emit_expr(argi, ctx), tti); } JL_GC_POP(); - return mark_or_box_ccall_result(emit_bitcast(ary, lrt), - retboxed, rt, unionall, static_rt, ctx); + if (!retboxed) { + return mark_or_box_ccall_result( + emit_bitcast(emit_pointer_from_objref( + emit_bitcast(ary, T_prjlvalue)), lrt), + retboxed, rt, unionall, static_rt, ctx); + } else { + return mark_or_box_ccall_result(maybe_decay_untracked( + emit_bitcast(ary, lrt)), + retboxed, rt, unionall, static_rt, ctx); + } } else if (is_libjulia_func(jl_cpu_pause)) { // Keep in sync with the julia_threads.h version @@ -1971,6 +1987,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( ai + 1, ctx, &needStackRestore); bool issigned = jl_signed_type && jl_subtype(jargty, (jl_value_t*)jl_signed_type); if (byRef) { + v = decay_derived(v); // julia_to_native should already have done the alloca and store assert(v->getType() == pargty); } @@ -1986,6 +2003,13 @@ jl_cgval_t function_sig_t::emit_a_ccall( } v = julia_to_address(largty, jargty_in_env, unionall_env, arg, ai + 1, ctx, &needStackRestore); + if (isa(v)) { + JL_GC_POP(); + return jl_cgval_t(); + } + // A bit of a hack, but we're trying to get rid of this feature + // anyway. + v = emit_bitcast(emit_pointer_from_objref(v), pargty); assert((!toboxed && !byRef) || isa(v)); } @@ -2013,7 +2037,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( literal_pointer_val((jl_value_t*)rt)); sretboxed = true; } - argvals[0] = emit_bitcast(result, fargt_sig.at(0)); + argvals[0] = emit_bitcast(decay_derived(result), fargt_sig.at(0)); } Instruction *stacksave = NULL; @@ -2101,9 +2125,11 @@ jl_cgval_t function_sig_t::emit_a_ccall( // Mark GC use before **and** after the ccall to make sure the arguments // are alive during the ccall even if the function called is `noreturn`. 
mark_gc_uses(gc_uses); + OperandBundleDef OpBundle("jl_roots", gc_uses); // the actual call Value *ret = builder.CreateCall(prepare_call(llvmf), - ArrayRef(&argvals[0], nargs + sret)); + ArrayRef(&argvals[0], nargs + sret), + ArrayRef(&OpBundle, gc_uses.empty() ? 0 : 1)); ((CallInst*)ret)->setAttributes(attributes); if (cc != CallingConv::C) @@ -2145,6 +2171,9 @@ jl_cgval_t function_sig_t::emit_a_ccall( } else { Type *jlrt = julia_type_to_llvm(rt, &jlretboxed); // compute the real "julian" return type and compute whether it is boxed + if (jlretboxed) { + jlrt = T_prjlvalue; + } if (type_is_ghost(jlrt)) { return ghostValue(rt); } @@ -2160,7 +2189,9 @@ jl_cgval_t function_sig_t::emit_a_ccall( Value *strct = emit_allocobj(ctx, rtsz, runtime_bt); int boxalign = jl_gc_alignment(rtsz); #ifndef JL_NDEBUG -#if JL_LLVM_VERSION >= 30600 +#if JL_LLVM_VERSION >= 40000 + const DataLayout &DL = jl_data_layout; +#elif JL_LLVM_VERSION >= 30600 const DataLayout &DL = jl_ExecutionEngine->getDataLayout(); #else const DataLayout &DL = *jl_ExecutionEngine->getDataLayout(); diff --git a/src/cgutils.cpp b/src/cgutils.cpp index a14dff0ecfe91..41ebed0981fd2 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -25,6 +25,40 @@ static Value *prepare_call(Value *Callee) return prepare_call(builder, Callee); } +static Value *maybe_decay_untracked(Value *V) +{ + if (V->getType() == T_pjlvalue) + return builder.CreateAddrSpaceCast(V, T_prjlvalue); + else if (V->getType() == T_ppjlvalue) + return builder.CreateBitCast(V, T_pprjlvalue); + return V; +} + +static Constant *maybe_decay_untracked(Constant *C) +{ + if (C->getType() == T_pjlvalue) + return ConstantExpr::getAddrSpaceCast(C, T_prjlvalue); + else if (C->getType() == T_ppjlvalue) + return ConstantExpr::getBitCast(C, T_pprjlvalue); + return C; +} + +static Value *decay_derived(Value *V) +{ + Type *T = V->getType(); + if (cast(T)->getAddressSpace() == AddressSpace::Derived) + return V; + // Once llvm deletes pointer element types, we won't need it here any more either. 
+ Type *NewT = PointerType::get(cast(T)->getElementType(), AddressSpace::Derived); + return builder.CreateAddrSpaceCast(V, NewT); +} + +static Value *mark_callee_rooted(Value *V) +{ + assert(V->getType() == T_pjlvalue || V->getType() == T_prjlvalue); + return builder.CreateAddrSpaceCast(V, + PointerType::get(T_jlvalue, AddressSpace::CalleeRooted)); +} // --- language feature checks --- @@ -215,6 +249,22 @@ static DIType julia_type_to_di(jl_value_t *jt, DIBuilder *dbuilder, bool isboxed return jl_pvalue_dillvmt; } +static Value *emit_pointer_from_objref(Value *V) +{ + unsigned AS = cast(V->getType())->getAddressSpace(); + if (AS != AddressSpace::Tracked && AS != AddressSpace::Derived) + return builder.CreateBitCast(V, T_pjlvalue); + V = builder.CreateBitCast(decay_derived(V), + PointerType::get(T_jlvalue, AddressSpace::Derived)); + CallInst *Call = builder.CreateCall(prepare_call(pointer_from_objref_func), V); +#if JL_LLVM_VERSION >= 50000 + Call->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone); +#else + Call->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone); +#endif + return Call; +} + // --- emitting pointers directly into code --- static Constant *literal_static_pointer_val(const void *p, Type *t) @@ -223,9 +273,9 @@ static Constant *literal_static_pointer_val(const void *p, Type *t) // the generated code will only be valid during the current session, // and thus, this should typically be avoided in new API's #if defined(_P64) - return ConstantExpr::getIntToPtr(ConstantInt::get(T_int64, (uint64_t)p), t); + return ConstantExpr::getPointerBitCastOrAddrSpaceCast(ConstantExpr::getIntToPtr(ConstantInt::get(T_int64, (uint64_t)p), T_pjlvalue), t); #else - return ConstantExpr::getIntToPtr(ConstantInt::get(T_int32, (uint32_t)p), t); + return ConstantExpr::getPointerBitCastOrAddrSpaceCast(ConstantExpr::getIntToPtr(ConstantInt::get(T_int32, (uint32_t)p), T_pjlvalue), t); #endif } @@ -284,31 +334,31 @@ static Value *literal_pointer_val_slot(jl_value_t *p) } if (GlobalVariable *gv = julia_const_gv(p)) { // if this is a known object, use the existing GlobalValue - return prepare_global(gv, jl_builderModule); + return maybe_decay_untracked(prepare_global(gv, jl_builderModule)); } if (jl_is_datatype(p)) { jl_datatype_t *addr = (jl_datatype_t*)p; // DataTypes are prefixed with a + - return julia_pgv("+", addr->name->name, addr->name->module, p); + return maybe_decay_untracked(julia_pgv("+", addr->name->name, addr->name->module, p)); } if (jl_is_method(p)) { jl_method_t *m = (jl_method_t*)p; // functions are prefixed with a - - return julia_pgv("-", m->name, m->module, p); + return maybe_decay_untracked(julia_pgv("-", m->name, m->module, p)); } if (jl_is_method_instance(p)) { jl_method_instance_t *linfo = (jl_method_instance_t*)p; // Type-inferred functions are also prefixed with a - if (jl_is_method(linfo->def.method)) - return julia_pgv("-", linfo->def.method->name, linfo->def.method->module, p); + return maybe_decay_untracked(julia_pgv("-", linfo->def.method->name, linfo->def.method->module, p)); } if (jl_is_symbol(p)) { jl_sym_t *addr = (jl_sym_t*)p; // Symbols are prefixed with jl_sym# - return julia_pgv("jl_sym#", addr, NULL, p); + return maybe_decay_untracked(julia_pgv("jl_sym#", addr, NULL, p)); } // something else gets just a generic name - return julia_pgv("jl_global#", p); + return maybe_decay_untracked(julia_pgv("jl_global#", p)); } static Value *literal_pointer_val(jl_value_t *p) @@ -316,7 +366,7 @@ static Value *literal_pointer_val(jl_value_t *p) if (p == NULL) 
return V_null; if (!imaging_mode) - return literal_static_pointer_val(p, T_pjlvalue); + return literal_static_pointer_val(p, T_prjlvalue); Value *pgv = literal_pointer_val_slot(p); return tbaa_decorate(tbaa_const, builder.CreateLoad(pgv)); } @@ -364,9 +414,9 @@ static Value *julia_binding_gv(jl_binding_t *b) bv = emit_bitcast( tbaa_decorate(tbaa_const, builder.CreateLoad(julia_pgv("*", b->name, b->owner, b))), - T_ppjlvalue); + T_pprjlvalue); else - bv = literal_static_pointer_val(b, T_ppjlvalue); + bv = literal_static_pointer_val(b, T_pprjlvalue); return julia_binding_gv(bv); } @@ -517,7 +567,9 @@ static Type *julia_struct_to_llvm(jl_value_t *jt, jl_unionall_t *ua, bool *isbox // If LLVM and Julia disagree about alignment, much trouble ensues, so check it! if (jst->layout) { const DataLayout &DL = -#if JL_LLVM_VERSION >= 30600 +#if JL_LLVM_VERSION >= 40000 + jl_data_layout; +#elif JL_LLVM_VERSION >= 30600 jl_ExecutionEngine->getDataLayout(); #else *jl_ExecutionEngine->getDataLayout(); @@ -618,15 +670,15 @@ static unsigned get_box_tindex(jl_datatype_t *jt, jl_value_t *ut) // --- generating various field accessors --- -static Value *emit_nthptr_addr(Value *v, ssize_t n) +static Value *emit_nthptr_addr(Value *v, ssize_t n, bool gctracked = true) { - return builder.CreateGEP(emit_bitcast(v, T_ppjlvalue), + return builder.CreateGEP(emit_bitcast(gctracked ? decay_derived(v) : v, T_pprjlvalue), ConstantInt::get(T_size, n)); } -static Value *emit_nthptr_addr(Value *v, Value *idx) +static Value *emit_nthptr_addr(Value *v, Value *idx, bool gctracked = true) { - return builder.CreateGEP(emit_bitcast(v, T_ppjlvalue), idx); + return builder.CreateGEP(emit_bitcast(gctracked ? decay_derived(v) : v, T_pprjlvalue), idx); } static Value *emit_nthptr(Value *v, ssize_t n, MDNode *tbaa) @@ -636,17 +688,17 @@ static Value *emit_nthptr(Value *v, ssize_t n, MDNode *tbaa) return tbaa_decorate(tbaa,builder.CreateLoad(vptr, false)); } -static Value *emit_nthptr_recast(Value *v, Value *idx, MDNode *tbaa, Type *ptype) +static Value *emit_nthptr_recast(Value *v, Value *idx, MDNode *tbaa, Type *ptype, bool gctracked = true) { // p = (jl_value_t**)v; *(ptype)&p[n] - Value *vptr = emit_nthptr_addr(v, idx); + Value *vptr = emit_nthptr_addr(v, idx, gctracked); return tbaa_decorate(tbaa,builder.CreateLoad(emit_bitcast(vptr,ptype), false)); } -static Value *emit_nthptr_recast(Value *v, ssize_t n, MDNode *tbaa, Type *ptype) +static Value *emit_nthptr_recast(Value *v, ssize_t n, MDNode *tbaa, Type *ptype, bool gctracked = true) { // p = (jl_value_t**)v; *(ptype)&p[n] - Value *vptr = emit_nthptr_addr(v, n); + Value *vptr = emit_nthptr_addr(v, n, gctracked); return tbaa_decorate(tbaa,builder.CreateLoad(emit_bitcast(vptr,ptype), false)); } @@ -672,8 +724,13 @@ static Value *emit_typeof(Value *tt) { assert(tt != NULL && !isa(tt) && "expected a conditionally boxed value"); // given p, a jl_value_t*, compute its type tag - tt = tbaa_decorate(tbaa_tag, builder.CreateLoad(emit_typeptr_addr(tt))); - return mask_gc_bits(tt); + // The raw address is not GC-safe to load from as it may have mask bits set. + // Note that this gives the optimizer license to not root this value. That + // is fine however, since leaf types are not GCed at the moment. Should + // that ever change, this may have to go through a special intrinsic. 
+ Value *addr = emit_bitcast(emit_typeptr_addr(tt), T_ppjlvalue); + tt = tbaa_decorate(tbaa_tag, builder.CreateLoad(addr)); + return maybe_decay_untracked(mask_gc_bits(tt)); } static jl_cgval_t emit_typeof(const jl_cgval_t &p, jl_codectx_t *ctx) @@ -695,22 +752,27 @@ static jl_cgval_t emit_typeof(const jl_cgval_t &p, jl_codectx_t *ctx) counter); if (allunboxed) pdatatype = Constant::getNullValue(T_ppjlvalue); - else - pdatatype = emit_typeptr_addr(p.V); + else { + // See note above in emit_typeof(Value*), we can't tell the system + // about this until we've cleared the GC bits. + pdatatype = emit_bitcast(emit_typeptr_addr(builder.CreateLoad(p.gcroot)), T_ppjlvalue); + } counter = 0; for_each_uniontype_small( [&](unsigned idx, jl_datatype_t *jt) { Value *cmp = builder.CreateICmpEQ(tindex, ConstantInt::get(T_int8, idx)); - pdatatype = builder.CreateSelect(cmp, literal_pointer_val_slot((jl_value_t*)jt), pdatatype); + pdatatype = builder.CreateSelect(cmp, + decay_derived(emit_bitcast(literal_pointer_val_slot((jl_value_t*)jt), T_ppjlvalue)), + decay_derived(pdatatype)); }, p.typ, counter); Value *datatype; if (allunboxed) { - datatype = tbaa_decorate(tbaa_const, builder.CreateLoad(pdatatype)); + datatype = tbaa_decorate(tbaa_const, builder.CreateLoad(maybe_decay_untracked(pdatatype))); } else { - datatype = mask_gc_bits(tbaa_decorate(tbaa_tag, builder.CreateLoad(pdatatype))); + datatype = maybe_decay_untracked(mask_gc_bits(tbaa_decorate(tbaa_tag, builder.CreateLoad(pdatatype)))); } return mark_julia_type(datatype, true, jl_datatype_type, ctx, /*needsroot*/false); } @@ -732,7 +794,7 @@ static Value *emit_datatype_types(Value *dt) { return tbaa_decorate(tbaa_const, builder. CreateLoad(emit_bitcast(builder. - CreateGEP(emit_bitcast(dt, T_pint8), + CreateGEP(emit_bitcast(decay_derived(dt), T_pint8), ConstantInt::get(T_size, offsetof(jl_datatype_t, types))), T_ppjlvalue))); } @@ -743,7 +805,7 @@ static Value *emit_datatype_nfields(Value *dt) tbaa_decorate(tbaa_const, builder.CreateLoad( emit_bitcast( builder.CreateGEP( - emit_bitcast(dt, T_pint8), + emit_bitcast(decay_derived(dt), T_pint8), ConstantInt::get(T_size, offsetof(jl_datatype_t, types))), T_pint32->getPointerTo()))))); #ifdef _P64 @@ -756,7 +818,7 @@ static Value *emit_datatype_size(Value *dt) { Value *size = tbaa_decorate(tbaa_const, builder. CreateLoad(emit_bitcast(builder. - CreateGEP(emit_bitcast(dt, T_pint8), + CreateGEP(emit_bitcast(decay_derived(dt), T_pint8), ConstantInt::get(T_size, offsetof(jl_datatype_t, size))), T_pint32))); return size; @@ -814,7 +876,7 @@ static Value *emit_sizeof(const jl_cgval_t &p, jl_codectx_t *ctx) static Value *emit_datatype_mutabl(Value *dt) { Value *mutabl = tbaa_decorate(tbaa_const, builder. - CreateLoad(builder.CreateGEP(emit_bitcast(dt, T_pint8), + CreateLoad(builder.CreateGEP(emit_bitcast(decay_derived(dt), T_pint8), ConstantInt::get(T_size, offsetof(jl_datatype_t, mutabl))))); return builder.CreateTrunc(mutabl, T_int1); } @@ -822,7 +884,7 @@ static Value *emit_datatype_mutabl(Value *dt) static Value *emit_datatype_abstract(Value *dt) { Value *abstract = tbaa_decorate(tbaa_const, builder. 
- CreateLoad(builder.CreateGEP(emit_bitcast(dt, T_pint8), + CreateLoad(builder.CreateGEP(emit_bitcast(decay_derived(dt), T_pint8), ConstantInt::get(T_size, offsetof(jl_datatype_t, abstract))))); return builder.CreateTrunc(abstract, T_int1); } @@ -885,9 +947,9 @@ static void raise_exception(Value *exc, jl_codectx_t *ctx, } else { JL_FEAT_REQUIRE(ctx, runtime); #if JL_LLVM_VERSION >= 30700 - builder.CreateCall(prepare_call(jlthrow_func), { exc }); + builder.CreateCall(prepare_call(jlthrow_func), { mark_callee_rooted(exc) }); #else - builder.CreateCall(prepare_call(jlthrow_func), exc); + builder.CreateCall(prepare_call(jlthrow_func), mark_callee_rooted(exc)); #endif } builder.CreateUnreachable(); @@ -977,11 +1039,11 @@ static void emit_type_error(const jl_cgval_t &x, Value *type, const std::string #if JL_LLVM_VERSION >= 30700 builder.CreateCall(prepare_call(jltypeerror_func), { fname_val, msg_val, - type, boxed(x, ctx, false)}); // x is rooted by jl_type_error_rt + type, mark_callee_rooted(boxed(x, ctx, false))}); #else builder.CreateCall4(prepare_call(jltypeerror_func), fname_val, msg_val, - type, boxed(x, ctx, false)); // x is rooted by jl_type_error_rt + type, mark_callee_rooted(boxed(x, ctx, false))); #endif } @@ -1006,20 +1068,21 @@ static std::pair emit_isa(const jl_cgval_t &x, jl_value_t *type, c // intersection with Type needs to be handled specially if (jl_has_intersect_type_not_kind(type)) { - Value *vx = boxed(x, ctx); + Value *vx = maybe_decay_untracked(boxed(x, ctx)); + Value *vtyp = literal_pointer_val(type); if (msg && *msg == "typeassert") { #if JL_LLVM_VERSION >= 30700 - builder.CreateCall(prepare_call(jltypeassert_func), { vx, literal_pointer_val(type) }); + builder.CreateCall(prepare_call(jltypeassert_func), { vx, vtyp }); #else - builder.CreateCall2(prepare_call(jltypeassert_func), vx, literal_pointer_val(type)); + builder.CreateCall2(prepare_call(jltypeassert_func), vx, vtyp); #endif return std::make_pair(ConstantInt::get(T_int1, 1), true); } return std::make_pair(builder.CreateICmpNE( #if JL_LLVM_VERSION >= 30700 - builder.CreateCall(prepare_call(jlisa_func), { vx, literal_pointer_val(type) }), + builder.CreateCall(prepare_call(jlisa_func), { vx, vtyp }), #else - builder.CreateCall2(prepare_call(jlisa_func), vx, literal_pointer_val(type)), + builder.CreateCall2(prepare_call(jlisa_func), vx, vtyp), #endif ConstantInt::get(T_int32, 0)), false); } @@ -1040,7 +1103,8 @@ static std::pair emit_isa(const jl_cgval_t &x, jl_value_t *type, c BasicBlock *postBB = BasicBlock::Create(jl_LLVMContext, "post_isa", ctx->f); builder.CreateCondBr(isboxed, isaBB, postBB); builder.SetInsertPoint(isaBB); - Value *istype_boxed = builder.CreateICmpEQ(emit_typeof(x.V), literal_pointer_val(type)); + Value *istype_boxed = builder.CreateICmpEQ(emit_typeof(x.V), + maybe_decay_untracked(literal_pointer_val(type))); builder.CreateBr(postBB); builder.SetInsertPoint(postBB); PHINode *istype = builder.CreatePHI(T_int1, 2); @@ -1049,15 +1113,20 @@ static std::pair emit_isa(const jl_cgval_t &x, jl_value_t *type, c return std::make_pair(istype, false); } } - return std::make_pair(builder.CreateICmpEQ(emit_typeof_boxed(x, ctx), literal_pointer_val(type)), false); + return std::make_pair(builder.CreateICmpEQ(emit_typeof_boxed(x, ctx), + maybe_decay_untracked(literal_pointer_val(type))), false); } // everything else can be handled via subtype tests - Value *vxt = emit_typeof_boxed(x, ctx); + Value *vxt = maybe_decay_untracked(emit_typeof_boxed(x, ctx)); return std::make_pair(builder.CreateICmpNE( #if 
JL_LLVM_VERSION >= 30700 - builder.CreateCall(prepare_call(jlsubtype_func), { vxt, literal_pointer_val(type) }), + builder.CreateCall(prepare_call(jlsubtype_func), + { vxt, + literal_pointer_val(type) }), #else - builder.CreateCall2(prepare_call(jlsubtype_func), vxt, literal_pointer_val(type)), + builder.CreateCall2(prepare_call(jlsubtype_func), + vxt, + literal_pointer_val(type)), #endif ConstantInt::get(T_int32, 0)), false); } @@ -1087,7 +1156,7 @@ static void emit_leafcheck(Value *typ, const std::string &msg, jl_codectx_t *ctx assert(typ->getType() == T_pjlvalue); emit_typecheck(mark_julia_type(typ, true, jl_any_type, ctx, false), (jl_value_t*)jl_datatype_type, msg, ctx); Value *isleaf; - isleaf = builder.CreateConstInBoundsGEP1_32(LLVM37_param(T_int8) emit_bitcast(typ, T_pint8), offsetof(jl_datatype_t, isleaftype)); + isleaf = builder.CreateConstInBoundsGEP1_32(LLVM37_param(T_int8) emit_bitcast(decay_derived(typ), T_pint8), offsetof(jl_datatype_t, isleaftype)); isleaf = builder.CreateLoad(isleaf, tbaa_const); isleaf = builder.CreateTrunc(isleaf, T_int1); error_unless(isleaf, msg, ctx); @@ -1123,9 +1192,9 @@ static Value *emit_bounds_check(const jl_cgval_t &ainfo, jl_value_t *ty, Value * } else if (ainfo.isboxed) { // jl_datatype_t or boxed jl_value_t #if JL_LLVM_VERSION >= 30700 - builder.CreateCall(prepare_call(jlboundserror_func), { boxed(ainfo, ctx), i }); + builder.CreateCall(prepare_call(jlboundserror_func), { mark_callee_rooted(boxed(ainfo, ctx)), i }); #else - builder.CreateCall2(prepare_call(jlboundserror_func), boxed(ainfo, ctx), i); + builder.CreateCall2(prepare_call(jlboundserror_func), mark_callee_rooted(boxed(ainfo, ctx)), i); #endif } else { // unboxed jl_value_t* @@ -1141,12 +1210,12 @@ static Value *emit_bounds_check(const jl_cgval_t &ainfo, jl_value_t *ty, Value * } #if JL_LLVM_VERSION >= 30700 builder.CreateCall(prepare_call(jluboundserror_func), { - builder.CreatePointerCast(a, T_pint8), + emit_bitcast(decay_derived(a), T_pint8), literal_pointer_val(ty), i }); #else builder.CreateCall3(prepare_call(jluboundserror_func), - builder.CreatePointerCast(a, T_pint8), + emit_bitcast(decay_derived(a), T_pint8), literal_pointer_val(ty), i); #endif @@ -1187,9 +1256,11 @@ static jl_cgval_t typed_load(Value *ptr, Value *idx_0based, jl_value_t *jltype, if (type_is_ghost(elty)) return ghostValue(jltype); Value *data; + if (isboxed) + elty = T_prjlvalue; // TODO: preserving_pointercast? if (ptr->getType()->getContainedType(0) != elty) - data = builder.CreatePointerCast(ptr, PointerType::get(elty, 0)); + data = emit_bitcast(ptr, PointerType::get(elty, 0)); else data = ptr; if (idx_0based) @@ -1231,13 +1302,17 @@ static void typed_store(Value *ptr, Value *idx_0based, const jl_cgval_t &rhs, r = emit_unbox(elty, rhs, jltype); } else { - r = boxed(rhs, ctx, root_box); + r = maybe_decay_untracked(boxed(rhs, ctx, root_box)); if (parent != NULL) emit_write_barrier(ctx, parent, r); } Value *data; - if (ptr->getType()->getContainedType(0) != elty) - data = emit_bitcast(ptr, PointerType::get(elty, 0)); - else + if (ptr->getType()->getContainedType(0) != elty) { + if (isboxed) { + data = emit_bitcast(ptr, T_pprjlvalue); + } else { + data = emit_bitcast(ptr, PointerType::get(elty, cast(ptr->getType())->getAddressSpace())); + } + } else data = ptr; Instruction *store = builder.CreateAlignedStore(r, builder.CreateGEP(data, idx_0based), isboxed ? 
alignment : julia_alignment(r, jltype, alignment)); @@ -1342,7 +1417,7 @@ static Value *data_pointer(const jl_cgval_t &x, jl_codectx_t *ctx, Type *astype } if (data->getType() != astype) data = emit_bitcast(data, astype); - return data; + return decay_derived(data); } static bool emit_getfield_unknownidx(jl_cgval_t *ret, const jl_cgval_t &strct, @@ -1363,7 +1438,8 @@ static bool emit_getfield_unknownidx(jl_cgval_t *ret, const jl_cgval_t &strct, Value *fld = tbaa_decorate(strct.tbaa, maybe_mark_load_dereferenceable( builder.CreateLoad( - builder.CreateGEP(data_pointer(strct, ctx), idx)), + builder.CreateBitCast(builder.CreateGEP(decay_derived(data_pointer(strct, ctx)), idx), + PointerType::get(T_prjlvalue, AddressSpace::Derived))), maybe_null, minimum_field_size)); if (maybe_null) null_pointer_check(fld, ctx); @@ -1378,7 +1454,7 @@ static bool emit_getfield_unknownidx(jl_cgval_t *ret, const jl_cgval_t &strct, if (!stt->mutabl) { // just compute the pointer and let user load it when necessary Type *fty = julia_type_to_llvm(jt); - Value *addr = builder.CreateGEP(builder.CreatePointerCast(ptr, PointerType::get(fty,0)), idx); + Value *addr = builder.CreateGEP(emit_bitcast(decay_derived(ptr), PointerType::get(fty,0)), idx); *ret = mark_julia_slot(addr, jt, NULL, strct.tbaa); ret->gcroot = strct.gcroot; ret->isimmutable = strct.isimmutable; @@ -1439,7 +1515,7 @@ static jl_cgval_t emit_getfield_knownidx(const jl_cgval_t &strct, unsigned idx, bool isboxed; Type *lt = julia_type_to_llvm((jl_value_t*)jt, &isboxed); if (isboxed) { - Value *ptr = data_pointer(strct, ctx, T_pint8); + Value *ptr = decay_derived(data_pointer(strct, ctx, T_pint8)); Value *llvm_idx = ConstantInt::get(T_size, jl_field_offset(jt, idx)); addr = builder.CreateGEP(ptr, llvm_idx); } @@ -1463,7 +1539,7 @@ static jl_cgval_t emit_getfield_knownidx(const jl_cgval_t &strct, unsigned idx, if (jl_field_isptr(jt, idx)) { bool maybe_null = idx >= (unsigned)jt->ninitialized; Instruction *Load = maybe_mark_load_dereferenceable( - builder.CreateLoad(emit_bitcast(addr, T_ppjlvalue)), + builder.CreateLoad(emit_bitcast(addr, T_pprjlvalue)), maybe_null, jl_field_type(jt, idx) ); Value *fldv = tbaa_decorate(strct.tbaa, Load); @@ -1529,11 +1605,15 @@ static void maybe_alloc_arrayvar(int s, jl_codectx_t *ctx) // passed to an external function (ideally only impure functions) jl_arrayvar_t av; int ndims = jl_unbox_long(jl_tparam1(jt)); - Type *elt = julia_type_to_llvm(jl_tparam0(jt)); + jl_value_t *jelt = jl_tparam0(jt); + bool isboxed = !jl_array_store_unboxed(jelt); + Type *elt = julia_type_to_llvm(jelt); if (type_is_ghost(elt)) return; + if (isboxed) + elt = T_prjlvalue; // CreateAlloca is OK here because maybe_alloc_arrayvar is only called in the prologue setup - av.dataptr = builder.CreateAlloca(PointerType::get(elt,0)); + av.dataptr = builder.CreateAlloca(PointerType::get(elt, 0)); av.len = builder.CreateAlloca(T_size); for (int i = 0; i < ndims - 1; i++) av.sizes.push_back(builder.CreateAlloca(T_size)); @@ -1578,7 +1658,7 @@ static Value *emit_arraylen_prim(const jl_cgval_t &tinfo, jl_codectx_t *ctx) #if JL_LLVM_VERSION >= 30700 nullptr, #endif - emit_bitcast(t,jl_parray_llvmt), + emit_bitcast(decay_derived(t), jl_parray_llvmt), 1); //index (not offset) of length field in jl_parray_llvmt MDNode *tbaa = arraytype_constshape(ty) ? 
tbaa_const : tbaa_arraylen; @@ -1611,26 +1691,30 @@ static Value *emit_arraylen(const jl_cgval_t &tinfo, jl_value_t *ex, jl_codectx_ return emit_arraylen_prim(tinfo, ctx); } -static Value *emit_arrayptr(const jl_cgval_t &tinfo, jl_codectx_t *ctx) +static Value *emit_arrayptr(const jl_cgval_t &tinfo, jl_codectx_t *ctx, bool isboxed = false) { Value *t = boxed(tinfo, ctx); Value *addr = builder.CreateStructGEP( #if JL_LLVM_VERSION >= 30700 nullptr, #endif - emit_bitcast(t,jl_parray_llvmt), + emit_bitcast(decay_derived(t), jl_parray_llvmt), 0); //index (not offset) of data field in jl_parray_llvmt MDNode *tbaa = arraytype_constshape(tinfo.typ) ? tbaa_const : tbaa_arrayptr; + if (isboxed) { + addr = builder.CreateBitCast(addr, + PointerType::get(T_pprjlvalue, cast(addr->getType())->getAddressSpace())); + } return tbaa_decorate(tbaa, builder.CreateLoad(addr, false)); } -static Value *emit_arrayptr(const jl_cgval_t &tinfo, jl_value_t *ex, jl_codectx_t *ctx) +static Value *emit_arrayptr(const jl_cgval_t &tinfo, jl_value_t *ex, jl_codectx_t *ctx, bool isboxed = false) { jl_arrayvar_t *av = arrayvar_for(ex, ctx); if (av!=NULL) return builder.CreateLoad(av->dataptr); - return emit_arrayptr(tinfo, ctx); + return emit_arrayptr(tinfo, ctx, isboxed); } static Value *emit_arraysize(const jl_cgval_t &tinfo, jl_value_t *ex, int dim, jl_codectx_t *ctx) @@ -1653,7 +1737,7 @@ static Value *emit_arrayflags(const jl_cgval_t &tinfo, jl_codectx_t *ctx) #if JL_LLVM_VERSION >= 30700 nullptr, #endif - emit_bitcast(t, jl_parray_llvmt), + emit_bitcast(decay_derived(t), jl_parray_llvmt), arrayflag_field); return tbaa_decorate(tbaa_arrayflags, builder.CreateLoad(addr)); } @@ -1765,9 +1849,11 @@ static Value *emit_array_nd_index(const jl_cgval_t &ainfo, jl_value_t *ex, ssize builder.CreateStore(idxs[k], builder.CreateGEP(tmp, ConstantInt::get(T_size, k))); } #if JL_LLVM_VERSION >= 30700 - builder.CreateCall(prepare_call(jlboundserrorv_func), { a, tmp, ConstantInt::get(T_size, nidxs) }); + builder.CreateCall(prepare_call(jlboundserrorv_func), + { mark_callee_rooted(a), tmp, ConstantInt::get(T_size, nidxs) }); #else - builder.CreateCall3(prepare_call(jlboundserrorv_func), a, tmp, ConstantInt::get(T_size, nidxs)); + builder.CreateCall3(prepare_call(jlboundserrorv_func), + mark_callee_rooted(a), tmp, ConstantInt::get(T_size, nidxs)); #endif builder.CreateUnreachable(); @@ -1964,14 +2050,14 @@ static Value *box_union(const jl_cgval_t &vinfo, jl_codectx_t *ctx, const SmallB // box_union_isboxed: // br post_box_union // post_box_union: - // box = phi [ box1, box_union_1 ], [ box2, box_union_2 ], [ vinfo, post_box_union ] + // box = phi [ box1, box_union_1 ], [ box2, box_union_2 ], [ vinfo, box_union_isboxed ] // ... Value *tindex = vinfo.TIndex; BasicBlock *defaultBB = BasicBlock::Create(jl_LLVMContext, "box_union_isboxed", ctx->f); SwitchInst *switchInst = builder.CreateSwitch(tindex, defaultBB); BasicBlock *postBB = BasicBlock::Create(jl_LLVMContext, "post_box_union", ctx->f); builder.SetInsertPoint(postBB); - PHINode *box_merge = builder.CreatePHI(T_pjlvalue, 2); + PHINode *box_merge = builder.CreatePHI(T_prjlvalue, 2); unsigned counter = 0; for_each_uniontype_small( [&](unsigned idx, jl_datatype_t *jt) { @@ -1993,7 +2079,7 @@ static Value *box_union(const jl_cgval_t &vinfo, jl_codectx_t *ctx, const SmallB init_bits_cgval(box, vinfo_r, jl_is_mutable(jt) ? 
tbaa_mutab : tbaa_immut, ctx); } } - box_merge->addIncoming(box, tempBB); + box_merge->addIncoming(maybe_decay_untracked(box), tempBB); builder.CreateBr(postBB); }, vinfo.typ, @@ -2002,10 +2088,10 @@ static Value *box_union(const jl_cgval_t &vinfo, jl_codectx_t *ctx, const SmallB if (skip.size() > 0 && skip[0]) { // skip[0] specifies where to return NULL or the original pointer // if the value was not handled above - box_merge->addIncoming(V_null, defaultBB); + box_merge->addIncoming(maybe_decay_untracked(V_null), defaultBB); builder.CreateBr(postBB); } - else if (vinfo.V == NULL || isa(vinfo.V)) { + else if ((vinfo.V == NULL || isa(vinfo.V)) && !vinfo.gcroot) { Function *trap_func = Intrinsic::getDeclaration( ctx->f->getParent(), Intrinsic::trap); @@ -2017,7 +2103,9 @@ static Value *box_union(const jl_cgval_t &vinfo, jl_codectx_t *ctx, const SmallB // if this is a derived pointer, make sure the root usage itself is also visible to the delete-root pass mark_gc_use(vinfo); } - box_merge->addIncoming(emit_bitcast(vinfo.V, T_pjlvalue), defaultBB); + // We're guaranteed here that Load(.gcroot) == .V, because we have determined + // that this union is a boxed value, rather than an interior pointer of some sort + box_merge->addIncoming(builder.CreateLoad(vinfo.gcroot), defaultBB); builder.CreateBr(postBB); } builder.SetInsertPoint(postBB); @@ -2032,12 +2120,14 @@ static Value *boxed(const jl_cgval_t &vinfo, jl_codectx_t *ctx, bool gcrooted) jl_value_t *jt = vinfo.typ; if (jt == jl_bottom_type || jt == NULL) // We have an undef value on a (hopefully) dead branch - return UndefValue::get(T_pjlvalue); + return UndefValue::get(T_prjlvalue); if (vinfo.constant) return literal_pointer_val(vinfo.constant); if (vinfo.isboxed) { assert(vinfo.V && "Missing value for box."); - return vinfo.V; + // We're guaranteed here that Load(.gcroot) == .V, because we have determined + // that this value is a box, so if it has a gcroot, that's where the value is. + return vinfo.gcroot ? 
builder.CreateLoad(vinfo.gcroot) : vinfo.V; } Value *box; @@ -2150,8 +2240,8 @@ static void emit_cpointercheck(const jl_cgval_t &x, const std::string &msg, jl_c emit_typecheck(mark_julia_type(t, true, jl_any_type, ctx, false), (jl_value_t*)jl_datatype_type, msg, ctx); Value *istype = - builder.CreateICmpEQ(emit_datatype_name(t), - literal_pointer_val((jl_value_t*)jl_pointer_typename)); + builder.CreateICmpEQ(mark_callee_rooted(emit_datatype_name(t)), + mark_callee_rooted(literal_pointer_val((jl_value_t*)jl_pointer_typename))); BasicBlock *failBB = BasicBlock::Create(jl_LLVMContext,"fail",ctx->f); BasicBlock *passBB = BasicBlock::Create(jl_LLVMContext,"pass"); builder.CreateCondBr(istype, passBB, failBB); @@ -2186,7 +2276,7 @@ static Value *emit_allocobj(jl_codectx_t *ctx, size_t static_size, Value *jt) v = builder.CreateCall(prepare_call(jlalloc_pool_func), ArrayRef(args, 3)); } - tbaa_decorate(tbaa_tag, builder.CreateStore(jt, emit_typeptr_addr(v))); + tbaa_decorate(tbaa_tag, builder.CreateStore(maybe_decay_untracked(jt), emit_typeptr_addr(v))); return v; } @@ -2212,7 +2302,7 @@ static void emit_write_barrier(jl_codectx_t *ctx, Value *parent, Value *ptr) Value *ptr_not_marked = builder.CreateICmpEQ(ptr_mark_bit, ConstantInt::get(T_size, 0)); builder.CreateCondBr(ptr_not_marked, barrier_trigger, cont); builder.SetInsertPoint(barrier_trigger); - builder.CreateCall(prepare_call(queuerootfun), emit_bitcast(parent, T_pjlvalue)); + builder.CreateCall(prepare_call(queuerootfun), maybe_decay_untracked(emit_bitcast(parent, T_prjlvalue))); builder.CreateBr(cont); ctx->f->getBasicBlockList().push_back(cont); builder.SetInsertPoint(cont); @@ -2221,7 +2311,7 @@ static void emit_write_barrier(jl_codectx_t *ctx, Value *parent, Value *ptr) static void emit_checked_write_barrier(jl_codectx_t *ctx, Value *parent, Value *ptr) { BasicBlock *cont; - Value *not_null = builder.CreateICmpNE(ptr, V_null); + Value *not_null = builder.CreateICmpNE(mark_callee_rooted(ptr), mark_callee_rooted(V_null)); BasicBlock *if_not_null = BasicBlock::Create(jl_LLVMContext, "wb_not_null", ctx->f); cont = BasicBlock::Create(jl_LLVMContext, "cont"); builder.CreateCondBr(not_null, if_not_null, cont); @@ -2241,8 +2331,9 @@ static void emit_setfield(jl_datatype_t *sty, const jl_cgval_t &strct, size_t id ConstantInt::get(T_size, jl_field_offset(sty, idx0))); jl_value_t *jfty = jl_svecref(sty->types, idx0); if (jl_field_isptr(sty, idx0)) { - Value *r = boxed(rhs, ctx, false); // don't need a temporary gcroot since it'll be rooted by strct (but should ensure strct is rooted via mark_gc_use) - tbaa_decorate(strct.tbaa, builder.CreateStore(r, emit_bitcast(addr, T_ppjlvalue))); + Value *r = maybe_decay_untracked(boxed(rhs, ctx, false)); // don't need a temporary gcroot since it'll be rooted by strct (but should ensure strct is rooted via mark_gc_use) + tbaa_decorate(strct.tbaa, builder.CreateStore(r, + emit_bitcast(addr, T_pprjlvalue))); if (wb && strct.isboxed) emit_checked_write_barrier(ctx, boxed(strct, ctx), r); mark_gc_use(strct); } @@ -2331,11 +2422,11 @@ static jl_cgval_t emit_new_struct(jl_value_t *ty, size_t nargs, jl_value_t **arg for (size_t i = 0; i < nf; i++) { if (jl_field_isptr(sty, i)) { tbaa_decorate(strctinfo.tbaa, builder.CreateStore( - V_null, - builder.CreatePointerCast( - builder.CreateGEP(emit_bitcast(strct, T_pint8), + ConstantPointerNull::get(cast(T_prjlvalue)), + emit_bitcast( + builder.CreateGEP(emit_bitcast(decay_derived(strct), T_pint8), ConstantInt::get(T_size, jl_field_offset(sty, i))), - T_ppjlvalue))); + 
T_pprjlvalue))); } } bool need_wb = false; @@ -2374,7 +2465,7 @@ static jl_cgval_t emit_new_struct(jl_value_t *ty, size_t nargs, jl_value_t **arg static Value *emit_exc_in_transit(jl_codectx_t *ctx) { - Value *pexc_in_transit = emit_bitcast(ctx->ptlsStates, T_ppjlvalue); + Value *pexc_in_transit = emit_bitcast(ctx->ptlsStates, T_pprjlvalue); Constant *offset = ConstantInt::getSigned(T_int32, offsetof(jl_tls_states_t, exception_in_transit) / sizeof(void*)); return builder.CreateGEP(pexc_in_transit, ArrayRef(offset), "jl_exception_in_transit"); diff --git a/src/codegen.cpp b/src/codegen.cpp index 7277da7a078f3..0aefa9fe40d93 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -206,6 +206,7 @@ Module *shadow_output; #define jl_builderModule builder.GetInsertBlock()->getParent()->getParent() #if JL_LLVM_VERSION >= 30700 +static DataLayout jl_data_layout(""); // No DataLayout pass needed anymore. #elif JL_LLVM_VERSION >= 30500 static DataLayoutPass *jl_data_layout; @@ -214,8 +215,11 @@ static DataLayout *jl_data_layout; #endif // types +static Type *T_jlvalue; static Type *T_pjlvalue; +static Type *T_prjlvalue; static Type *T_ppjlvalue; +static Type *T_pprjlvalue; static Type *jl_parray_llvmt; static FunctionType *jl_func_sig; static FunctionType *jl_func_sig_sparams; @@ -419,6 +423,7 @@ static Function *gckill_func; static Function *jlcall_frame_func; static Function *gcroot_flush_func; static Function *except_enter_func; +static Function *pointer_from_objref_func; static std::vector two_pvalue_llvmt; static std::vector three_pvalue_llvmt; @@ -641,7 +646,6 @@ static jl_cgval_t emit_expr(jl_value_t *expr, jl_codectx_t *ctx); static Value *emit_local_root(jl_codectx_t *ctx, jl_varinfo_t *vi = NULL); static void mark_gc_use(const jl_cgval_t &v); -static Value *make_jlcall(ArrayRef args, jl_codectx_t *ctx); static Value *global_binding_pointer(jl_module_t *m, jl_sym_t *s, jl_binding_t **pbnd, bool assign, jl_codectx_t *ctx); static jl_cgval_t emit_checked_var(Value *bp, jl_sym_t *name, jl_codectx_t *ctx, bool isvol, MDNode *tbaa); @@ -652,6 +656,10 @@ static GlobalVariable *prepare_global(GlobalVariable *G, Module *M = jl_builderM static Value *prepare_call(Value *Callee); static Value *prepare_call(IRBuilder<> &builder, Value *Callee); static void CreateTrap(IRBuilder<> &builder); +static Value *emit_jlcall(Value *theFptr, Value *theF, jl_cgval_t *args, + size_t nargs, jl_codectx_t *ctx); +static Value *emit_jlcall(Value *theFptr, Value *theF, jl_value_t **args, + size_t nargs, jl_codectx_t *ctx); template static void push_gc_use(T &&vec, const jl_cgval_t &v) { @@ -1011,7 +1019,7 @@ static jl_cgval_t convert_julia_type(const jl_cgval_t &v, jl_value_t *typ, jl_co union_box_dt = emit_typeof(v.V); } // didn't handle this item before, select its new union index - Value *cmp = builder.CreateICmpEQ(literal_pointer_val((jl_value_t*)jt), union_box_dt); + Value *cmp = builder.CreateICmpEQ(maybe_decay_untracked(literal_pointer_val((jl_value_t*)jt)), union_box_dt); union_box_tindex = builder.CreateSelect(cmp, ConstantInt::get(T_int8, 0x80 | idx), union_box_tindex); } }, @@ -1078,7 +1086,8 @@ static jl_cgval_t convert_julia_type(const jl_cgval_t &v, jl_value_t *typ, jl_co tbaa = tbaa_stack; isimmutable = true; } - slotv = builder.CreateSelect(isboxv, boxv, emit_bitcast(slotv, boxv->getType())); + slotv = builder.CreateSelect(isboxv, + decay_derived(boxv), emit_bitcast(slotv, boxv->getType())); jl_cgval_t newv = jl_cgval_t(slotv, froot, false, typ, new_tindex); newv.tbaa = tbaa; newv.isimmutable = 
isimmutable; @@ -1121,11 +1130,11 @@ static jl_cgval_t convert_julia_type(const jl_cgval_t &v, jl_value_t *typ, jl_co } if (makeboxed) { // convert to a simple isboxed value - Value *boxv = boxed(v, ctx); + Value *boxv = boxed(v, ctx, false); Value *froot = NULL; if (needsroot) { froot = emit_local_root(ctx); - builder.CreateStore(boxv, froot); + builder.CreateStore(maybe_decay_untracked(boxv), froot); } return jl_cgval_t(boxv, froot, true, typ, NULL); } @@ -1380,7 +1389,9 @@ static void jl_setup_module(Module *m, const jl_cgparams_t *params = &jl_default m->addModuleFlag(llvm::Module::Error, "Debug Info Version", llvm::DEBUG_METADATA_VERSION); #endif -#if JL_LLVM_VERSION >= 30700 +#if JL_LLVM_VERSION >= 40000 + m->setDataLayout(jl_data_layout); +#elif JL_LLVM_VERSION >= 30700 #ifdef USE_ORCJIT m->setDataLayout(jl_ExecutionEngine->getDataLayout()); #elif JL_LLVM_VERSION >= 30800 @@ -2369,13 +2380,14 @@ static void simple_escape_analysis(jl_value_t *expr, bool esc, jl_codectx_t *ctx // Emit a gc-root slot indicator static Value *emit_local_root(jl_codectx_t *ctx, jl_varinfo_t *vi) { - CallInst *newroot = CallInst::Create(prepare_call(gcroot_func), "", /*InsertBefore*/ctx->ptlsStates); + Instruction *newroot = new AllocaInst(T_prjlvalue, 0, "gcroot", /*InsertBefore*/ctx->ptlsStates); if (vi) { vi->boxroot->replaceAllUsesWith(newroot); newroot->takeName(vi->boxroot); vi->boxroot->eraseFromParent(); vi->boxroot = newroot; } + return newroot; } @@ -2390,26 +2402,6 @@ static void mark_gc_use(const jl_cgval_t &v) builder.CreateCall(prepare_call(gckill_func), v.gcroot); } -// turn an array of arguments into a single object suitable for passing to a jlcall -static Value *make_jlcall(ArrayRef args, jl_codectx_t *ctx) -{ - // the temporary variables are after all local variables in the GC frame. - CallInst *largs = CallInst::Create(prepare_call(jlcall_frame_func), - ConstantInt::get(T_int32, args.size()), - "", - /*InsertBefore*/ctx->ptlsStates); - int slot = 0; - assert(args.size() > 0); - for (ArrayRef::iterator I = args.begin(), E = args.end(); I < E; ++I, ++slot) { - Value *arg = boxed(**I, ctx, false); // mark_gc_use isn't needed since jlcall_frame_func can take ownership of this root - GetElementPtrInst *newroot = GetElementPtrInst::Create(LLVM37_param(NULL) largs, - ArrayRef(ConstantInt::get(T_int32, slot))); - newroot->insertAfter(ctx->ptlsStates); - builder.CreateStore(arg, newroot); - } - return largs; -} - static void jl_add_method_root(jl_codectx_t *ctx, jl_value_t *val) { if (jl_is_leaf_type(val) || jl_is_bool(val) || jl_is_symbol(val) || @@ -2477,17 +2469,11 @@ static jl_cgval_t emit_getfield(jl_value_t *expr, jl_sym_t *name, jl_codectx_t * // and offsets of some fields are independent of parameters. 
// TODO: generic getfield func with more efficient calling convention - jl_cgval_t arg1 = emit_expr(expr, ctx); - jl_cgval_t arg2 = mark_julia_const((jl_value_t*)name); - const jl_cgval_t* myargs_array[2] = {&arg1, &arg2}; - Value *myargs = make_jlcall(makeArrayRef(myargs_array), ctx); -#if JL_LLVM_VERSION >= 30700 - Value *result = builder.CreateCall(prepare_call(jlgetfield_func), {V_null, myargs, - ConstantInt::get(T_int32,2)}); -#else - Value *result = builder.CreateCall3(prepare_call(jlgetfield_func), V_null, myargs, - ConstantInt::get(T_int32,2)); -#endif + jl_cgval_t myargs_array[2] = { + emit_expr(expr, ctx), + mark_julia_const((jl_value_t*)name) + }; + Value *result = emit_jlcall(jlgetfield_func, maybe_decay_untracked(V_null), myargs_array, 2, ctx); bool needsgcroot = true; // !arg1.isimmutable || !jl_is_leaf_type(arg1.typ) || !is_datatype_all_pointers((jl_datatype_t*)arg1.typ); // TODO: probably want this as a llvm pass jl_cgval_t ret = mark_julia_type(result, true, jl_any_type, ctx, needsgcroot); // (typ will be patched up by caller) return ret; @@ -2583,11 +2569,13 @@ static Value *emit_f_is(const jl_cgval_t &arg1, const jl_cgval_t &arg2, jl_codec return emit_isa(arg1, rt2, NULL, ctx).first; // rt2 is a singleton type if (arg2.TIndex) return emit_isa(arg2, rt1, NULL, ctx).first; // rt1 is a singleton type - // mark_gc_use isn't needed since we won't load this pointer + // rooting these values isn't needed since we won't load this pointer // and we know at least one of them is a unique Singleton // which is already enough to ensure pointer uniqueness for this test // even if the other pointer managed to get garbage collected - return builder.CreateICmpEQ(boxed(arg1, ctx, false), boxed(arg2, ctx, false)); + return builder.CreateICmpEQ( + mark_callee_rooted(boxed(arg1, ctx, false)), + mark_callee_rooted(boxed(arg2, ctx, false))); } if (jl_type_intersection(rt1, rt2) == (jl_value_t*)jl_bottom_type) // types are disjoint (exhaustive test) @@ -2632,12 +2620,13 @@ static Value *emit_f_is(const jl_cgval_t &arg1, const jl_cgval_t &arg2, jl_codec Value *varg2 = arg2.constant ? literal_pointer_val(arg2.constant) : arg2.V; assert(varg1 && varg2 && (arg1.isboxed || arg1.TIndex) && (arg2.isboxed || arg2.TIndex) && "Only boxed types are valid for pointer comparison."); - return builder.CreateICmpEQ(varg1, varg2); + return builder.CreateICmpEQ(decay_derived(varg1), + decay_derived(varg2)); } JL_FEAT_REQUIRE(ctx, runtime); - Value *varg1 = boxed(arg1, ctx); - Value *varg2 = boxed(arg2, ctx, false); // potentially unrooted! + Value *varg1 = mark_callee_rooted(boxed(arg1, ctx)); + Value *varg2 = mark_callee_rooted(boxed(arg2, ctx, false)); // potentially unrooted! 
#if JL_LLVM_VERSION >= 30700 return builder.CreateTrunc(builder.CreateCall(prepare_call(jlegal_func), {varg1, varg2}), T_int1); #else @@ -2701,11 +2690,12 @@ static bool emit_builtin_call(jl_cgval_t *ret, jl_value_t *f, jl_value_t **args, if (jl_subtype(ty, (jl_value_t*)jl_type_type)) { *ret = emit_expr(args[1], ctx); Value *rt_ty = boxed(emit_expr(args[2], ctx), ctx); + Value *rt_val = boxed(*ret, ctx); JL_FEAT_REQUIRE(ctx, runtime); #if JL_LLVM_VERSION >= 30700 - builder.CreateCall(prepare_call(jltypeassert_func), {boxed(*ret, ctx), rt_ty}); + builder.CreateCall(prepare_call(jltypeassert_func), {rt_val, rt_ty}); #else - builder.CreateCall2(prepare_call(jltypeassert_func), boxed(*ret, ctx), rt_ty); + builder.CreateCall2(prepare_call(jltypeassert_func), rt_val, rt_ty); #endif JL_GC_POP(); return true; @@ -2751,7 +2741,7 @@ static bool emit_builtin_call(jl_cgval_t *ret, jl_value_t *f, jl_value_t **args, else if (f==jl_builtin__apply && nargs==2 && ctx->vaStack && slot_eq(args[2], ctx->vaSlot)) { // turn Core._apply(f, Tuple) ==> f(Tuple...) using the jlcall calling convention if Tuple is the vaStack allocation - Value *theF = boxed(emit_expr(args[1], ctx), ctx); + Value *theF = maybe_decay_untracked(boxed(emit_expr(args[1], ctx), ctx)); Value *nva = emit_n_varargs(ctx); #ifdef _P64 nva = builder.CreateTrunc(nva, T_int32); @@ -2920,7 +2910,7 @@ static bool emit_builtin_call(jl_cgval_t *ret, jl_value_t *f, jl_value_t **args, PHINode *data_owner = NULL; // owner object against which the write barrier must check if (isboxed) { // if not boxed we don't need a write barrier assert(ary.isboxed); - Value *aryv = boxed(ary, ctx); + Value *aryv = maybe_decay_untracked(boxed(ary, ctx)); Value *flags = emit_arrayflags(ary, ctx); // the owner of the data is ary itself except if ary->how == 3 flags = builder.CreateAnd(flags, 3); @@ -2936,9 +2926,9 @@ static bool emit_builtin_call(jl_cgval_t *ret, jl_value_t *f, jl_value_t **args, own_ptr = tbaa_decorate(tbaa_const, builder.CreateLoad( emit_bitcast( builder.CreateConstGEP1_32( - emit_bitcast(aryv, T_pint8), + emit_bitcast(decay_derived(aryv), T_pint8), jl_array_data_owner_offset(nd)), - T_ppjlvalue))); + T_pprjlvalue))); } else { #if JL_LLVM_VERSION >= 30700 @@ -2953,11 +2943,11 @@ static bool emit_builtin_call(jl_cgval_t *ret, jl_value_t *f, jl_value_t **args, } builder.CreateBr(mergeBB); builder.SetInsertPoint(mergeBB); - data_owner = builder.CreatePHI(T_pjlvalue, 2); + data_owner = builder.CreatePHI(T_prjlvalue, 2); data_owner->addIncoming(aryv, curBB); data_owner->addIncoming(own_ptr, ownedBB); } - typed_store(emit_arrayptr(ary,args[1],ctx), idx, v, + typed_store(emit_arrayptr(ary,args[1],ctx,isboxed), idx, v, ety, ctx, !isboxed ? 
tbaa_arraybuf : tbaa_ptrarraybuf, data_owner, 0, false); // don't need to root the box if we had to make one since it's being stored in the array immediatly } @@ -3135,7 +3125,7 @@ static bool emit_builtin_call(jl_cgval_t *ret, jl_value_t *f, jl_value_t **args, Value *types_len = emit_datatype_nfields(tyv); Value *idx = emit_unbox(T_size, emit_expr(args[2], ctx), (jl_value_t*)jl_long_type); emit_bounds_check(ty, (jl_value_t*)jl_datatype_type, idx, types_len, ctx); - Value *fieldtyp = tbaa_decorate(tbaa_const, builder.CreateLoad(builder.CreateGEP(emit_bitcast(types_svec, T_ppjlvalue), idx))); + Value *fieldtyp = tbaa_decorate(tbaa_const, builder.CreateLoad(builder.CreateGEP(decay_derived(emit_bitcast(types_svec, T_pprjlvalue)), idx))); *ret = mark_julia_type(fieldtyp, true, expr_type(expr, ctx), ctx); JL_GC_POP(); return true; @@ -3234,34 +3224,42 @@ static bool emit_builtin_call(jl_cgval_t *ret, jl_value_t *f, jl_value_t **args, return false; } -static Value *emit_jlcall(Value *theFptr, Value *theF, jl_value_t **args, +static Value *emit_jlcall(Value *theFptr, Value *theF, jl_cgval_t *args, size_t nargs, jl_codectx_t *ctx) { // emit arguments - Value *myargs; - if (nargs > 0) { - jl_cgval_t *anArg = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs); - const jl_cgval_t **largs = (const jl_cgval_t**)alloca(sizeof(jl_cgval_t*) * nargs); - for(size_t i=0; i < nargs; i++) { - anArg[i] = emit_expr(args[i], ctx); - largs[i] = &anArg[i]; - } - // put into argument space - myargs = make_jlcall(makeArrayRef(largs, nargs), ctx); - } - else { - myargs = Constant::getNullValue(T_ppjlvalue); - } -#if JL_LLVM_VERSION >= 30700 - Value *result = builder.CreateCall(prepare_call(theFptr), {theF, myargs, - ConstantInt::get(T_int32,nargs)}); -#else - Value *result = builder.CreateCall3(prepare_call(theFptr), theF, myargs, - ConstantInt::get(T_int32,nargs)); -#endif + SmallVector theArgs; + if (theF) + theArgs.push_back(theF); + for(size_t i=0; i < nargs; i++) { + Value *arg = maybe_decay_untracked(boxed(args[i], ctx, false)); + theArgs.push_back(arg); + } + SmallVector argsT; + for(size_t i=0; i < nargs + (theF != nullptr); i++) { + argsT.push_back(T_prjlvalue); + } + FunctionType *FTy = FunctionType::get(T_prjlvalue, argsT, false); + CallInst *result = builder.CreateCall(FTy, + builder.CreateBitCast(prepare_call(theFptr), FTy->getPointerTo()), + theArgs); + if (theF) + result->setCallingConv(JLCALL_F_CC); + else + result->setCallingConv(JLCALL_CC); return result; } + +static Value *emit_jlcall(Value *theFptr, Value *theF, jl_value_t **args, + size_t nargs, jl_codectx_t *ctx) +{ + jl_cgval_t *cgargs = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs); + for (size_t i = 0; i < nargs; ++i) + cgargs[i] = emit_expr(args[i], ctx); + return emit_jlcall(theFptr, theF, cgargs, nargs, ctx); +} + static jl_cgval_t emit_call_function_object(jl_method_instance_t *li, const jl_cgval_t &theF, jl_llvm_functions_t decls, jl_value_t **args, size_t nargs, jl_value_t *callexpr, jl_codectx_t *ctx) { @@ -3294,7 +3292,7 @@ static jl_cgval_t emit_call_function_object(jl_method_instance_t *li, const jl_c break; case jl_returninfo_t::SRet: result = emit_static_alloca(cft->getParamType(0)->getContainedType(0), ctx); - argvals[idx] = result; + argvals[idx] = decay_derived(result); idx++; break; case jl_returninfo_t::Union: @@ -3320,16 +3318,16 @@ static jl_cgval_t emit_call_function_object(jl_method_instance_t *li, const jl_c assert(idx < nfargs); Type *at = cft->getParamType(idx); if (isboxed) { - assert(at == T_pjlvalue && et == 
T_pjlvalue); + assert(at == T_prjlvalue && (et == T_pjlvalue || et == T_prjlvalue)); jl_cgval_t origval = i == 0 ? theF : emit_expr(args[i], ctx); - argvals[idx] = boxed(origval, ctx); + argvals[idx] = maybe_decay_untracked(boxed(origval, ctx)); } else if (et->isAggregateType()) { // can lazy load on demand, no copy needed - assert(at == PointerType::get(et, 0)); + assert(at == PointerType::get(et, AddressSpace::Derived)); jl_cgval_t arg = i == 0 ? theF : emit_expr(args[i], ctx); assert(arg.ispointer()); - argvals[idx] = data_pointer(arg, ctx, at); + argvals[idx] = decay_derived(data_pointer(arg, ctx, at)); push_gc_use(gc_uses, arg); } else { @@ -3358,22 +3356,25 @@ static jl_cgval_t emit_call_function_object(jl_method_instance_t *li, const jl_c case jl_returninfo_t::SRet: retval = mark_julia_slot(result, jlretty, NULL, tbaa_stack); break; - case jl_returninfo_t::Union: - retval = mark_julia_slot(builder.CreateExtractValue(call, 0), + case jl_returninfo_t::Union: { + Value *box = builder.CreateExtractValue(call, 0); + Value *tindex = builder.CreateExtractValue(call, 1); + Value *derived = builder.CreateSelect( + builder.CreateICmpEQ( + builder.CreateAnd(tindex, ConstantInt::get(T_int8, 0x80)), + ConstantInt::get(T_int8, 0)), + decay_derived(builder.CreateBitCast(argvals[0], T_pjlvalue)), + decay_derived(box) + ); + retval = mark_julia_slot(derived, jlretty, - builder.CreateExtractValue(call, 1), + tindex, tbaa_stack); // root this, if the return value was a box (tindex & 0x80) != 0 retval.gcroot = emit_local_root(ctx); - builder.CreateStore( - builder.CreateSelect( - builder.CreateICmpEQ( - builder.CreateAnd(retval.TIndex, ConstantInt::get(T_int8, 0x80)), - ConstantInt::get(T_int8, 0)), - V_null, - retval.V), - retval.gcroot); + builder.CreateStore(box, retval.gcroot); break; + } case jl_returninfo_t::Ghosts: retval = mark_julia_slot(NULL, jlretty, call, tbaa_stack); break; @@ -3458,7 +3459,8 @@ static jl_cgval_t emit_call(jl_expr_t *ex, jl_codectx_t *ctx) std::map::iterator it = builtin_func_map.find(jl_get_builtin_fptr(f)); if (it != builtin_func_map.end()) { theFptr = (*it).second; - result = mark_julia_type(emit_jlcall(theFptr, V_null, &args[1], nargs, ctx), true, expr_type(expr,ctx), ctx); + result = mark_julia_type(emit_jlcall(theFptr, + maybe_decay_untracked(V_null), &args[1], nargs, ctx), true, expr_type(expr,ctx), ctx); JL_GC_POP(); return result; } @@ -3476,21 +3478,7 @@ static jl_cgval_t emit_call(jl_expr_t *ex, jl_codectx_t *ctx) // emit function and arguments nargs++; // add function to nargs count - jl_cgval_t *anArg = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs); - const jl_cgval_t **largs = (const jl_cgval_t**)alloca(sizeof(jl_cgval_t*) * nargs); - for(size_t i=0; i < nargs; i++) { - anArg[i] = emit_expr(args[i], ctx); - largs[i] = &anArg[i]; - } - // put into argument space - Value *myargs = make_jlcall(makeArrayRef(largs, nargs), ctx); -#if JL_LLVM_VERSION >= 30700 - Value *callval = builder.CreateCall(prepare_call(jlapplygeneric_func), - {myargs, ConstantInt::get(T_int32, nargs)}); -#else - Value *callval = builder.CreateCall2(prepare_call(jlapplygeneric_func), - myargs, ConstantInt::get(T_int32, nargs)); -#endif + Value *callval = emit_jlcall(jlapplygeneric_func, nullptr, args, nargs, ctx); result = mark_julia_type(callval, true, expr_type(expr, ctx), ctx); JL_GC_POP(); @@ -3505,7 +3493,8 @@ static void undef_var_error_ifnot(Value *ok, jl_sym_t *name, jl_codectx_t *ctx) BasicBlock *ifok = BasicBlock::Create(jl_LLVMContext, "ok"); builder.CreateCondBr(ok, ifok, err); 
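To make the `jl_returninfo_t::Union` handling above easier to follow: a union-returning specsig callee hands back a `{ T_prjlvalue, T_int8 }` pair, and bit `0x80` of the tag marks that the result came back boxed instead of being written to the sret buffer. A condensed sketch of the payload selection, using the same `builder`, `decay_derived`, `T_int8` and `T_pjlvalue` globals as the surrounding code (written with `CreateICmpNE`, which is just the `CreateICmpEQ` form above with the select operands swapped):
```
// `call` returns { T_prjlvalue, T_int8 }; `union_buf` is the address-space-0
// alloca that was passed as the union sret argument.
static Value *union_return_payload(CallInst *call, Value *union_buf)
{
    Value *box    = builder.CreateExtractValue(call, 0);
    Value *tindex = builder.CreateExtractValue(call, 1);
    Value *is_boxed = builder.CreateICmpNE(
        builder.CreateAnd(tindex, ConstantInt::get(T_int8, 0x80)),
        ConstantInt::get(T_int8, 0));
    // Both select operands are decayed to the Derived address space; the caller
    // separately stores `box` to a local GC root to keep it alive.
    return builder.CreateSelect(is_boxed,
        decay_derived(box),
        decay_derived(builder.CreateBitCast(union_buf, T_pjlvalue)));
}
```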
builder.SetInsertPoint(err); - builder.CreateCall(prepare_call(jlundefvarerror_func), literal_pointer_val((jl_value_t*)name)); + builder.CreateCall(prepare_call(jlundefvarerror_func), + mark_callee_rooted(literal_pointer_val((jl_value_t*)name))); builder.CreateUnreachable(); ctx->f->getBasicBlockList().push_back(ifok); builder.SetInsertPoint(ifok); @@ -3542,11 +3531,11 @@ static Value *global_binding_pointer(jl_module_t *m, jl_sym_t *s, builder.SetInsertPoint(not_found); #if JL_LLVM_VERSION >= 30700 Value *bval = builder.CreateCall(prepare_call(jlgetbindingorerror_func), - {literal_pointer_val((jl_value_t*)m), + {maybe_decay_untracked(literal_pointer_val((jl_value_t*)m)), literal_pointer_val((jl_value_t*)s)}); #else Value *bval = builder.CreateCall2(prepare_call(jlgetbindingorerror_func), - literal_pointer_val((jl_value_t*)m), + maybe_decay_untracked(literal_pointer_val((jl_value_t*)m)), literal_pointer_val((jl_value_t*)s)); #endif builder.CreateStore(bval, bindinggv); @@ -3556,7 +3545,7 @@ static Value *global_binding_pointer(jl_module_t *m, jl_sym_t *s, PHINode *p = builder.CreatePHI(T_pjlvalue, 2); p->addIncoming(cachedval, currentbb); p->addIncoming(bval, not_found); - return julia_binding_gv(emit_bitcast(p, T_ppjlvalue)); + return julia_binding_gv(emit_bitcast(p, T_pprjlvalue)); } if (b->deprecated) cg_bdw(b, ctx); } @@ -3566,11 +3555,11 @@ static Value *global_binding_pointer(jl_module_t *m, jl_sym_t *s, static jl_cgval_t emit_checked_var(Value *bp, jl_sym_t *name, jl_codectx_t *ctx, bool isvol, MDNode *tbaa) { - assert(bp->getType() == T_ppjlvalue); + assert(bp->getType() == T_pprjlvalue); Instruction *v = builder.CreateLoad(bp, isvol); if (tbaa) tbaa_decorate(tbaa, v); - undef_var_error_ifnot(builder.CreateICmpNE(v, V_null), name, ctx); + undef_var_error_ifnot(builder.CreateICmpNE(v, maybe_decay_untracked(V_null)), name, ctx); return mark_julia_type(v, true, jl_any_type, ctx); } @@ -3583,8 +3572,8 @@ static jl_cgval_t emit_sparam(size_t i, jl_codectx_t *ctx) } } assert(ctx->spvals_ptr != NULL); - Value *bp = builder.CreateConstInBoundsGEP1_32(LLVM37_param(T_pjlvalue) - emit_bitcast(ctx->spvals_ptr, T_ppjlvalue), + Value *bp = builder.CreateConstInBoundsGEP1_32(LLVM37_param(T_prjlvalue) + emit_bitcast(decay_derived(ctx->spvals_ptr), T_pprjlvalue), i + sizeof(jl_svec_t) / sizeof(jl_value_t*)); return mark_julia_type(tbaa_decorate(tbaa_const, builder.CreateLoad(bp)), true, jl_any_type, ctx, false); } @@ -3619,7 +3608,7 @@ static jl_cgval_t emit_isdefined(jl_value_t *sym, jl_codectx_t *ctx) } if (vi.boxroot != NULL) { Value *boxed = builder.CreateLoad(vi.boxroot, vi.isVolatile); - Value *box_isnull = builder.CreateICmpNE(boxed, V_null); + Value *box_isnull = builder.CreateICmpNE(boxed, maybe_decay_untracked(V_null)); if (vi.pTIndex) { // value is either boxed in the stack slot, or unboxed in value // as indicated by testing (pTIndex & 0x80) @@ -3660,7 +3649,7 @@ static jl_cgval_t emit_isdefined(jl_value_t *sym, jl_codectx_t *ctx) literal_pointer_val((jl_value_t*)modu), literal_pointer_val((jl_value_t*)name) }); - isnull = builder.CreateICmpNE(v, V_null); + isnull = builder.CreateICmpNE(v, ConstantInt::get(T_int32, 0)); } } return mark_julia_type(isnull, false, jl_bool_type, ctx); @@ -3715,8 +3704,9 @@ static jl_cgval_t emit_local(jl_value_t *slotload, jl_codectx_t *ctx) if (vi.boxroot != NULL) { Value *boxed = builder.CreateLoad(vi.boxroot, vi.isVolatile); Value *box_isnull; + v.gcroot = vi.boxroot; if (vi.usedUndef) - box_isnull = builder.CreateICmpNE(boxed, V_null); + box_isnull = 
builder.CreateICmpNE(boxed, maybe_decay_untracked(V_null)); if (vi.pTIndex) { // value is either boxed in the stack slot, or unboxed in value // as indicated by testing (pTIndex & 0x80) @@ -3725,9 +3715,11 @@ static jl_cgval_t emit_local(jl_value_t *slotload, jl_codectx_t *ctx) ConstantInt::get(T_int8, 0)); if (vi.usedUndef) isnull = builder.CreateSelect(load_unbox, isnull, box_isnull); - if (v.V) // v.V will be null if it is a union of all ghost values - v.V = builder.CreateSelect(load_unbox, emit_bitcast(v.V, boxed->getType()), boxed); - else + if (v.V) { // v.V will be null if it is a union of all ghost values + boxed = decay_derived(boxed); + v.V = builder.CreateSelect(load_unbox, emit_bitcast( + decay_derived(v.V), boxed->getType()), boxed); + } else v.V = boxed; v = update_julia_type(v, typ, ctx); } @@ -3792,7 +3784,7 @@ static Value *compute_box_tindex(Value *datatype, jl_value_t *supertype, jl_valu for_each_uniontype_small( [&](unsigned idx, jl_datatype_t *jt) { if (jl_subtype((jl_value_t*)jt, supertype)) { - Value *cmp = builder.CreateICmpEQ(literal_pointer_val((jl_value_t*)jt), datatype); + Value *cmp = builder.CreateICmpEQ(maybe_decay_untracked(literal_pointer_val((jl_value_t*)jt)), datatype); tindex = builder.CreateSelect(cmp, ConstantInt::get(T_int8, idx), tindex); } }, @@ -3849,14 +3841,21 @@ static void emit_assignment(jl_value_t *l, jl_value_t *r, jl_codectx_t *ctx) emit_unionmove(dest, slot, isboxed, false, NULL, ctx); Value *gcroot = NULL; if (isboxed) { - if (slot.gcroot) + Value *box; + if (slot.gcroot) { gcroot = emit_local_root(ctx); - else + // This might load the wrong object in general, but if it gets selected, below, + // we know that it was in fact the one we wanted. + box = builder.CreateLoad(slot.gcroot); + } else { gcroot = emit_static_alloca(T_pjlvalue); - Value *box = builder.CreateSelect(isboxed, emit_bitcast(slot.V, T_pjlvalue), V_null); + box = V_null; + } builder.CreateStore(box, gcroot); if (dest) // might be all ghost values - dest = builder.CreateSelect(isboxed, box, emit_bitcast(dest, box->getType())); + dest = builder.CreateSelect(isboxed, + decay_derived(box), + emit_bitcast(decay_derived(dest), box->getType())); else dest = box; } @@ -3894,7 +3893,7 @@ static void emit_assignment(jl_value_t *l, jl_value_t *r, jl_codectx_t *ctx) if (bp != NULL) { // it's a global JL_FEAT_REQUIRE(ctx, runtime); assert(bnd); - Value *rval = boxed(emit_expr(r, ctx), ctx, false); // no root needed since this is about to be assigned to a global + Value *rval = mark_callee_rooted(boxed(emit_expr(r, ctx), ctx, false)); // no root needed since this is about to be assigned to a global #if JL_LLVM_VERSION >= 30700 builder.CreateCall(prepare_call(jlcheckassign_func), {literal_pointer_val(bnd), @@ -3923,7 +3922,7 @@ static void emit_assignment(jl_value_t *l, jl_value_t *r, jl_codectx_t *ctx) // convert rval-type to lval-type jl_value_t *slot_type = vi.value.typ; - rval_info = convert_julia_type(rval_info, slot_type, ctx, /*needs-root*/false); + rval_info = convert_julia_type(rval_info, slot_type, ctx, /*needs-root*/true); if (rval_info.typ == jl_bottom_type) return; @@ -3966,16 +3965,17 @@ static void emit_assignment(jl_value_t *l, jl_value_t *r, jl_codectx_t *ctx) isboxed = builder.CreateICmpNE( builder.CreateAnd(rval_info.TIndex, ConstantInt::get(T_int8, 0x80)), ConstantInt::get(T_int8, 0)); - rval = V_null; - if (rval_info.ispointer() && rval_info.V != NULL && !isa(rval_info.V)) // might be all ghost values or otherwise definitely not boxed - rval = 
builder.CreateSelect(isboxed, emit_bitcast(rval_info.V, rval->getType()), rval); + rval = maybe_decay_untracked(V_null); + if (rval_info.ispointer() && rval_info.V != NULL && !isa(rval_info.V) && + !(isa(isboxed) && cast(isboxed)->isZero())) // might be all ghost values or otherwise definitely not boxed + rval = builder.CreateLoad(rval_info.gcroot); assert(!vi.value.constant); } else { assert(!vi.pTIndex || rval_info.isboxed || rval_info.constant); - rval = boxed(rval_info, ctx, false); + rval = maybe_decay_untracked(boxed(rval_info, ctx, false)); } - builder.CreateStore(rval, vi.boxroot, vi.isVolatile); + builder.CreateStore(maybe_decay_untracked(rval), vi.boxroot, vi.isVolatile); } // store unboxed variables @@ -4061,7 +4061,8 @@ static Value *emit_condition(const jl_cgval_t &condV, const std::string &msg, return builder.CreateXor(builder.CreateTrunc(cond, T_int1), ConstantInt::get(T_int1, 1)); } if (condV.isboxed) { - return builder.CreateICmpEQ(boxed(condV, ctx), literal_pointer_val(jl_false)); + return builder.CreateICmpEQ(boxed(condV, ctx), + maybe_decay_untracked(literal_pointer_val(jl_false))); } // not a boolean return ConstantInt::get(T_int1, 0); // TODO: replace with Undef @@ -4093,7 +4094,7 @@ static void emit_stmtpos(jl_value_t *expr, jl_codectx_t *ctx) // create a new uninitialized variable Value *lv = vi.boxroot; if (lv != NULL) - builder.CreateStore(V_null, lv); + builder.CreateStore(maybe_decay_untracked(V_null), lv); if (lv == NULL || vi.pTIndex != NULL) store_def_flag(vi, false); } @@ -4257,15 +4258,16 @@ static jl_cgval_t emit_expr(jl_value_t *expr, jl_codectx_t *ctx) name = literal_pointer_val((jl_value_t*)slot_symbol(sl, ctx)); } if (bp) { - Value *mdargs[5] = { name, literal_pointer_val((jl_value_t*)mod), bp, bp_owner, literal_pointer_val(bnd) }; + Value *mdargs[5] = { name, literal_pointer_val((jl_value_t*)mod), bp, + maybe_decay_untracked(bp_owner), literal_pointer_val(bnd) }; jl_cgval_t gf = mark_julia_type( builder.CreateCall(prepare_call(jlgenericfunction_func), makeArrayRef(mdargs)), true, jl_function_type, ctx); if (jl_expr_nargs(ex) == 1) return gf; } - Value *a1 = boxed(emit_expr(args[1], ctx), ctx); - Value *a2 = boxed(emit_expr(args[2], ctx), ctx); + Value *a1 = maybe_decay_untracked(boxed(emit_expr(args[1], ctx), ctx)); + Value *a2 = maybe_decay_untracked(boxed(emit_expr(args[2], ctx), ctx)); Value *mdargs[4] = { /*argdata*/a1, /*code*/a2, @@ -4319,7 +4321,8 @@ static jl_cgval_t emit_expr(jl_value_t *expr, jl_codectx_t *ctx) } } jl_cgval_t ast = emit_expr(arg, ctx); - return mark_julia_type(builder.CreateCall(prepare_call(jlcopyast_func), boxed(ast, ctx)), true, ast.typ, ctx); + return mark_julia_type(builder.CreateCall(prepare_call(jlcopyast_func), + maybe_decay_untracked(boxed(ast, ctx))), true, ast.typ, ctx); } else if (head == simdloop_sym) { llvm::annotateSimdLoop(builder.GetInsertBlock()); @@ -4378,7 +4381,7 @@ static void allocate_gc_frame(BasicBlock *b0, jl_codectx_t *ctx) ctx->ptlsStates = builder.CreateCall(prepare_call(jltls_states_func)); int nthfield = offsetof(jl_tls_states_t, safepoint) / sizeof(void*); ctx->signalPage = emit_nthptr_recast(ctx->ptlsStates, nthfield, tbaa_const, - PointerType::get(T_psize, 0)); + PointerType::get(T_psize, 0), false); } static void emit_last_age_field(jl_codectx_t *ctx) @@ -4396,6 +4399,8 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t Type *crt = julia_struct_to_llvm(jlrettype, NULL, &toboxed); if (crt == NULL) jl_error("cfunction: return type doesn't correspond to a C 
type"); + else if (toboxed) + crt = T_prjlvalue; size_t nargs = jl_nparams(argt); function_sig_t sig(crt, jlrettype, toboxed, argt->parameters, NULL, nargs, false, CallingConv::C, false); @@ -4480,7 +4485,7 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t Value *lam_max = builder.CreateLoad( builder.CreateConstInBoundsGEP1_32( LLVM37_param(T_size) - emit_bitcast(literal_pointer_val((jl_value_t*)lam), T_psize), + emit_bitcast(decay_derived(literal_pointer_val((jl_value_t*)lam)), T_psize), offsetof(jl_method_instance_t, max_world) / sizeof(size_t))); // XXX: age is always OK if we don't have a TLS. This is a hack required due to `@threadcall` abuse. // and adds quite a bit of complexity here, even though it's still wrong @@ -4532,7 +4537,7 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t if (sig.sret && jlfunc_sret) result = emit_bitcast(sretPtr, cft->getParamType(0)); else - result = builder.CreateAlloca(cft->getParamType(0)->getContainedType(0)); + result = decay_derived(builder.CreateAlloca(cft->getParamType(0)->getContainedType(0))); args.push_back(result); FParamIndex++; } @@ -4542,10 +4547,11 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t theFptr = lam ? (Function*)lam->functionObjectsDecls.functionObject : NULL; specsig = false; jlfunc_sret = false; - myargs = CallInst::Create(prepare_call(jlcall_frame_func), - ConstantInt::get(T_int32, nargs + 1), - "", - /*InsertBefore*/ctx.ptlsStates); + myargs = new AllocaInst(T_prjlvalue, +#if JL_LLVM_VERSION >= 50000 + 0, +#endif + ConstantInt::get(T_int32, nargs + 1), "jlcall", /*InsertBefore*/ctx.ptlsStates); FParamIndex++; // leave room for writing the ff object at the beginning } @@ -4560,12 +4566,12 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t jargty = jl_tparam0(jargty); if (jargty == (jl_value_t*)jl_any_type) { inputarg = mark_julia_type( - builder.CreateLoad(builder.CreatePointerCast(val, T_ppjlvalue)), + builder.CreateLoad(emit_bitcast(val, T_pprjlvalue)), true, jargty, &ctx); } else if (!jl_isbits(jargty)) { // must be a jl_value_t* (because it's mutable or contains gc roots) - inputarg = mark_julia_type(builder.CreatePointerCast(val, T_pjlvalue), true, jargty, &ctx); + inputarg = mark_julia_type(maybe_decay_untracked(emit_bitcast(val, T_prjlvalue)), true, jargty, &ctx); } else { bool isboxed; @@ -4576,7 +4582,7 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t inputarg = ghostValue(jargty); } else { - val = builder.CreatePointerCast(val, T->getPointerTo()); + val = emit_bitcast(val, T->getPointerTo()); val = builder.CreateAlignedLoad(val, 1); // make no alignment assumption about pointer from C inputarg = mark_julia_type(val, false, jargty, &ctx); } @@ -4593,7 +4599,7 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t // something of type T // undo whatever we might have done to this poor argument if (sig.byRefList.at(i)) { - assert(val->getType() == sig.fargt[i]->getPointerTo()); + assert(cast(val->getType())->getElementType() == sig.fargt[i]); val = builder.CreateAlignedLoad(val, 1); // unknown alignment from C } else { @@ -4802,10 +4808,12 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t allocate_gc_frame(b0, &ctx2); Function::arg_iterator AI = gf_thunk->arg_begin(); - Value *myargs = CallInst::Create(prepare_call(jlcall_frame_func), - ConstantInt::get(T_int32, nargs + 1), - "", - 
/*InsertBefore*/ctx2.ptlsStates); + + Value *myargs = new AllocaInst(T_prjlvalue, +#if JL_LLVM_VERSION >= 50000 + 0, +#endif + ConstantInt::get(T_int32, nargs + 1), "jlcall", ctx2.ptlsStates); if (cc == jl_returninfo_t::SRet || cc == jl_returninfo_t::Union) ++AI; for (size_t i = 0; i < nargs + 1; i++) { @@ -4822,7 +4830,7 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t ++AI; Type *at = arg_v->getType(); if (isboxed) { - assert(at == T_pjlvalue && et == T_pjlvalue); + assert(at == T_prjlvalue && et == T_pjlvalue); arg_box = arg_v; } else if (et->isAggregateType()) { @@ -4835,7 +4843,7 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t (void)at; } Value *argn = builder.CreateConstInBoundsGEP1_32(LLVM37_param(NULL) myargs, i); - builder.CreateStore(arg_box, argn); + builder.CreateStore(maybe_decay_untracked(arg_box), argn); } assert(AI == gf_thunk->arg_end()); Value *nargs_v = ConstantInt::get(T_int32, nargs + 1); @@ -5039,7 +5047,7 @@ static Function *gen_jlcall_wrapper(jl_method_instance_t *lam, const jl_returnin break; case jl_returninfo_t::SRet: result = builder.CreateAlloca(ftype->getParamType(0)->getContainedType(0)); - args[idx] = result; + args[idx] = decay_derived(result); idx++; break; case jl_returninfo_t::Union: @@ -5065,7 +5073,7 @@ static Function *gen_jlcall_wrapper(jl_method_instance_t *lam, const jl_returnin theArg = builder.CreateLoad(argPtr); } if (lty != NULL && !isboxed) { - theArg = builder.CreatePointerCast(theArg, PointerType::get(lty, 0)); + theArg = decay_derived(emit_bitcast(theArg, PointerType::get(lty, 0))); if (!lty->isAggregateType()) // keep "aggregate" type values in place as pointers theArg = builder.CreateAlignedLoad(theArg, julia_alignment(theArg, ty, 0)); } @@ -5089,10 +5097,14 @@ static Function *gen_jlcall_wrapper(jl_method_instance_t *lam, const jl_returnin retval = mark_julia_slot(result, jlretty, NULL, tbaa_stack); break; case jl_returninfo_t::Union: - retval = mark_julia_slot(builder.CreateExtractValue(call, 0), + // result is technically not right here, but we only need to look at it + // for the unboxed values, so it's ok. 
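The `#if JL_LLVM_VERSION >= 50000` / `0,` split inside the two `new AllocaInst(T_prjlvalue, ...)` sites above exists because LLVM 5.0 added an address-space parameter to `AllocaInst`'s constructor. A hypothetical helper (not part of this patch) that hides the version difference:
```
// Allocate an on-stack jlcall argument frame of `nslots` tracked pointer slots,
// inserted before `before` (the ptlsStates call in the code above).
static AllocaInst *alloca_jlcall_frame(Type *T_prjlvalue, unsigned nslots, Instruction *before)
{
    Value *count = ConstantInt::get(Type::getInt32Ty(before->getContext()), nslots);
#if JL_LLVM_VERSION >= 50000
    // LLVM 5.0+: AllocaInst(Type, AddrSpace, ArraySize, Name, InsertBefore)
    return new AllocaInst(T_prjlvalue, /*AddrSpace*/0, count, "jlcall", before);
#else
    // Older LLVM: AllocaInst(Type, ArraySize, Name, InsertBefore)
    return new AllocaInst(T_prjlvalue, count, "jlcall", before);
#endif
}
```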
+ retval = mark_julia_slot(result, jlretty, builder.CreateExtractValue(call, 1), tbaa_stack); + retval.gcroot = emit_local_root(&ctx); + builder.CreateStore(builder.CreateExtractValue(call, 0), retval.gcroot); break; case jl_returninfo_t::Ghosts: retval = mark_julia_slot(NULL, jlretty, call, tbaa_stack); @@ -5119,7 +5131,7 @@ static jl_returninfo_t get_specsig_function(Module *M, const std::string &name, props.cc = jl_returninfo_t::Union; Type *AT = ArrayType::get(T_int8, props.union_bytes); fsig.push_back(AT->getPointerTo()); - Type *pair[] = { T_pjlvalue, T_int8 }; + Type *pair[] = { T_prjlvalue, T_int8 }; rt = StructType::get(jl_LLVMContext, makeArrayRef(pair)); } else if (allunbox) { @@ -5127,7 +5139,7 @@ static jl_returninfo_t get_specsig_function(Module *M, const std::string &name, rt = T_int8; } else { - rt = T_pjlvalue; + rt = T_prjlvalue; } } else { @@ -5136,12 +5148,14 @@ static jl_returninfo_t get_specsig_function(Module *M, const std::string &name, if (!retboxed) { if (rt != T_void && deserves_sret(jlrettype, rt)) { props.cc = jl_returninfo_t::SRet; - fsig.push_back(rt->getPointerTo()); + fsig.push_back(rt->getPointerTo(AddressSpace::Derived)); rt = T_void; } else { props.cc = jl_returninfo_t::Register; } + } else { + rt = T_prjlvalue; } } #if JL_LLVM_VERSION >= 50000 @@ -5160,7 +5174,8 @@ static jl_returninfo_t get_specsig_function(Module *M, const std::string &name, } for (size_t i = 0; i < jl_nparams(sig); i++) { jl_value_t *jt = jl_tparam(sig, i); - Type *ty = julia_type_to_llvm(jt); + bool isboxed; + Type *ty = julia_type_to_llvm(jt, &isboxed); if (type_is_ghost(ty)) continue; if (ty->isAggregateType()) { // aggregate types are passed by pointer @@ -5168,8 +5183,10 @@ static jl_returninfo_t get_specsig_function(Module *M, const std::string &name, #if JL_LLVM_VERSION >= 30500 attributes = attributes.addAttribute(jl_LLVMContext, fsig.size() + 1, Attribute::ReadOnly); #endif - ty = PointerType::get(ty, 0); + ty = PointerType::get(ty, AddressSpace::Derived); } + if (isboxed) + ty = PointerType::get(cast(ty)->getElementType(), AddressSpace::Tracked); fsig.push_back(ty); } FunctionType *ftype = FunctionType::get(rt, fsig, false); @@ -5712,11 +5729,15 @@ static std::unique_ptr emit_function( (va && (int)i == ctx.vaSlot && varinfo.escapes) || // or it's the va arg tuple (s != unused_sym && i == 0)) { // or it is the first argument (which isn't in `argArray`) #if JL_LLVM_VERSION >= 50000 - AllocaInst *av = new AllocaInst(T_pjlvalue, 0, + AllocaInst *av = new AllocaInst(T_prjlvalue, 0, #else - AllocaInst *av = new AllocaInst(T_pjlvalue, + AllocaInst *av = new AllocaInst(T_prjlvalue, #endif jl_symbol_name(s), /*InsertBefore*/ctx.ptlsStates); + StoreInst *SI = new StoreInst( + ConstantPointerNull::get(cast(T_prjlvalue)), av, + false); + SI->insertAfter(ctx.ptlsStates); varinfo.boxroot = av; #if JL_LLVM_VERSION >= 30600 if (ctx.debug_enabled && varinfo.dinfo) { @@ -5852,14 +5873,14 @@ static std::unique_ptr emit_function( if (vi.boxroot != NULL) { #if JL_LLVM_VERSION >= 30700 Value *restTuple = - builder.CreateCall(prepare_call(jltuple_func), {V_null, + builder.CreateCall(prepare_call(jltuple_func), {maybe_decay_untracked(V_null), builder.CreateGEP(argArray, ConstantInt::get(T_size,nreq-1)), builder.CreateSub(argCount, ConstantInt::get(T_int32,nreq-1))}); #else Value *restTuple = - builder.CreateCall3(prepare_call(jltuple_func), V_null, + builder.CreateCall3(prepare_call(jltuple_func), maybe_decay_untracked(V_null), builder.CreateGEP(argArray, ConstantInt::get(T_size,nreq-1)), 
builder.CreateSub(argCount, @@ -6205,18 +6226,25 @@ static std::unique_ptr emit_function( tindex = retvalinfo.TIndex; if (retvalinfo.V == NULL) { // treat this as a simple Ghosts - data = V_null; + data = maybe_decay_untracked(V_null); sret = NULL; } else { - data = emit_bitcast(sret, T_pjlvalue); + data = maybe_decay_untracked(V_null); if (retvalinfo.ispointer() && !isa(retvalinfo.V)) { // also need to account for the possibility the return object is boxed // and avoid / skip copying it to the stack isboxed_union = builder.CreateICmpNE( builder.CreateAnd(tindex, ConstantInt::get(T_int8, 0x80)), ConstantInt::get(T_int8, 0)); - data = builder.CreateSelect(isboxed_union, emit_bitcast(retvalinfo.V, T_pjlvalue), data); + // Lift the select, because gcroot may be NULL if + // there's no boxed value. + if (isa(isboxed_union)) + data = cast(isboxed_union)->isZero() ? data : builder.CreateLoad(retvalinfo.gcroot); + else + data = builder.CreateSelect(isboxed_union, + builder.CreateLoad(retvalinfo.gcroot), + data); } } } @@ -6225,7 +6253,7 @@ static std::unique_ptr emit_function( //assert(retvalinfo.isboxed); tindex = compute_tindex_unboxed(retvalinfo, jlrettype, &ctx); tindex = builder.CreateOr(tindex, ConstantInt::get(T_int8, 0x80)); - data = boxed(retvalinfo, &ctx, false); // skip the gcroot on the return path + data = maybe_decay_untracked(boxed(retvalinfo, &ctx, false)); // skip the gcroot on the return path sret = NULL; } retval = UndefValue::get(retty); @@ -6599,8 +6627,11 @@ static void init_julia_llvm_env(Module *m) dbuilder.getOrCreateArray(ArrayRef())); #endif - T_pjlvalue = T_ppint8; + T_jlvalue = StructType::create(jl_LLVMContext, "jl_value_t"); + T_pjlvalue = PointerType::get(T_jlvalue, 0); + T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked); T_ppjlvalue = PointerType::get(T_pjlvalue, 0); + T_pprjlvalue = PointerType::get(T_prjlvalue, 0); two_pvalue_llvmt.push_back(T_pjlvalue); two_pvalue_llvmt.push_back(T_pjlvalue); three_pvalue_llvmt.push_back(T_pjlvalue); @@ -6614,14 +6645,14 @@ static void init_julia_llvm_env(Module *m) jl_init_jit(T_pjlvalue); std::vector ftargs(0); - ftargs.push_back(T_pjlvalue); // linfo->sparam_vals - ftargs.push_back(T_pjlvalue); // function - ftargs.push_back(T_ppjlvalue); // args[] - ftargs.push_back(T_int32); // nargs - jl_func_sig_sparams = FunctionType::get(T_pjlvalue, ftargs, false); + ftargs.push_back(T_prjlvalue); // linfo->sparam_vals + ftargs.push_back(T_prjlvalue); // function + ftargs.push_back(T_pprjlvalue); // args[] + ftargs.push_back(T_int32); // nargs + jl_func_sig_sparams = FunctionType::get(T_prjlvalue, ftargs, false); assert(jl_func_sig_sparams != NULL); ftargs.erase(ftargs.begin()); // drop linfo->sparams_vals argument - jl_func_sig = FunctionType::get(T_pjlvalue, ftargs, false); + jl_func_sig = FunctionType::get(T_prjlvalue, ftargs, false); assert(jl_func_sig != NULL); Type *vaelts[] = {T_pint8 @@ -6722,7 +6753,7 @@ static void init_julia_llvm_env(Module *m) add_named_global(jlerror_func, &jl_error); std::vector args1_(0); - args1_.push_back(T_pjlvalue); + args1_.push_back(PointerType::get(T_jlvalue, AddressSpace::CalleeRooted)); jlthrow_func = Function::Create(FunctionType::get(T_void, args1_, false), Function::ExternalLinkage, @@ -6730,6 +6761,8 @@ static void init_julia_llvm_env(Module *m) jlthrow_func->setDoesNotReturn(); add_named_global(jlthrow_func, &jl_throw); + // Symbols are not gc-tracked, but we'll treat them as callee rooted anyway, + // because they may come from a gc-rooted location jlundefvarerror_func = 
Function::Create(FunctionType::get(T_void, args1_, false), Function::ExternalLinkage, @@ -6738,7 +6771,7 @@ static void init_julia_llvm_env(Module *m) add_named_global(jlundefvarerror_func, &jl_undefined_var_error); std::vector args2_boundserrorv(0); - args2_boundserrorv.push_back(T_pjlvalue); + args2_boundserrorv.push_back(PointerType::get(T_jlvalue, AddressSpace::CalleeRooted)); args2_boundserrorv.push_back(T_psize); args2_boundserrorv.push_back(T_size); jlboundserrorv_func = @@ -6749,7 +6782,7 @@ static void init_julia_llvm_env(Module *m) add_named_global(jlboundserrorv_func, &jl_bounds_error_ints); std::vector args2_boundserror(0); - args2_boundserror.push_back(T_pjlvalue); + args2_boundserror.push_back(PointerType::get(T_jlvalue, AddressSpace::CalleeRooted)); args2_boundserror.push_back(T_size); jlboundserror_func = Function::Create(FunctionType::get(T_void, args2_boundserror, false), @@ -6759,7 +6792,7 @@ static void init_julia_llvm_env(Module *m) add_named_global(jlboundserror_func, &jl_bounds_error_int); std::vector args3_vboundserror(0); - args3_vboundserror.push_back(T_ppjlvalue); + args3_vboundserror.push_back(T_pprjlvalue); args3_vboundserror.push_back(T_size); args3_vboundserror.push_back(T_size); jlvboundserror_func = @@ -6770,8 +6803,8 @@ static void init_julia_llvm_env(Module *m) add_named_global(jlvboundserror_func, &jl_bounds_error_tuple_int); std::vector args3_uboundserror(0); - args3_uboundserror.push_back(T_pint8); - args3_uboundserror.push_back(T_pjlvalue); + args3_uboundserror.push_back(PointerType::get(T_int8, AddressSpace::Derived)); + args3_uboundserror.push_back(T_prjlvalue); args3_uboundserror.push_back(T_size); jluboundserror_func = Function::Create(FunctionType::get(T_void, args3_uboundserror, false), @@ -6808,8 +6841,8 @@ static void init_julia_llvm_env(Module *m) std::vector te_args(0); te_args.push_back(T_pint8); te_args.push_back(T_pint8); - te_args.push_back(T_pjlvalue); - te_args.push_back(T_pjlvalue); + te_args.push_back(T_prjlvalue); + te_args.push_back(PointerType::get(T_jlvalue, AddressSpace::CalleeRooted)); jltypeerror_func = Function::Create(FunctionType::get(T_void, te_args, false), Function::ExternalLinkage, @@ -6819,29 +6852,32 @@ static void init_julia_llvm_env(Module *m) std::vector args_2ptrs(0); args_2ptrs.push_back(T_pjlvalue); - args_2ptrs.push_back(T_pjlvalue); + args_2ptrs.push_back(PointerType::get(T_jlvalue, AddressSpace::CalleeRooted)); jlcheckassign_func = Function::Create(FunctionType::get(T_void, args_2ptrs, false), Function::ExternalLinkage, "jl_checked_assignment", m); add_named_global(jlcheckassign_func, &jl_checked_assignment); - std::vector args_1ptr(0); - args_1ptr.push_back(T_pjlvalue); + std::vector args_1binding(0); + args_1binding.push_back(T_pjlvalue); jldeclareconst_func = - Function::Create(FunctionType::get(T_void, args_1ptr, false), + Function::Create(FunctionType::get(T_void, args_1binding, false), Function::ExternalLinkage, "jl_declare_constant", m); add_named_global(jldeclareconst_func, &jl_declare_constant); + std::vector args_2ptrs_(0); + args_2ptrs_.push_back(T_prjlvalue); + args_2ptrs_.push_back(T_prjlvalue); jlgetbindingorerror_func = - Function::Create(FunctionType::get(T_pjlvalue, args_2ptrs, false), + Function::Create(FunctionType::get(T_pjlvalue, args_2ptrs_, false), Function::ExternalLinkage, "jl_get_binding_or_error", m); add_named_global(jlgetbindingorerror_func, &jl_get_binding_or_error); jlboundp_func = - Function::Create(FunctionType::get(T_pjlvalue, args_2ptrs, false), + 
Function::Create(FunctionType::get(T_int32, args_2ptrs_, false), Function::ExternalLinkage, "jl_boundp", m); add_named_global(jlboundp_func, &jl_boundp); @@ -6876,11 +6912,6 @@ static void init_julia_llvm_env(Module *m) jlapply2va_func = jlcall_func_to_llvm("jl_apply_2va", &jl_apply_2va, m); - jltypeassert_func = Function::Create(FunctionType::get(T_void, two_pvalue_llvmt, false), - Function::ExternalLinkage, - "jl_typeassert", m); - add_named_global(jltypeassert_func, &jl_typeassert); - std::vector argsdepwarnpi(0); argsdepwarnpi.push_back(T_size); jldepwarnpi_func = Function::Create(FunctionType::get(T_void, argsdepwarnpi, false), @@ -6888,24 +6919,26 @@ static void init_julia_llvm_env(Module *m) "jl_depwarn_partial_indexing", m); add_named_global(jldepwarnpi_func, &jl_depwarn_partial_indexing); + std::vector args_1ptr(0); + args_1ptr.push_back(T_prjlvalue); queuerootfun = Function::Create(FunctionType::get(T_void, args_1ptr, false), Function::ExternalLinkage, "jl_gc_queue_root", m); add_named_global(queuerootfun, &jl_gc_queue_root); std::vector agargs(0); - agargs.push_back(T_ppjlvalue); + agargs.push_back(T_pprjlvalue); agargs.push_back(T_uint32); - jlapplygeneric_func = Function::Create(FunctionType::get(T_pjlvalue, agargs, false), + jlapplygeneric_func = Function::Create(FunctionType::get(T_prjlvalue, agargs, false), Function::ExternalLinkage, "jl_apply_generic", m); add_named_global(jlapplygeneric_func, &jl_apply_generic); std::vector invokeargs(0); - invokeargs.push_back(T_pjlvalue); - invokeargs.push_back(T_ppjlvalue); + invokeargs.push_back(T_prjlvalue); + invokeargs.push_back(T_pprjlvalue); invokeargs.push_back(T_uint32); - jlinvoke_func = Function::Create(FunctionType::get(T_pjlvalue, invokeargs, false), + jlinvoke_func = Function::Create(FunctionType::get(T_prjlvalue, invokeargs, false), Function::ExternalLinkage, "jl_invoke", m); add_named_global(jlinvoke_func, &jl_invoke); @@ -6915,8 +6948,8 @@ static void init_julia_llvm_env(Module *m) expect_func = Intrinsic::getDeclaration(m, Intrinsic::expect, exp_args); std::vector args_topeval(0); - args_topeval.push_back(T_pjlvalue); - args_topeval.push_back(T_pjlvalue); + args_topeval.push_back(T_prjlvalue); + args_topeval.push_back(T_prjlvalue); jltopeval_func = Function::Create(FunctionType::get(T_pjlvalue, args_topeval, false), Function::ExternalLinkage, @@ -6924,9 +6957,9 @@ static void init_julia_llvm_env(Module *m) add_named_global(jltopeval_func, &jl_toplevel_eval); std::vector args_copyast(0); - args_copyast.push_back(T_pjlvalue); + args_copyast.push_back(T_prjlvalue); jlcopyast_func = - Function::Create(FunctionType::get(T_pjlvalue, args_copyast, false), + Function::Create(FunctionType::get(T_prjlvalue, args_copyast, false), Function::ExternalLinkage, "jl_copy_ast", m); add_named_global(jlcopyast_func, &jl_copy_ast); @@ -6940,10 +6973,10 @@ static void init_julia_llvm_env(Module *m) add_named_global(jlnsvec_func, &jl_svec); std::vector mdargs(0); - mdargs.push_back(T_pjlvalue); - mdargs.push_back(T_pjlvalue); - mdargs.push_back(T_pjlvalue); - mdargs.push_back(T_pjlvalue); + mdargs.push_back(T_prjlvalue); + mdargs.push_back(T_prjlvalue); + mdargs.push_back(T_prjlvalue); + mdargs.push_back(T_prjlvalue); jlmethod_func = Function::Create(FunctionType::get(T_void, mdargs, false), Function::ExternalLinkage, @@ -6951,13 +6984,13 @@ static void init_julia_llvm_env(Module *m) add_named_global(jlmethod_func, &jl_method_def); std::vector funcdefargs(0); - funcdefargs.push_back(T_pjlvalue); - funcdefargs.push_back(T_pjlvalue); - 
funcdefargs.push_back(T_ppjlvalue); - funcdefargs.push_back(T_pjlvalue); + funcdefargs.push_back(T_prjlvalue); + funcdefargs.push_back(T_prjlvalue); + funcdefargs.push_back(T_pprjlvalue); + funcdefargs.push_back(T_prjlvalue); funcdefargs.push_back(T_pjlvalue); jlgenericfunction_func = - Function::Create(FunctionType::get(T_pjlvalue, funcdefargs, false), + Function::Create(FunctionType::get(T_prjlvalue, funcdefargs, false), Function::ExternalLinkage, "jl_generic_function_def", m); add_named_global(jlgenericfunction_func, &jl_generic_function_def); @@ -7012,31 +7045,39 @@ static void init_julia_llvm_env(Module *m) "jl_pop_handler", m); add_named_global(jlleave_func, &jl_pop_handler); - std::vector args_2vals(0); - args_2vals.push_back(T_pjlvalue); - args_2vals.push_back(T_pjlvalue); + std::vector args_2vals_callee_rooted(0); + args_2vals_callee_rooted.push_back(PointerType::get(T_jlvalue, AddressSpace::CalleeRooted)); + args_2vals_callee_rooted.push_back(PointerType::get(T_jlvalue, AddressSpace::CalleeRooted)); jlegal_func = - Function::Create(FunctionType::get(T_int32, args_2vals, false), + Function::Create(FunctionType::get(T_int32, args_2vals_callee_rooted, false), Function::ExternalLinkage, "jl_egal", m); add_named_global(jlegal_func, &jl_egal); + std::vector args_2vals_tracked(0); + args_2vals_tracked.push_back(T_prjlvalue); + args_2vals_tracked.push_back(T_prjlvalue); jlisa_func = - Function::Create(FunctionType::get(T_int32, args_2vals, false), + Function::Create(FunctionType::get(T_int32, args_2vals_tracked, false), Function::ExternalLinkage, "jl_isa", m); add_named_global(jlisa_func, &jl_isa); jlsubtype_func = - Function::Create(FunctionType::get(T_int32, args_2vals, false), + Function::Create(FunctionType::get(T_int32, args_2vals_tracked, false), Function::ExternalLinkage, "jl_subtype", m); add_named_global(jlsubtype_func, &jl_subtype); + jltypeassert_func = Function::Create(FunctionType::get(T_void, args_2vals_tracked, false), + Function::ExternalLinkage, + "jl_typeassert", m); + add_named_global(jltypeassert_func, &jl_typeassert); + std::vector applytype_args(0); - applytype_args.push_back(T_pjlvalue); - applytype_args.push_back(T_pjlvalue); - applytype_args.push_back(T_ppjlvalue); + applytype_args.push_back(T_prjlvalue); + applytype_args.push_back(T_prjlvalue); + applytype_args.push_back(PointerType::get(T_prjlvalue, AddressSpace::Derived)); jlapplytype_func = Function::Create(FunctionType::get(T_pjlvalue, applytype_args, false), Function::ExternalLinkage, @@ -7048,7 +7089,7 @@ static void init_julia_llvm_env(Module *m) alloc_pool_args.push_back(T_int32); alloc_pool_args.push_back(T_int32); jlalloc_pool_func = - Function::Create(FunctionType::get(T_pjlvalue, alloc_pool_args, false), + Function::Create(FunctionType::get(T_prjlvalue, alloc_pool_args, false), Function::ExternalLinkage, "jl_gc_pool_alloc", m); add_named_global(jlalloc_pool_func, &jl_gc_pool_alloc); @@ -7057,7 +7098,7 @@ static void init_julia_llvm_env(Module *m) alloc_big_args.push_back(T_pint8); alloc_big_args.push_back(T_size); jlalloc_big_func = - Function::Create(FunctionType::get(T_pjlvalue, alloc_big_args, false), + Function::Create(FunctionType::get(T_prjlvalue, alloc_big_args, false), Function::ExternalLinkage, "jl_gc_big_alloc", m); add_named_global(jlalloc_big_func, &jl_gc_big_alloc); @@ -7082,10 +7123,10 @@ static void init_julia_llvm_env(Module *m) add_named_global(jlnewbits_func, &jl_new_bits); std::vector getnthfld_args(0); - getnthfld_args.push_back(T_pjlvalue); + 
getnthfld_args.push_back(T_prjlvalue); getnthfld_args.push_back(T_size); jlgetnthfieldchecked_func = - Function::Create(FunctionType::get(T_pjlvalue, getnthfld_args, false), + Function::Create(FunctionType::get(T_prjlvalue, getnthfld_args, false), Function::ExternalLinkage, "jl_get_nth_field_checked", m); add_named_global(jlgetnthfieldchecked_func, *jl_get_nth_field_checked); @@ -7097,9 +7138,9 @@ static void init_julia_llvm_env(Module *m) add_named_global(diff_gc_total_bytes_func, *jl_gc_diff_total_bytes); std::vector array_owner_args(0); - array_owner_args.push_back(T_pjlvalue); + array_owner_args.push_back(T_prjlvalue); jlarray_data_owner_func = - Function::Create(FunctionType::get(T_pjlvalue, array_owner_args, false), + Function::Create(FunctionType::get(T_prjlvalue, array_owner_args, false), Function::ExternalLinkage, "jl_array_data_owner", m); jlarray_data_owner_func->setAttributes( @@ -7118,19 +7159,19 @@ static void init_julia_llvm_env(Module *m) add_named_global(jlarray_data_owner_func, jl_array_data_owner); gcroot_func = - Function::Create(FunctionType::get(T_ppjlvalue, false), + Function::Create(FunctionType::get(T_pprjlvalue, false), Function::ExternalLinkage, "julia.gc_root_decl"); add_named_global(gcroot_func, (void*)NULL, /*dllimport*/false); gckill_func = - Function::Create(FunctionType::get(T_void, ArrayRef(T_ppjlvalue), false), + Function::Create(FunctionType::get(T_void, ArrayRef(T_pprjlvalue), false), Function::ExternalLinkage, "julia.gc_root_kill"); add_named_global(gckill_func, (void*)NULL, /*dllimport*/false); jlcall_frame_func = - Function::Create(FunctionType::get(T_ppjlvalue, ArrayRef(T_int32), false), + Function::Create(FunctionType::get(T_pprjlvalue, ArrayRef(T_int32), false), Function::ExternalLinkage, "julia.jlcall_frame_decl"); add_named_global(jlcall_frame_func, (void*)NULL, /*dllimport*/false); @@ -7140,6 +7181,13 @@ static void init_julia_llvm_env(Module *m) "julia.gcroot_flush"); add_named_global(gcroot_flush_func, (void*)NULL, /*dllimport*/false); + pointer_from_objref_func = Function::Create(FunctionType::get(T_pjlvalue, + ArrayRef(PointerType::get(T_jlvalue, AddressSpace::Derived)), false), + Function::ExternalLinkage, + "julia.pointer_from_objref"); + pointer_from_objref_func->addFnAttr(Attribute::ReadNone); + add_named_global(pointer_from_objref_func, (void*)NULL, /*dllimport*/false); + except_enter_func = Function::Create(FunctionType::get(T_int32, false), Function::ExternalLinkage, "julia.except_enter"); @@ -7176,7 +7224,7 @@ static void init_julia_llvm_env(Module *m) #if JL_LLVM_VERSION < 30700 jl_globalPM->add(jl_data_layout); #endif - addOptimizationPasses(jl_globalPM); + addOptimizationPasses(jl_globalPM, jl_options.opt_level); } static inline std::string getNativeTarget() @@ -7510,6 +7558,13 @@ extern "C" void *jl_init_llvm(void) jl_ExecutionEngine->DisableLazyCompilation(); #endif +// Mark our address spaces as non-integral +#if JL_LLVM_VERSION >= 40000 + jl_data_layout = jl_ExecutionEngine->getDataLayout(); + std::string DL = jl_data_layout.getStringRepresentation() + "-ni:10:11:12"; + jl_data_layout.reset(DL); +#endif + // Now that the execution engine exists, initialize all modules jl_setup_module(engine_module); jl_setup_module(m); @@ -7564,6 +7619,14 @@ extern "C" void jl_dump_llvm_value(void *v) ((Value*)v)->dump(); #endif } +extern "C" void jl_dump_llvm_inst_function(void *v) +{ +#if JL_LLVM_VERSION >= 50000 + cast(((Value*)v))->getParent()->getParent()->print(llvm::dbgs(), nullptr, true); +#else + 
cast(((Value*)v))->getParent()->getParent()->dump(); +#endif +} extern "C" void jl_dump_llvm_type(void *v) { #if JL_LLVM_VERSION >= 50000 diff --git a/src/codegen_internal.h b/src/codegen_internal.h index f2175991f4138..1ee60e0611af4 100644 --- a/src/codegen_internal.h +++ b/src/codegen_internal.h @@ -1,5 +1,7 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license +#include "codegen_shared.h" + #if defined(USE_ORCJIT) && JL_LLVM_VERSION <= 30800 # include void notifyObjectLoaded(RTDyldMemoryManager *memmgr, diff --git a/src/codegen_shared.h b/src/codegen_shared.h new file mode 100644 index 0000000000000..6930d007e8315 --- /dev/null +++ b/src/codegen_shared.h @@ -0,0 +1,9 @@ +enum AddressSpace { + Generic = 0, + Tracked = 10, Derived = 11, CalleeRooted = 12, + FirstSpecial = Tracked, + LastSpecial = CalleeRooted, +}; + +#define JLCALL_CC (CallingConv::ID)36 +#define JLCALL_F_CC (CallingConv::ID)37 diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index dc1f8ed89f0c0..7211038633620 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -12,22 +12,22 @@ static bool float_func[num_intrinsics]; static void jl_init_intrinsic_functions_codegen(Module *m) { std::vector args1(0); \ - args1.push_back(T_pjlvalue); \ + args1.push_back(T_prjlvalue); \ std::vector args2(0); \ - args2.push_back(T_pjlvalue); \ - args2.push_back(T_pjlvalue); \ + args2.push_back(T_prjlvalue); \ + args2.push_back(T_prjlvalue); \ std::vector args3(0); \ - args3.push_back(T_pjlvalue); \ - args3.push_back(T_pjlvalue); \ - args3.push_back(T_pjlvalue); \ + args3.push_back(T_prjlvalue); \ + args3.push_back(T_prjlvalue); \ + args3.push_back(T_prjlvalue); \ std::vector args4(0); \ - args4.push_back(T_pjlvalue); \ - args4.push_back(T_pjlvalue); \ - args4.push_back(T_pjlvalue); \ - args4.push_back(T_pjlvalue); + args4.push_back(T_prjlvalue); \ + args4.push_back(T_prjlvalue); \ + args4.push_back(T_prjlvalue); \ + args4.push_back(T_prjlvalue); #define ADD_I(name, nargs) do { \ - Function *func = Function::Create(FunctionType::get(T_pjlvalue, args##nargs, false), \ + Function *func = Function::Create(FunctionType::get(T_prjlvalue, args##nargs, false), \ Function::ExternalLinkage, "jl_"#name, m); \ runtime_func[name] = func; \ add_named_global(func, &jl_##name); \ @@ -601,7 +601,7 @@ static jl_cgval_t emit_pointerref(jl_cgval_t *argv, jl_codectx_t *ctx) if (!jl_isbits(ety)) { if (ety == (jl_value_t*)jl_any_type) { - Value *thePtr = emit_unbox(T_ppjlvalue, e, e.typ); + Value *thePtr = emit_unbox(T_pprjlvalue, e, e.typ); return mark_julia_type( builder.CreateAlignedLoad(builder.CreateGEP(thePtr, im1), align_nb), true, @@ -682,7 +682,15 @@ static jl_cgval_t emit_pointerset(jl_cgval_t *argv, jl_codectx_t *ctx) Type *ptrty = julia_type_to_llvm(e.typ, &isboxed); assert(!isboxed); thePtr = emit_unbox(ptrty, e, e.typ); - typed_store(thePtr, im1, x, ety, ctx, tbaa_data, NULL, align_nb); + if (ety == (jl_value_t*)jl_any_type) { + // unsafe_store to Ptr{Any} is allowed to implicitly drop GC roots. 
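The store emitted just below goes through `emit_pointer_from_objref`, whose definition is not part of the hunks shown here. Conceptually it pairs with the `julia.pointer_from_objref` intrinsic declared in `init_julia_llvm_env` above (a `ReadNone` function taking a Derived `jl_value_t*` and returning a plain one); a hedged sketch of what such a helper might do:
```
// Sketch only: decay the boxed value to the Derived address space and ask the
// julia.pointer_from_objref intrinsic for an untracked jl_value_t*, which may
// then be stored through a raw Ptr{Any} without keeping the object alive.
static Value *emit_pointer_from_objref_sketch(Value *boxed_value)
{
    Value *derived = decay_derived(boxed_value);
    return builder.CreateCall(prepare_call(pointer_from_objref_func), derived);
}
```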
+ Instruction *store = builder.CreateAlignedStore( + emit_pointer_from_objref(boxed(x, ctx, false)), + builder.CreateGEP(thePtr, im1), align_nb); + tbaa_decorate(tbaa_data, store); + } else { + typed_store(thePtr, im1, x, ety, ctx, tbaa_data, NULL, align_nb); + } } return mark_julia_type(thePtr, false, aty, ctx); } @@ -1212,8 +1220,8 @@ static Value *emit_untyped_intrinsic(intrinsic f, Value **argvalues, size_t narg assert(0 && "unreachable"); } -#define BOX_F(ct,jl_ct) \ - box_##ct##_func = boxfunc_llvm(ft1arg(T_pjlvalue, T_##jl_ct), \ +#define BOX_F(ct,jl_ct) \ + box_##ct##_func = boxfunc_llvm(ft1arg(T_prjlvalue, T_##jl_ct), \ "jl_box_"#ct, &jl_box_##ct, m); #define SBOX_F(ct,jl_ct) BOX_F(ct,jl_ct); box_##ct##_func->addAttribute(1, Attribute::SExt); diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 08d47d66a4156..68d29c5f75dc2 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -55,6 +55,8 @@ namespace llvm { #include #endif +#include +#include #include #include @@ -109,14 +111,13 @@ void jl_init_jit(Type *T_pjlvalue_) // this defines the set of optimization passes defined for Julia at various optimization levels #if JL_LLVM_VERSION >= 30700 -void addOptimizationPasses(legacy::PassManager *PM) +void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level) #else -void addOptimizationPasses(PassManager *PM) +void addOptimizationPasses(PassManager *PM, int opt_level) #endif { - PM->add(createLowerExcHandlersPass()); - PM->add(createLowerGCFramePass()); #ifdef JL_DEBUG_BUILD + PM->add(createGCInvariantVerifierPass(true)); PM->add(createVerifierPass()); #endif @@ -130,17 +131,32 @@ void addOptimizationPasses(PassManager *PM) #if defined(JL_MSAN_ENABLED) PM->add(llvm::createMemorySanitizerPass(true)); #endif - if (jl_options.opt_level == 0) { + if (opt_level == 0) { PM->add(createCFGSimplificationPass()); // Clean up disgusting code - PM->add(createMemCpyOptPass()); // Remove memcpy / form memset +#if JL_LLVM_VERSION < 50000 + PM->add(createBarrierNoopPass()); + PM->add(createLowerExcHandlersPass()); + PM->add(createGCInvariantVerifierPass(false)); + PM->add(createLateLowerGCFramePass()); PM->add(createLowerPTLSPass(imaging_mode)); + PM->add(createBarrierNoopPass()); +#endif + PM->add(createMemCpyOptPass()); // Remove memcpy / form memset #if JL_LLVM_VERSION >= 40000 PM->add(createAlwaysInlinerLegacyPass()); // Respect always_inline #else PM->add(createAlwaysInlinerPass()); // Respect always_inline +#endif +#if JL_LLVM_VERSION >= 50000 + PM->add(createBarrierNoopPass()); + PM->add(createLowerExcHandlersPass()); + PM->add(createGCInvariantVerifierPass(false)); + PM->add(createLateLowerGCFramePass()); + PM->add(createLowerPTLSPass(imaging_mode)); #endif return; } + PM->add(createPropagateJuliaAddrspaces()); #if JL_LLVM_VERSION >= 30700 PM->add(createTargetTransformInfoWrapperPass(jl_TargetMachine->getTargetIRAnalysis())); #else @@ -160,7 +176,18 @@ void addOptimizationPasses(PassManager *PM) } // list of passes from vmkit PM->add(createCFGSimplificationPass()); // Clean up disgusting code + PM->add(createDeadInstEliminationPass()); PM->add(createPromoteMemoryToRegisterPass()); // Kill useless allocas + + // Due to bugs and missing features LLVM < 5.0, does not properly propagate + // our invariants. We need to do GC rooting here. This reduces the + // effectiveness of the optimization, but should retain correctness. 
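+    // (On LLVM 5.0 and newer, the same lowering passes, together with the GC
+    // invariant verifier, are instead appended at the very end of this function
+    // behind a BarrierNoopPass, so the optimizer works on the address-space
+    // annotated IR for as long as possible.)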
+#if JL_LLVM_VERSION < 50000 + PM->add(createLowerExcHandlersPass()); + PM->add(createLateLowerGCFramePass()); + PM->add(createLowerPTLSPass(imaging_mode)); +#endif + PM->add(createMemCpyOptPass()); // hopefully these functions (from llvmcall) don't try to interact with the Julia runtime @@ -176,7 +203,6 @@ void addOptimizationPasses(PassManager *PM) #endif // Let the InstCombine pass remove the unnecessary load of // safepoint address first - PM->add(createLowerPTLSPass(imaging_mode)); PM->add(createSROAPass()); // Break up aggregate allocas #ifndef INSTCOMBINE_BUG PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl. @@ -265,6 +291,16 @@ void addOptimizationPasses(PassManager *PM) PM->add(createInstructionCombiningPass()); // Clean up after SLP loop vectorizer PM->add(createLoopVectorizePass()); // Vectorize loops PM->add(createInstructionCombiningPass()); // Clean up after loop vectorizer +#endif + // LowerPTLS removes an indirect call. As a result, it is likely to trigger + // LLVM's devirtualization heuristics, which would result in the entire + // pass pipeline being re-exectuted. Prevent this by inserting a barrier. +#if JL_LLVM_VERSION >= 50000 + PM->add(createBarrierNoopPass()); + PM->add(createLowerExcHandlersPass()); + PM->add(createGCInvariantVerifierPass(false)); + PM->add(createLateLowerGCFramePass()); + PM->add(createLowerPTLSPass(imaging_mode)); #endif } @@ -490,14 +526,7 @@ JuliaOJIT::JuliaOJIT(TargetMachine &TM) } ) { - if (!jl_generating_output()) { - addOptimizationPasses(&PM); - } - else { - PM.add(createLowerExcHandlersPass()); - PM.add(createLowerGCFramePass()); - PM.add(createLowerPTLSPass(imaging_mode)); - } + addOptimizationPasses(&PM, jl_generating_output() ? 0 : jl_options.opt_level); if (TM.addPassesToEmitMC(PM, Ctx, ObjStream)) llvm_unreachable("Target does not support MC emission."); @@ -1278,7 +1307,7 @@ void jl_dump_native(const char *bc_fname, const char *unopt_bc_fname, const char } if (bc_fname || obj_fname) - addOptimizationPasses(&PM); + addOptimizationPasses(&PM, jl_options.opt_level); if (bc_fname) { #if JL_LLVM_VERSION >= 30500 @@ -1339,7 +1368,11 @@ void jl_dump_native(const char *bc_fname, const char *unopt_bc_fname, const char #if JL_LLVM_VERSION >= 30700 // Reset the target triple to make sure it matches the new target machine clone->setTargetTriple(TM->getTargetTriple().str()); -#if JL_LLVM_VERSION >= 30800 +#if JL_LLVM_VERSION >= 40000 + DataLayout DL = TM->createDataLayout(); + DL.reset(DL.getStringRepresentation() + "-ni:10:11:12"); + clone->setDataLayout(DL); +#elif JL_LLVM_VERSION >= 30800 clone->setDataLayout(TM->createDataLayout()); #else clone->setDataLayout(TM->getDataLayout()->getStringRepresentation()); @@ -1401,20 +1434,24 @@ GlobalVariable *jl_get_global_for(const char *cname, void *addr, Module *M) // An LLVM module pass that just runs all julia passes in order. 
Useful for // debugging extern "C" void jl_init_codegen(void); -class JuliaPipeline : public ModulePass { +class JuliaPipeline : public Pass { public: static char ID; - JuliaPipeline() : ModulePass(ID) {} - virtual bool runOnModule(Module &M) { + // A bit of a hack, but works + struct TPMAdapter : public PassManagerBase { + PMTopLevelManager *TPM; + TPMAdapter(PMTopLevelManager *TPM) : TPM(TPM) {} + void add(Pass *P) { TPM->schedulePass(P); } + }; + void preparePassManager(PMStack &Stack) override { (void)jl_init_llvm(); -#if JL_LLVM_VERSION >= 30700 - legacy::PassManager PM; -#else - PassManager PM; -#endif - addOptimizationPasses(&PM); - PM.run(M); - return true; + PMTopLevelManager *TPM = Stack.top()->getTopLevelManager(); + TPMAdapter Adapter(TPM); + addOptimizationPasses(&Adapter, 3); + } + JuliaPipeline() : Pass(PT_PassManager, ID) {} + Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const { + return createPrintModulePass(O, Banner); } }; char JuliaPipeline::ID = 0; diff --git a/src/jitlayers.h b/src/jitlayers.h index 972867f0ebda6..78f4147aff6a4 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -61,9 +61,9 @@ extern size_t jltls_offset_idx; typedef struct {Value *gv; int32_t index;} jl_value_llvm; // uses 1-based indexing #if JL_LLVM_VERSION >= 30700 -void addOptimizationPasses(legacy::PassManager *PM); +void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level); #else -void addOptimizationPasses(PassManager *PM); +void addOptimizationPasses(PassManager *PM, int opt_level); #endif void* jl_emit_and_add_to_shadow(GlobalVariable *gv, void *gvarinit = NULL); GlobalVariable *jl_emit_sysimg_slot(Module *m, Type *typ, const char *name, @@ -247,8 +247,10 @@ JL_DLLEXPORT extern LLVMContext &jl_LLVMContext; #endif Pass *createLowerPTLSPass(bool imaging_mode); -Pass *createLowerGCFramePass(); +Pass *createLateLowerGCFramePass(); Pass *createLowerExcHandlersPass(); +Pass *createGCInvariantVerifierPass(bool Strong); +Pass *createPropagateJuliaAddrspaces(); // Whether the Function is an llvm or julia intrinsic. static inline bool isIntrinsicFunction(Function *F) { diff --git a/src/julia.h b/src/julia.h index 183cfef8ab532..dca92fca16061 100644 --- a/src/julia.h +++ b/src/julia.h @@ -42,13 +42,17 @@ #if defined(__GNUC__) # define JL_NORETURN __attribute__ ((noreturn)) # define JL_CONST_FUNC __attribute__((const)) +# define JL_USED_FUNC __attribute__((used)) #elif defined(_COMPILER_MICROSOFT_) # define JL_NORETURN __declspec(noreturn) // This is the closest I can find for __attribute__((const)) # define JL_CONST_FUNC __declspec(noalias) +// Does MSVC have this? +# define JL_USED_FUNC #else # define JL_NORETURN # define JL_CONST_FUNC +# define JL_USED_FUNC #endif #define container_of(ptr, type, member) \ diff --git a/src/llvm-gc-invariant-verifier.cpp b/src/llvm-gc-invariant-verifier.cpp new file mode 100644 index 0000000000000..629152ee62ef3 --- /dev/null +++ b/src/llvm-gc-invariant-verifier.cpp @@ -0,0 +1,183 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license +// This LLVM pass verifier invariants required for correct GC root placement. +// See the devdocs for a description of these invariants. 
+ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "llvm-version.h" +#include "codegen_shared.h" +#include "julia.h" + +#define DEBUG_TYPE "verify_gc_invariants" +#undef DEBUG + +using namespace llvm; + +struct GCInvariantVerifier : public FunctionPass, public InstVisitor { + static char ID; + bool Broken = false; + bool Strong; + GCInvariantVerifier(bool Strong = false) : FunctionPass(ID), Strong(Strong) {} + +private: + void Check(bool Cond, const char *message, Value *Val) { + if (!Cond) { + dbgs() << message << "\n\t" << *Val << "\n"; + Broken = true; + } + } + +public: + void getAnalysisUsage(AnalysisUsage &AU) const override { + FunctionPass::getAnalysisUsage(AU); + AU.setPreservesAll(); + } + + bool runOnFunction(Function &F) override; + void visitAddrSpaceCastInst(AddrSpaceCastInst &I); + void visitStoreInst(StoreInst &SI); + void visitLoadInst(LoadInst &LI); + void visitReturnInst(ReturnInst &RI); + void visitGetElementPtrInst(GetElementPtrInst &GEP); + void visitIntToPtrInst(IntToPtrInst &IPI); + void visitPtrToIntInst(PtrToIntInst &PII); + void visitCallInst(CallInst &CI); +}; + +void GCInvariantVerifier::visitAddrSpaceCastInst(AddrSpaceCastInst &I) { + unsigned FromAS = cast(I.getSrcTy())->getAddressSpace(); + unsigned ToAS = cast(I.getDestTy())->getAddressSpace(); + if (FromAS == 0) + return; + Check(FromAS != AddressSpace::Tracked || + ToAS == AddressSpace::CalleeRooted || + ToAS == AddressSpace::Derived, + "Illegal address space cast from tracked ptr", &I); + Check(FromAS != AddressSpace::CalleeRooted && + FromAS != AddressSpace::Derived, + "Illegal address space cast from decayed ptr", &I); +} + +void GCInvariantVerifier::visitStoreInst(StoreInst &SI) { + Type *VTy = SI.getValueOperand()->getType(); + if (VTy->isPointerTy()) { + /* We currently don't obey this for arguments. That's ok - they're + externally rooted. 
*/ + if (!isa(SI.getValueOperand())) { + unsigned AS = cast(VTy)->getAddressSpace(); + Check(AS != AddressSpace::CalleeRooted && + AS != AddressSpace::Derived, + "Illegal store of decayed value", &SI); + } + } + VTy = SI.getPointerOperand()->getType(); + if (VTy->isPointerTy()) { + unsigned AS = cast(VTy)->getAddressSpace(); + Check(AS != AddressSpace::CalleeRooted, + "Illegal store to callee rooted value", &SI); + } +} + +void GCInvariantVerifier::visitLoadInst(LoadInst &LI) { + Type *Ty = LI.getType(); + if (Ty->isPointerTy()) { + unsigned AS = cast(Ty)->getAddressSpace(); + Check(AS != AddressSpace::CalleeRooted && + AS != AddressSpace::Derived, + "Illegal load of gc relevant value", &LI); + } + Ty = LI.getPointerOperand()->getType(); + if (Ty->isPointerTy()) { + unsigned AS = cast(Ty)->getAddressSpace(); + Check(AS != AddressSpace::CalleeRooted, + "Illegal store of callee rooted value", &LI); + } +} + +static bool isSpecialAS(unsigned AS) { + return AddressSpace::FirstSpecial <= AS && AS <= AddressSpace::LastSpecial; +} + +void GCInvariantVerifier::visitReturnInst(ReturnInst &RI) { + if (!RI.getReturnValue()) + return; + Type *RTy = RI.getReturnValue()->getType(); + if (!RTy->isPointerTy()) + return; + unsigned AS = cast(RTy)->getAddressSpace(); + Check(!isSpecialAS(AS) || AS == AddressSpace::Tracked, + "Only gc tracked values may be directly returned", &RI); +} + +void GCInvariantVerifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { + Type *Ty = GEP.getType(); + if (!Ty->isPointerTy()) + return; + unsigned AS = cast(Ty)->getAddressSpace(); + if (!isSpecialAS(AS)) + return; + /* We're actually ok with GEPs here, as long as they don't feed into any + uses. Upstream is currently still debating whether CAST(GEP) == GEP(CAST), + so though we enforce casting to they decayed as first in the frontend, + the optimizer will introduce the other form. While upstream is debating + what the semantics here are, just check this for things coming from + the frontend */ + if (Strong) { + Check(AS != AddressSpace::Tracked, + "GC tracked values may not appear in GEP expressions." + " You may have to decay the value first", &GEP); + } +} + +void GCInvariantVerifier::visitCallInst(CallInst &CI) { + CallingConv::ID CC = CI.getCallingConv(); + if (CC == JLCALL_CC || CC == JLCALL_F_CC) { + for (Value *Arg : CI.arg_operands()) { + Type *Ty = Arg->getType(); + Check(Ty->isPointerTy() && cast(Ty)->getAddressSpace() == AddressSpace::Tracked, + "Invalid derived pointer in jlcall", &CI); + } + } +} + +/* These next two are caught by the regular verifier on LLVM 5.0+, but we + may want to run this on earlier LLVM versions. */ +void GCInvariantVerifier::visitIntToPtrInst(IntToPtrInst &IPI) { + Check(!isSpecialAS(IPI.getAddressSpace()), + "Illegal inttoptr", &IPI); +} + +void GCInvariantVerifier::visitPtrToIntInst(PtrToIntInst &PII) { + Check(!isSpecialAS(PII.getPointerAddressSpace()), + "Illegal inttoptr", &PII); +} + +bool GCInvariantVerifier::runOnFunction(Function &F) { + visit(F); + if (Broken) { + abort(); + } + return false; +} + +char GCInvariantVerifier::ID = 0; +static RegisterPass X("GCInvariantVerifier", "GC Invariant Verification Pass", false, false); + +Pass *createGCInvariantVerifierPass(bool Strong) { + return new GCInvariantVerifier(Strong); +} diff --git a/src/llvm-gcroot.cpp b/src/llvm-gcroot.cpp deleted file mode 100644 index 5f0af51c20f1a..0000000000000 --- a/src/llvm-gcroot.cpp +++ /dev/null @@ -1,1071 +0,0 @@ -// This file is a part of Julia. 
License is MIT: https://julialang.org/license - -#define DEBUG_TYPE "lower_gcroot" -#undef DEBUG - -#include "llvm-version.h" -#include -#include -#include -#include -#include -#include -#include -#if JL_LLVM_VERSION >= 30700 -#include -#else -#include -#endif -#include -#include -#if JL_LLVM_VERSION >= 30600 -#include -#include -#endif -#include - -#include "fix_llvm_assert.h" - -#include -#include -#include -#include -#include - -#include "julia.h" - -#if JL_LLVM_VERSION >= 30700 -#define LLVM37_param(x) (x), -#else -#define LLVM37_param(x) -#endif - -using namespace llvm; - -extern std::pair tbaa_make_child(const char *name, MDNode *parent=nullptr, bool isConstant=false); - -namespace { - -#ifndef JL_NDEBUG -static struct { - unsigned count; - unsigned locals; - unsigned temp; -} jl_gc_frame_stats = {0}; -#endif - -typedef std::pair frame_register; -struct liveness { - typedef unsigned id; - enum { - // an assignment to a gcroot exists in the basic-block - // (potentially no live-in from the predecessor basic-blocks) - assign = 1<<0, - // a use of a gcroot exists in the basic-block - // (potentially a "kill" and no live-out to the successor basic-blocks) - kill = 1<<1, - // the gcroot is live over the entire basic-block - // (the assign/kill are not dominating of entry/exit) - live = 1<<2 - - // live | kill | assign: - // a usage and assignment exist, but it is also live on exit, - // the entry liveness depends on whether a store or use is - // encountered first - // live | kill: - // a usage exists, - // but the value must be live for the entire basic-block - // since it is not the terminal usage in the domination tree - // kill | assign: - // a usage and definition exist in domination order, - // so the actual lifetime is only a subset of the basic-block - // live | assign: - // impossible (this would be strange) - }; -}; - -static void tbaa_decorate_gcframe(Instruction *inst, - std::set &visited, - MDNode *tbaa_gcframe) -{ - if (!visited.insert(inst).second) - return; -#if JL_LLVM_VERSION >= 30500 - Value::user_iterator I = inst->user_begin(), E = inst->user_end(); -#else - Value::use_iterator I = inst->use_begin(), E = inst->use_end(); -#endif - for (;I != E;++I) { - Instruction *user = dyn_cast(*I); - if (!user) { - continue; - } - else if (isa(user)) { - if (__likely(user->getOperand(0) == inst)) { - tbaa_decorate_gcframe(user, visited, tbaa_gcframe); - } - } - else if (isa(user)) { - if (user->getOperand(1) == inst) { - user->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe); - } - } - else if (isa(user)) { - user->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe); - } - else if (isa(user)) { - tbaa_decorate_gcframe(user, visited, tbaa_gcframe); - } - } -} - -static void tbaa_decorate_gcframe(Instruction *inst, MDNode *tbaa_gcframe) -{ - std::set visited; - tbaa_decorate_gcframe(inst, visited, tbaa_gcframe); -} - -class JuliaGCAllocator { -public: - JuliaGCAllocator(Function &F, CallInst *ptlsStates, - Type *T_pjlvalue, MDNode *tbaa) : - F(F), - M(*F.getParent()), - T_int1(Type::getInt1Ty(F.getContext())), - T_int8(Type::getInt8Ty(F.getContext())), - T_int32(Type::getInt32Ty(F.getContext())), - T_int64(Type::getInt64Ty(F.getContext())), - V_null(T_pjlvalue ? Constant::getNullValue(T_pjlvalue) : nullptr), - ptlsStates(ptlsStates), -#if JL_LLVM_VERSION >= 50000 - gcframe(ptlsStates ? new AllocaInst(T_pjlvalue, 0, ConstantInt::get(T_int32, 0)) : nullptr), -#else - gcframe(ptlsStates ? 
new AllocaInst(T_pjlvalue, ConstantInt::get(T_int32, 0)) : nullptr), -#endif - gcroot_func(M.getFunction("julia.gc_root_decl")), - gckill_func(M.getFunction("julia.gc_root_kill")), - jlcall_frame_func(M.getFunction("julia.jlcall_frame_decl")), - gcroot_flush_func(M.getFunction("julia.gcroot_flush")), - except_enter_func(M.getFunction("julia.except_enter")), - jlleave_func(M.getFunction("jl_pop_handler")), - tbaa_gcframe(tbaa) - { -/* Algorithm sketch: - * Compute liveness for each basic block - * liveness computed at the basic-block level for pairs - * Propagate liveness from each basic block to its predecessors - * Allocate argument slot for each jlcall frame - */ - if (gcframe) { -#ifdef JL_DEBUG_BUILD - gcframe->setName("gcrootframe"); -#endif - gcframe->insertAfter(ptlsStates); - } - } - -private: - Function &F; - Module &M; - Type *const T_int1; - Type *const T_int8; - Type *const T_int32; - Type *const T_int64; - Value *const V_null; - CallInst *const ptlsStates; - AllocaInst *const gcframe; - Function *const gcroot_func; - Function *const gckill_func; - Function *const jlcall_frame_func; - Function *const gcroot_flush_func; - Function *const except_enter_func; - Function *const jlleave_func; - MDNode *const tbaa_gcframe; - - Instruction *get_pgcstack(Instruction *ptlsStates); - frame_register get_gcroot(Value *ptr); - void collapseRedundantRoots(); - bool record_usage(CallInst *callInst, - std::map > &bb_uses, - std::map ®s_used, - unsigned &offset, bool commit=true); - unsigned find_space_for(CallInst *callInst, - std::map > &bb_uses, - std::map ®s_used); - void rearrangeRoots(); -public: - void allocate_frame(); -}; - -struct HandlerData { - // Pairs of , number of pops left after popping - // this frame. - std::vector> leaves; - // enters that are directly nested in this frame - std::set nested; - std::unique_ptr> parent_vec; - CallInst *parent{nullptr}; - bool processed{false}; -#ifdef _COMPILER_MICROSOFT_ - // MSVC 2013 seems to call the copy version instead of the move verion - // without this, which triggers compilation error since `std::unique_ptr` - // is not copyable. 
- HandlerData() = default; - HandlerData(HandlerData &&other) - : HandlerData() - { - operator=(std::move(other)); - } - HandlerData(const HandlerData&) = delete; - HandlerData &operator=(HandlerData &&other) - { - std::swap(leaves, other.leaves); - std::swap(nested, other.nested); - std::swap(parent_vec, other.parent_vec); - std::swap(parent, other.parent); - std::swap(processed, other.processed); - return *this; - } - HandlerData &operator=(const HandlerData&) = delete; -#endif -}; - - -Instruction *JuliaGCAllocator::get_pgcstack(Instruction *ptlsStates) -{ - Constant *offset = ConstantInt::getSigned(T_int32, offsetof(jl_tls_states_t, pgcstack) / sizeof(void*)); - return GetElementPtrInst::Create(LLVM37_param(NULL) - ptlsStates, - ArrayRef(offset), - "jl_pgcstack"); -} - -frame_register JuliaGCAllocator::get_gcroot(Value *ptr) -{ - frame_register frame; - frame.first = dyn_cast(ptr); - frame.second = 0; - if (frame.first == NULL) { - // also try to look through GEP for jlcall_frame_func - if (GetElementPtrInst *gepInst = dyn_cast(ptr)) { - if (gepInst->getNumIndices() == 1) { - frame.first = dyn_cast(gepInst->getPointerOperand()); - if (frame.first && frame.first->getCalledValue() == jlcall_frame_func) - frame.second = cast(gepInst->idx_begin()->get())->getZExtValue(); - else - frame.first = NULL; - } - } - } - return frame; -} - -void JuliaGCAllocator::collapseRedundantRoots() -{ - for (BasicBlock::iterator I = gcframe->getParent()->begin(), E(gcframe); I != E; ) { - CallInst* callInst = dyn_cast(&*I); - ++I; - if (callInst && callInst->getCalledValue() == gcroot_func) { - // see if a root is only used briefly for `store -> load -> store other` pattern or `store, store other` - // such that the first store can be trivially replaced with just "other" and delete the chain - // or if is used for store, but the value is never needed - StoreInst *theStore = NULL; - unsigned n_stores = 0; - bool variable_slot = true; // whether this gc-root is only used as a variable-slot; e.g. whether theLoad is theValue - LoadInst *theLoad = NULL; - for (User::use_iterator use = callInst->use_begin(), usee = callInst->use_end(); use != usee; ) { -#if JL_LLVM_VERSION >= 30500 - User *user = use->getUser(); -#else - User *user = use.getUse().getUser(); -#endif - ++use; - if (StoreInst *storeInst = dyn_cast(user)) { - if (n_stores == 0) - theStore = storeInst; - else - theStore = NULL; - Value *theValue = storeInst->getValueOperand(); - if (!theValue->hasOneUse()) { // not just the store - variable_slot = false; // this gc-root is used as more than just a variable-slot (aka phi node) - } - n_stores++; - } - else if (LoadInst *loadInst = dyn_cast(user)) { - if (loadInst->use_empty()) { - // dead load? - loadInst->eraseFromParent(); - } - else { - if (theLoad) { - // multiple live loads, this is hard to optimize, so skip it - n_stores = 0; - break; - } - theLoad = loadInst; - } - } - else { - // what is this? oh, well. 
skip trying to optimize this gc-root - n_stores = 0; - break; - } - } - - if (n_stores == 0) - continue; - - if (theLoad == NULL) { - // this gc-root is never loaded from, so we don't need it as a variable location - // delete any stores to this gc-root that would be keeping an otherwise-unused value alive - for (User::use_iterator use = callInst->use_begin(), usee = callInst->use_end(); use != usee; ) { -#if JL_LLVM_VERSION >= 30500 - User *user = use->getUser(); -#else - User *user = use.getUse().getUser(); -#endif - StoreInst *theStore = cast(user); - ++use; - Value *theValue = theStore->getValueOperand(); - if (theValue->hasOneUse()) { // just the store - if (&*I == theStore) ++I; - theStore->eraseFromParent(); - } - } - if (callInst->use_empty()) { - callInst->eraseFromParent(); - continue; - } - else if (callInst->hasOneUse()) { - User::use_iterator use = callInst->use_begin(); -#if JL_LLVM_VERSION >= 30500 - theStore = cast(use->getUser()); -#else - theStore = cast(use.getUse().getUser()); -#endif - } - } - - if ((theLoad != NULL && variable_slot) || - (theLoad == NULL && theStore != NULL)) { - Value *theValue = theLoad ? theLoad : theStore->getValueOperand(); - if (theValue->hasNUses(theLoad ? 1 : 2)) { // only uses are theStore and theLoad and theOther - // check if this value is only used for a store to another gcroot - User::use_iterator value_use = theValue->use_begin(); - if (theLoad && *value_use == theStore) - ++value_use; -#if JL_LLVM_VERSION >= 30500 - StoreInst *theOther = dyn_cast(value_use->getUser()); - unsigned OperandNo = value_use->getOperandNo(); -#else - StoreInst *theOther = dyn_cast(value_use.getUse().getUser()); - unsigned OperandNo = value_use.getOperandNo(); -#endif - if (theOther && OperandNo != StoreInst::getPointerOperandIndex()) { - // test whether this store is valid as a gc-root - bool patternMatchSuccess = false; - frame_register gcroot_other_gep = get_gcroot(theOther->getPointerOperand()); - CallInst *gcroot_other = gcroot_other_gep.first; - // it could be a gcroot... - if (gcroot_other && gcroot_other->getCalledValue() == gcroot_func && theStore != NULL) { - // need to make sure there aren't any other uses of gcroot_other (including gckill) - // between the initial store and the replacement store - // TODO: do this better once we have liveness information for locals? 
- BasicBlock *current = theStore->getParent(); - BasicBlock::iterator bbi(theStore); - BasicBlock::iterator bbi_end = current->end(); - patternMatchSuccess = true; - ++bbi; - while (patternMatchSuccess) { - Instruction *inst = &*bbi; - if (inst == theOther) { - break; // success - } - for (Instruction::op_iterator op = inst->op_begin(), op_e = inst->op_end(); op != op_e; ++op) { - if (op->get() == gcroot_other) { - patternMatchSuccess = false; - break; // fail: gcroot_other had another reference, can't make this replacement - } - } - if (++bbi == bbi_end) { - // iterate the basicblock forward, if it's a simple branch -#if JL_LLVM_VERSION >= 30500 - BasicBlock *next = current->getUniqueSuccessor(); -#else - succ_iterator SI = succ_begin(current), E = succ_end(current); - BasicBlock *next = NULL; - if (SI != E) { - next = *SI; - for (++SI; SI != E; ++SI) { - if (*SI != next) { - next = NULL; - break; - } - } - } -#endif - if (next) { - bbi = next->begin(); - bbi_end = next->end(); - current = next; - } - else { - patternMatchSuccess = false; - } - } - } - } - // ...or it could be a jlcall frame - else if (gcroot_other && gcroot_other->getCalledValue() == jlcall_frame_func) { - // jlcall_frame_func slots are effectively SSA, - // so it's always safe to merge an earlier store into it - // but do need to update liveness information for this slot - // TODO: do this better once we have liveness information for locals? - if (theStore != NULL && theOther->getParent() == theStore->getParent()) { - //unsigned arg_offset = gcroot_other_gep->second; - //frame_register def(gcroot_other, arg_offset); - //std::map &inuse_list = bb_uses[theOther->getParent()]; - //std::map::iterator inuse_reg = inuse_list.find(def); - patternMatchSuccess = true; - } - } - if (patternMatchSuccess) { - // do the gcroot merge -- replace gcroot with gcroot_other in all the store operations for this gcroot - // so that theOther, theLoad, and this gcroot are no longer needed - Value *gcroot_other = theOther->getPointerOperand(); - if (&*I == theOther) ++I; - theOther->eraseFromParent(); - if (theLoad) { - if (&*I == theLoad) ++I; - theLoad->eraseFromParent(); - } - if (theStore) { - theStore->setOperand(StoreInst::getPointerOperandIndex(), gcroot_other); - } - else { - for (User::use_iterator use = callInst->use_begin(), usee = callInst->use_end(); use != usee; ) { -#if JL_LLVM_VERSION >= 30500 - User *user = use->getUser(); -#else - User *user = use.getUse().getUser(); -#endif - ++use; - StoreInst *theStore = cast(user); - theStore->setOperand(StoreInst::getPointerOperandIndex(), gcroot_other); - } - } - callInst->eraseFromParent(); - } - } - } - } - } - } -} - -bool JuliaGCAllocator::record_usage(CallInst *callInst, - std::map > &bb_uses, - std::map ®s_used, - unsigned &offset, bool commit) -{ -/* record-usage(inst, bb-uses, regs-used, offset, commit=true) - * for (arg-offset, operand) in enumerate(arguments(inst)) - * reg = - * for (bb, liveness) in bb-uses - * if not reg in liveness - * continue - * # TODO: optimize better if liveness[reg] doesn't contain live - * conflict = regs-used[bb][offset + arg-offset] - * if commit - * assert !conflict - * regs-used[bb][offset + arg-offset] = true - * else if conflict - * return false - * return true - */ - unsigned arg_n = cast(callInst->getArgOperand(0))->getZExtValue(); -#if 0 // suboptimal allocator that ignores computed liveness data - { - SmallBitVector ®s = regs_used[&callInst->getParent()->getParent()->getEntryBlock()]; - if (offset + arg_n > regs.size()) - regs.resize(offset 
+ arg_n); - for (unsigned arg_offset = 0; arg_offset < arg_n; ++arg_offset) { - frame_register def(callInst, arg_offset); -#else // }} better allocator that uses per-basicblock liveness - for (std::map >::iterator - live_reg = bb_uses.begin(), e = bb_uses.end(); live_reg != e; ++live_reg) { - BasicBlock *bb = live_reg->first; - SmallBitVector ®s = regs_used[bb]; - if (offset + arg_n > regs.size()) - regs.resize(offset + arg_n); - for (unsigned arg_offset = 0; arg_offset < arg_n; ++arg_offset) { - frame_register def(callInst, arg_offset); - std::map::iterator inuse_reg = live_reg->second.find(def); - if (inuse_reg == live_reg->second.end()) - continue; - // TODO: optimize here better when not live in inuse_reg->second, by ascertaining liveness at the instruction level for this bb -#endif - unsigned index = offset + arg_offset; - bool conflict = regs.test(index); - if (commit) { - assert(!conflict); - regs.set(index); - } - else if (conflict) { - // update the offset argument to point to the next open register beyond index - // to help avoid unnecessary work and accelerate the search - ++offset; - while (offset + arg_offset < regs.size() && regs.test(offset + arg_offset)) - ++offset; - return false; - } - } - } - return true; -} - -unsigned JuliaGCAllocator::find_space_for(CallInst *callInst, - std::map > &bb_uses, - std::map ®s_used) -{ -/* find-space-for(inst, bb-uses, regs-used) - * n = 0 - * while !record-usage(inst, bb-uses, regs-used, n, false) - * n++ - * return n - * - */ - unsigned n = 0; - while (!record_usage(callInst, bb_uses, regs_used, n, false)) { } - return n; -} - -void JuliaGCAllocator::rearrangeRoots() -{ - for (auto BB = F.begin(), E(F.end()); BB != E; BB++) { - auto terminst = BB->getTerminator(); - if (!isa(terminst) && !isa(terminst)) - continue; - SmallVector toRemove; - for (auto I = BB->rbegin(), E(BB->rend()); I != E; ++I) { - // Only handle the simplest case for now, give up if there's a call - // or load from the GC frame. 
- // (Assume we don't have loads that can alias GC frame - // unless the source address is a `julia.gc_root_decl`) - Instruction *inst = &*I; - if (isa(inst)) - break; - if (LoadInst *loadInst = dyn_cast(inst)) { - CallInst *loadAddr = - dyn_cast(loadInst->getPointerOperand()); - if (loadAddr && loadAddr->getCalledValue() == gcroot_func) - break; - continue; - } - if (StoreInst *storeInst = dyn_cast(inst)) { - CallInst *storeAddr = - dyn_cast(storeInst->getPointerOperand()); - if (storeAddr && storeAddr->getCalledValue() == gcroot_func) - toRemove.push_back(storeInst); - continue; - } - } - for (auto inst: toRemove) { - CallInst *decl = cast(inst->getPointerOperand()); - inst->eraseFromParent(); - // TODO removing unused slot should probably be handled later - // when we allocate the frame - if (decl->use_empty()) { - decl->eraseFromParent(); - } - } - } -} - -void JuliaGCAllocator::allocate_frame() -{ - if (!ptlsStates) - return; - Instruction *last_gcframe_inst = gcframe; - collapseRedundantRoots(); - rearrangeRoots(); - -/* # initialize the kill BasicBlock of all jlcall-frames - * bb-uses : map, assign|live|kill > > - * for inst in gc-frame(f) - * if inst match "a call to make-jlcall-frame" - * kill-use = get-unique-use(inst) - * bb-uses[bb][] = kill - */ - std::map > bb_uses; - std::priority_queue< std::pair > frames; - for (BasicBlock::iterator I = gcframe->getParent()->begin(), E(gcframe); I != E; ) { - CallInst* callInst = dyn_cast(&*I); - ++I; - if (callInst && callInst->getCalledValue() == jlcall_frame_func) { - BasicBlock *bb = NULL; - unsigned arg_n = cast(callInst->getArgOperand(0))->getZExtValue(); - frames.push(std::make_pair(arg_n, callInst)); - // the jlcall frame should have been passed to exactly one call (the jlcall) -- find its basic-block - for (User::use_iterator use = callInst->use_begin(), usee = callInst->use_end(); use != usee; ++use) { -#if JL_LLVM_VERSION >= 30500 - User *user = use->getUser(); -#else - User *user = use.getUse().getUser(); -#endif - if (CallInst *callInst = dyn_cast(user)) { - assert(bb == NULL); - bb = callInst->getParent(); -#ifdef JL_NDEBUG - break; -#endif - } - } - assert(bb != NULL); - std::map &inuse_list = bb_uses[bb]; - for (unsigned arg_offset = 0; arg_offset < arg_n; ++arg_offset) { - inuse_list[frame_register(callInst, arg_offset)] = liveness::kill; - } - } - } - -/* # initialize the dataflow queue for tracking liveness - * bb-queue : queue - * for bb in iterator(f) - * inuse-list = &bb-uses[bb] - * for inst in reverse-iterator(f) - * if inst matches store-inst # todo: or inst matches "gc-store-inst" (for stores to non-stack-slots) - * if inst->operand(0) matches "a call to make-jlcall-frame" (or gep thereof) - * def = - * if inuse-list[def] is kill - * inuse-list[def] = assign|kill - * if not has-live-out(bb) - * continue - * for pred in predecessors(bb) - * if not pred in bb-queue - * push-back(bb-queue, pred) - */ - std::vector bb_queue; - for (std::map >::iterator - live_reg = bb_uses.begin(), e = bb_uses.end(); live_reg != e; ++live_reg) { - BasicBlock *bb = live_reg->first; - std::map &inuse_list = live_reg->second; - unsigned live_out = inuse_list.size(); - - for (BasicBlock::iterator ri = bb->end(); ri != bb->begin(); ) { - Instruction *i = &*--ri; - if (StoreInst *storeInst = dyn_cast(i)) { - frame_register def = get_gcroot(storeInst->getPointerOperand()); - if (CallInst *callInst = def.first) { - if (callInst->getCalledValue() == jlcall_frame_func) { - std::map::iterator inuse_reg = inuse_list.find(def); - if (inuse_reg != 
inuse_list.end() && inuse_reg->second == liveness::kill) { - inuse_reg->second |= liveness::assign; - --live_out; - } - } - } - } - } - if (live_out == 0) - continue; - assert(&*bb != &F.getEntryBlock()); // only undef variables should live-out from the entry bb - for (pred_iterator PI = pred_begin(bb), PE = pred_end(bb); PI != PE; ++PI) { - if (std::find(bb_queue.begin(), bb_queue.end(), *PI) == bb_queue.end()) - bb_queue.push_back(*PI); - } - } - - -/* # follow liveness information flow until termination - * while not empty(bb-queue) - * bb = pop(bb-queue) - * inuse-list = &bb-uses[bb] - * changes = 0 - * for succ in successors(bb) - * for in bb-uses[succ] - * if (not assign in op) and not (inuse-list[def] contains live or assign) - * # need to add live value from successor to current block, unless it was already marked - * inuse-list[def] |= live - * changes += 1 - * for inst in iterator(bb) - * if inst matches store-inst # todo: or inst matches "gc-store-inst" (for stores to non-stack-slots) - * if inst->operand(0) matches "a call to make-jlcall-frame" (or gep thereof) - * def = - * if live in inuse-list[def] - * inuse-list[def] |= assign - * if not kill in inuse-list[def] - * # found the assignment, def is no longer live - * inuse-list[def] &= ~live - * else - * # not a true kill due to recursion -- the kill happened before this assign in this BB, so it is still live - * changes -= 1 - * # if the live list changed, make sure all predecessors are in the queue to be reanalyzed - * if changes == 0 - * continue - * for pred in predecessors(bb) - * if not pred in bb-queue - * push-back(bb-queue, pred) - */ - - while (!bb_queue.empty()) { - BasicBlock *bb = bb_queue.back(); - bb_queue.pop_back(); - std::map &inuse_list = bb_uses[bb]; - unsigned changes = 0; - for (succ_iterator SI = succ_begin(bb), SE = succ_end(bb); SI != SE; ++SI) { - std::map &succ_uses = bb_uses[*SI]; - for (std::map::iterator reg = succ_uses.begin(), rege = succ_uses.end(); reg != rege; ++reg) { - if (!(reg->second & liveness::assign)) { - liveness::id &live = inuse_list[reg->first]; - if (!(live & (liveness::live | liveness::assign))) { - live |= liveness::live; - ++changes; - } - } - } - } - if (!changes) // short-circuit - continue; - for (BasicBlock::iterator i = bb->begin(), ie = bb->end(); i != ie; ++i) { - if (StoreInst *storeInst = dyn_cast(&*i)) { - frame_register def = get_gcroot(storeInst->getPointerOperand()); - if (CallInst *callInst = def.first) { - if (callInst->getCalledValue() == jlcall_frame_func) { - std::map::iterator inuse_reg = inuse_list.find(def); - if (inuse_reg != inuse_list.end() && (inuse_reg->second & liveness::live)) { - inuse_reg->second |= liveness::assign; - if (!(inuse_reg->second & liveness::kill)) - inuse_reg->second &= ~liveness::live; - --changes; - } - } - } - } - } - if (!changes) - continue; - assert(bb != &F.getEntryBlock()); // only undef variables should live-out from the entry bb - for (pred_iterator PI = pred_begin(bb), PE = pred_end(bb); PI != PE; ++PI) { - if (std::find(bb_queue.begin(), bb_queue.end(), *PI) == bb_queue.end()) - bb_queue.push_back(*PI); - } - } - -/* # allocate space in locals for the variables - * TBD - */ - -/* # allocate space in temp-args for each jlcall frame - * regs-used = zip(get-basic-blocks(), falses) - * for in frames - * frame-offset = find-space-for(inst, bb-uses, regs-used) - * record-usage(inst, bb-uses, regs-used, frame-offset) - * # frame iterator allocates space in reverse size order - * # so that the large frames get allocated first - * 
# and the smaller frames just fill in the gaps - * # I believe this is likely to give good results (compact gc-frames) - */ - std::map regs_used; - std::map frame_offsets; - unsigned maxDepth = 0; - for (; !frames.empty(); frames.pop()) { - std::pair frame = frames.top(); - unsigned arg_n = frame.first; - if (arg_n == 0) continue; - CallInst *callInst = frame.second; - unsigned frame_offset = find_space_for(callInst, bb_uses, regs_used); - record_usage(callInst, bb_uses, regs_used, frame_offset); - frame_offsets[callInst] = frame_offset; - if (frame_offset + arg_n > maxDepth) - maxDepth = frame_offset + arg_n; - } - -/* # cleanup and finalize the IR */ - for (Function::iterator bb = F.begin(), be = F.end(); bb != be; ++bb) { - for (BasicBlock::iterator i = bb->begin(), ie = bb->end(); i != ie; ) { - Instruction *inst = &*i; - ++i; - // delete the now unused gckill information - if (CallInst* callInst = dyn_cast(inst)) { - Value *callee = callInst->getCalledValue(); - if (callee == gckill_func || callee == gcroot_flush_func) { - callInst->eraseFromParent(); - } - } - // delete any StoreInst to a gcframe slot that isn't live - else if (StoreInst *storeInst = dyn_cast(inst)) { - frame_register def = get_gcroot(storeInst->getPointerOperand()); - if (CallInst *gcroot = def.first) { - if (gcroot->getCalledValue() == jlcall_frame_func) { - std::map &inuse_list = bb_uses[storeInst->getParent()]; - std::map::iterator inuse_reg = inuse_list.find(def); - if (inuse_reg == inuse_list.end()) - storeInst->eraseFromParent(); - } - } - } - } - } - - Instruction *tempSlot; - if (frame_offsets.empty()) { - tempSlot = NULL; - } - else { - tempSlot = GetElementPtrInst::Create(LLVM37_param(NULL) gcframe, ArrayRef(ConstantInt::get(T_int32, 2))); -#ifdef JL_DEBUG_BUILD - tempSlot->setName("temproots"); -#endif - tempSlot->insertAfter(gcframe); - if (last_gcframe_inst == gcframe) - last_gcframe_inst = tempSlot; - - // finalize all of the jlcall frames by replacing all of the frames with the appropriate gep(tempslot) - for (std::map::iterator frame = frame_offsets.begin(), framee = frame_offsets.end(); frame != framee; ++frame) { - CallInst *gcroot = frame->first; - tbaa_decorate_gcframe(gcroot, tbaa_gcframe); - Value* offset[1] = {ConstantInt::get(T_int32, frame->second)}; - GetElementPtrInst *gep = GetElementPtrInst::Create(LLVM37_param(NULL) tempSlot, makeArrayRef(offset)); - gep->insertAfter(last_gcframe_inst); - gcroot->replaceAllUsesWith(gep); - gep->takeName(gcroot); - gcroot->eraseFromParent(); - last_gcframe_inst = gep; - } - } - -/* # replace all intermediate roots defs with the appropriate gep(gcroot) - * for inst in entry-basic-block(function) - * if inst matches "gc-root" - * slot = get-argument(inst) - * newslot = CreateGEP(gc-frame) -> at InsertPoint(gc-frame) - * Replace(slot, newslot) -> at InsertPoint(gc-frame) - * CreateStore(NULL, newslot) -> at InsertPoint(gc-frame) - */ -#if JL_LLVM_VERSION >= 30600 - DIBuilder dbuilder(M, false); -#endif - unsigned argSpaceSize = 0; - for (BasicBlock::iterator I = gcframe->getParent()->begin(), E(gcframe); I != E; ) { - Instruction* inst = &*I; - ++I; - if (CallInst* callInst = dyn_cast(inst)) { - if (callInst->getCalledValue() == gcroot_func) { - unsigned offset = 2 + argSpaceSize++; - Instruction *argTempi = GetElementPtrInst::Create(LLVM37_param(NULL) gcframe, ArrayRef(ConstantInt::get(T_int32, offset))); - argTempi->insertAfter(last_gcframe_inst); -#if JL_LLVM_VERSION >= 30600 - Metadata *md = ValueAsMetadata::getIfExists(callInst); - if (md) { - Value 
*mdValue = MetadataAsValue::get(M.getContext(), md); - for (User::use_iterator use = mdValue->use_begin(), usee = mdValue->use_end(); use != usee; ) { - // need to recreate the dbg_declare accordingly -- sadly llvm can't handle this in RAUW - User *user = use->getUser(); - ++use; - if (CallInst* dbg = dyn_cast(user)) { - Function *called = dbg->getCalledFunction(); - if (called && called->getIntrinsicID() == Intrinsic::dbg_declare) { - DILocalVariable *dinfo = cast(cast(dbg->getOperand(1))->getMetadata()); - DIExpression *expr = cast(cast(dbg->getOperand(2))->getMetadata()); - SmallVector addr; - addr.push_back(llvm::dwarf::DW_OP_plus); - addr.push_back(offset * sizeof(void*)); - addr.append(expr->elements_begin(), expr->elements_end()); - expr = dbuilder.createExpression(addr); - dbuilder.insertDeclare(gcframe, dinfo, expr, -#if JL_LLVM_VERSION >= 30700 - dbg->getDebugLoc(), -#endif - dbg->getParent()); - dbg->eraseFromParent(); - } - } - } - } -#endif - tbaa_decorate_gcframe(callInst, tbaa_gcframe); - callInst->replaceAllUsesWith(argTempi); - argTempi->takeName(callInst); - callInst->eraseFromParent(); - // Initialize the slots for function variables to NULL - StoreInst *store = new StoreInst(V_null, argTempi); - store->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe); - store->insertAfter(argTempi); - last_gcframe_inst = store; - } - } - else if (AllocaInst *allocaInst = dyn_cast(inst)) { - if (allocaInst->getAllocatedType() == V_null->getType()) { - // TODO: this is overly aggressive at zeroing allocas that may not actually need to be zeroed - StoreInst *store = new StoreInst(V_null, allocaInst); - store->insertAfter(allocaInst); - } - } - } -#if JL_LLVM_VERSION >= 30600 - dbuilder.finalize(); -#endif - - if (argSpaceSize + maxDepth == 0) { - // 0 roots; remove gc frame entirely - gcframe->eraseFromParent(); - } - else { - // Initialize the slots for temporary variables to NULL - if (maxDepth > 0) { - BitCastInst *tempSlot_i8 = new BitCastInst(tempSlot, PointerType::get(T_int8, 0), "", last_gcframe_inst); - Type *argsT[2] = {tempSlot_i8->getType(), T_int32}; - Function *memset = Intrinsic::getDeclaration(&M, Intrinsic::memset, makeArrayRef(argsT)); - Value *args[5] = { - tempSlot_i8, // dest - ConstantInt::get(T_int8, 0), // val - ConstantInt::get(T_int32, sizeof(jl_value_t*)*maxDepth), // len - ConstantInt::get(T_int32, 0), // align - ConstantInt::get(T_int1, 0)}; // volatile - CallInst *zeroing = CallInst::Create(memset, makeArrayRef(args)); - zeroing->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe); - zeroing->insertAfter(tempSlot_i8); - last_gcframe_inst = zeroing; - } - - gcframe->setOperand(0, ConstantInt::get(T_int32, 2 + argSpaceSize + maxDepth)); // fix up the size of the gc frame - if (tempSlot) - tempSlot->setOperand(1, ConstantInt::get(T_int32, 2 + argSpaceSize)); // fix up the offset to the temp slot space - - IRBuilder<> builder(F.getContext()); - Type *T_ppjlvalue = V_null->getType()->getPointerTo(); -#ifdef _P64 - Type *T_size = T_int64; -#else - Type *T_size = T_int32; -#endif - builder.SetInsertPoint(&*(++BasicBlock::iterator(last_gcframe_inst))); // set insert *before* point, e.g. 
after the gcframe - DebugLoc noDbg; - builder.SetCurrentDebugLocation(noDbg); - - Instruction *inst = - builder.CreateStore(ConstantInt::get(T_size, (argSpaceSize + maxDepth) << 1), - builder.CreateBitCast(builder.CreateConstGEP1_32(gcframe, 0), T_size->getPointerTo())); - inst->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe); - inst = builder.CreateStore(builder.CreateLoad(builder.Insert(get_pgcstack(ptlsStates))), - builder.CreatePointerCast(builder.CreateConstGEP1_32(gcframe, 1), PointerType::get(T_ppjlvalue,0))); - inst->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe); - builder.CreateStore(gcframe, builder.Insert(get_pgcstack(ptlsStates))); - - // Finish by emitting the gc pops before any return - for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { - if (isa(I->getTerminator())) { - builder.SetInsertPoint(I->getTerminator()); // set insert *before* Ret - Instruction *gcpop = - (Instruction*)builder.CreateConstGEP1_32(gcframe, 1); - inst = builder.CreateLoad(gcpop); - inst->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe); - inst = builder.CreateStore(builder.CreatePointerCast(inst, T_ppjlvalue), - builder.Insert(get_pgcstack(ptlsStates))); - inst->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe); - } - } - } - -#ifndef JL_NDEBUG - jl_gc_frame_stats.count++; - jl_gc_frame_stats.locals += argSpaceSize; - jl_gc_frame_stats.temp += maxDepth; -#endif -} - -struct LowerGCFrame: public ModulePass { - static char ID; - LowerGCFrame() : ModulePass(ID) - {} - -private: - void runOnFunction(Module *M, Function &F, Function *ptls_getter, - Type *T_pjlvalue, MDNode *tbaa_gcframe); - bool runOnModule(Module &M) override; -}; - -static void eraseFunction(Module &M, const char *name) -{ - if (Function *f = M.getFunction(name)) { - f->eraseFromParent(); - } -} - -bool LowerGCFrame::runOnModule(Module &M) -{ - MDNode *tbaa_gcframe = tbaa_make_child("jtbaa_gcframe").first; - - Function *ptls_getter = M.getFunction("jl_get_ptls_states"); - FunctionType *functype = nullptr; - Type *T_pjlvalue = nullptr; - if (ptls_getter) { - functype = ptls_getter->getFunctionType(); - auto T_ppjlvalue = - cast(functype->getReturnType())->getElementType(); - T_pjlvalue = cast(T_ppjlvalue)->getElementType(); - } - for (auto F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration()) - continue; - runOnFunction(&M, *F, ptls_getter, T_pjlvalue, tbaa_gcframe); - } - - // Cleanup for GC frame lowering. 
- eraseFunction(M, "julia.gc_root_decl"); - eraseFunction(M, "julia.gc_root_kill"); - eraseFunction(M, "julia.jlcall_frame_decl"); - eraseFunction(M, "julia.gcroot_flush"); - return true; -} - -void LowerGCFrame::runOnFunction(Module *M, Function &F, Function *ptls_getter, - Type *T_pjlvalue, MDNode *tbaa_gcframe) -{ - CallInst *ptlsStates = nullptr; - for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); - ptls_getter && I != E; ++I) { - if (CallInst *callInst = dyn_cast(&*I)) { - if (callInst->getCalledValue() == ptls_getter) { - ptlsStates = callInst; - break; - } - } - } - JuliaGCAllocator allocator(F, ptlsStates, T_pjlvalue, tbaa_gcframe); - allocator.allocate_frame(); -} - -char LowerGCFrame::ID = 0; - -static RegisterPass X("LowerGCFrame", "Lower GCFrame Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); -} - -#ifndef JL_NDEBUG // llvm assertions build -// gdb debugging code for inspecting the bb_uses map -void jl_dump_bb_uses(std::map > &bb_uses) -{ - for (std::map >::iterator - live_reg = bb_uses.begin(), e = bb_uses.end(); live_reg != e; ++live_reg) { - BasicBlock *bb = live_reg->first; - errs() << '\n' << bb << '\n'; - for (std::map::iterator - regs = live_reg->second.begin(), regse = live_reg->second.end(); regs != regse; ++regs) { - errs() << regs->second << " #" << regs->first.second << ' ' << regs->first.first << '\n'; - } - } -} -#endif - -Pass *createLowerGCFramePass() -{ - return new LowerGCFrame(); -} - -extern "C" JL_DLLEXPORT -void LLVMAddLowerGCFramePass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createLowerGCFramePass()); -} diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp new file mode 100644 index 0000000000000..f40d9215dec95 --- /dev/null +++ b/src/llvm-late-gc-lowering.cpp @@ -0,0 +1,1250 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#include +#include +#include "llvm/Analysis/CFG.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "llvm-version.h" +#include "codegen_shared.h" +#include "julia.h" + +#define DEBUG_TYPE "late_lower_gcroot" + +using namespace llvm; + +/* Julia GC Root Placement pass. For a general overview of the design of GC + root lowering, see the devdocs. This file is the actual implementation. + + The actual algorithm is fairly straightforward. First recall the goal of this + pass: + + Minimize the number of needed gc roots/stores to them subject to the constraint + that at every safepoint, any live gc-tracked pointer (i.e. for which there is + a path after this point that contains a use of this pointer) is in some gc slot. + + In particular, in order to understand this algorithm, it is important to + realize that the only places where rootedness matters is at a safepoint. + + Now, the primary phases of the algorithm are: + + 1. Local Scan + + During this step, each Basic Block is inspected and analyzed for local + properties. In particular, we want to determine the ordering of any of + the following activities: + + - Any Def of a gc-tracked pointer. In general Defs are the results of + calls or loads from appropriate memory locations. Phi nodes and + selects do complicate this story slightly as described below. + - Any use of a gc-tracked or derived pointer. 
As described in the
+       devdocs, a use is in general one of
+         a) a load from a tracked/derived value
+         b) a store to a tracked/derived value
+         c) a store OF a tracked/derived value
+         d) a use of a value as a call operand (including operand bundles)
+     - Any safepoint
+
+   Crucially, we also perform pointer numbering during the local scan,
+   assigning every Def a unique integer and caching the integer for each
+   derived pointer. This allows us to operate only on the set of Defs (
+   represented by these integers) for the rest of the algorithm. We also
+   maintain some local utility information that is needed by later passes
+   (see the BBState struct for details).
+
+   2. Dataflow Computation
+
+   This computation operates entirely over the function's control flow graph
+   and does not look into a basic block. The algorithm is essentially
+   textbook iterative data flow for liveness computation. However, the
+   data flow equations are slightly more complicated because we also
+   forward propagate rootedness information in addition to backpropagating
+   liveness.
+
+   3. Live Set Computation
+
+   With the liveness information from the previous step, we can now compute,
+   for every safepoint, the set of values live at that particular safepoint.
+   There are three pieces of information being combined here:
+     i. Values that needed to be live due to local analysis (e.g. there
+        was a def, then a safepoint, then a use). This was computed during
+        local analysis.
+    ii. Values that are live across the basic block (i.e. they are live
+        at every safepoint within the basic block). This relies entirely
+        on the liveness information.
+   iii. Values that are now live-out from the basic block (i.e. they are
+        live at every safepoint following their def). During local
+        analysis, we keep, for every safepoint, those values that would
+        be live if they were live out. Here we can check if they are
+        actually live-out and make the appropriate additions to the live
+        set.
+
+   Lastly, we also explicitly compute, for each value, the list of values
+   that are simultaneously live at some safepoint. This is known as an
+   "interference graph" and is the input to the next step.
+
+   4. GC Root coloring
+
+   Two values which are not simultaneously live at a safepoint can share the
+   same slot. This is an important optimization, because otherwise long
+   functions would have exceptionally large GC slots, reducing performance
+   and bloating the size of the stack. Assigning values to these slots is
+   equivalent to doing graph coloring on the interference graph - the graph
+   where nodes are values and two values have an edge if they are
+   simultaneously live at a safepoint - which we computed in the previous
+   step. Now graph coloring in general is a hard problem. However, for SSA
+   form programs (and most programs in general, by virtue of their
+   structure), the resulting interference graphs are chordal and can be
+   colored optimally in linear time by performing greedy coloring in a
+   perfect elimination order. Now, our interference graphs are likely not
+   entirely chordal due to some non-SSA corner cases. However, using the same
+   algorithm should still give a very good coloring while having sufficiently
+   low runtime.
+
+   5. JLCall frame optimizations
+
+   Unlike earlier iterations of the gc root placement logic, jlcall frames
+   are no longer treated as a special case and need not necessarily be sunk
+   into the gc frame. Additionally, we now emit lifetime
+   intrinsics, so regular stack slot coloring will merge any jlcall frames
+   not sunk into the gc frame. Nevertheless, performing such sinking can still
+   be profitable. Since all arguments to a jlcall are guaranteed to be live
+   at that call in some gc slot, we can attempt to rearrange the slots within
+   the gc-frame, or re-use slots not assigned at that particular location
+   for the gcframe. However, even without this optimization, stack frames
+   are at most two times larger than optimal (because regular stack coloring
+   can merge the jlcall allocas).
+
+   N.B.: This step is not yet implemented.
+
+   6. Root placement
+
+   This performs the actual insertion of the GCFrame pushes/pops, zeros out
+   the gc frame and creates the stores to the gc frame according to the
+   stack slot assignment computed in the previous step. GC frame stores
+   are generally sunk right before the first safepoint that uses them
+   (this is beneficial for code where the primary path does not have
+   safepoints, but some other path - e.g. the error path - does). However,
+   if the first safepoint is not dominated by the definition (this can
+   happen due to the non-ssa corner cases), the store is inserted right after
+   the definition.
+
+   7. Cleanup
+
+   This step performs necessary cleanup before passing the IR to codegen. In
+   particular, it removes any calls to pointer_from_objref intrinsics and
+   removes the extra operand bundles from ccalls. In the future it could
+   also strip the addrspace information from all values as this
+   information is no longer needed.
+
+
+   There are a couple of important special cases that deserve special attention:
+
+   A. PHIs and Selects
+
+   In general PHIs and selects are treated as separate defs for the purposes
+   of the algorithm and their operands as uses of those values. It is
+   important to consider, however, WHERE the uses of a PHI's operands are
+   located. It is neither at the start of the basic block, because the values
+   do not dominate the block (so we can't really consider them live-in), nor
+   at the end of the predecessor (because they are actually live out).
+   Instead it is best to think of those uses as living on the edge between
+   the appropriate predecessor and the block containing the PHI.
+
+   Another concern is PHIs of derived values. Since we cannot simply root
+   these values by storing them to a GC slot, we need to insert a new,
+   artificial PHI that tracks the base pointers for the derived values. E.g.
+   in:
+
+   A:
+     %Abase = load addrspace(10) *...
+     %Aderived = addrspacecast %Abase to addrspace(11)
+   B:
+     %Bbase = load addrspace(10) *...
+     %Bderived = addrspacecast %Bbase to addrspace(11)
+   C:
+     %phi = phi [%Aderived, %A
+                 %Bderived, %B]
+
+   we will insert another phi in C to track the relevant base pointers:
+
+     %philift = phi [%Abase, %A
+                     %Bbase, %B]
+
+   We then pretend, for the purposes of numbering, that %phi was derived from
+   %philift. Note that in order to be able to do this, we need to be able to
+   perform this lifting either during numbering or instruction scanning.
+
+   B. Vectors of pointers/Union representations
+
+   Since this pass runs very late in the pass pipeline, it runs after the
+   various vectorization passes. As a result, we have to potentially deal
+   with vectors of gc-tracked pointers. For the purposes of most of the
+   algorithm, we simply assign every element of the vector a separate number
+   and no changes are needed.
However, those parts of the algorithm that + look at IR need to be aware of the possibility of encountering vectors of + pointers. + + Similarly, unions (e.g. in call returns) are represented as a struct of + a gc-tracked value and an argument selector. We simply assign a single + number to this struct and proceed as if it was a single pointer. However, + this again requires care at the IR level. + + C. Non mem2reg'd allocas + + Under some circumstances, allocas will still be present in the IR when + we get to this pass. We don't try very hard to handle this case, and + simply sink the alloca into the GCFrame. +*/ + +struct BBState { + // These do not get updated after local analysis + BitVector Defs; + BitVector PhiOuts; + //// Upward exposed uses that do not have a preceeding safepoint + BitVector UpExposedUsesUnrooted; + //// All other uses + BitVector UpExposedUses; + //// Downward exposed uses that were not followed by a safepoint + BitVector DownExposedUnrooted; + // These get updated during dataflow + BitVector LiveIn; + BitVector LiveOut; + //// Incoming values that are unrooted - these are propagated forward. I.e. + //// they need not necessarily be LiveIn if there are no following uses, + //// but if they are they haven't been rooted yet. + BitVector UnrootedIn; + BitVector UnrootedOut; + std::vector Safepoints; + int TopmostSafepoint = -1; + bool HasSafepoint = false; + // Have we gone through this basic block in our local scan yet? + bool Done = false; +}; + +struct State { + // The maximum assigned value number + int MaxPtrNumber; + // The maximum assigned safepoint number + int MaxSafepointNumber; + // Cache of numbers assigned to IR values. This includes caching of numbers + // for derived values + std::map AllPtrNumbering; + std::map> AllVectorNumbering; + // Numbering of pointers. This only includes Defs + std::map PtrNumbering; + // The reverse of the previous maps + std::map ReversePtrNumbering; + // Neighbors in the coloring interferece graph. I.e. for each value, the + // indicies of other values that are used simulataneously at some safe point. + std::vector> Neighbors; + // The result of the local analysis + std::map BBStates; + + // The assignment of numbers to safepoints. The indicies in the map + // are indices into the next three maps which store safepoint properties + std::map SafepointNumbering; + + // The set of values live at a particular safepoint + std::vector LiveSets; + // The set of values for which this is the first safepoint along some + // relevant path - i.e. the value needs to be rooted at this safepoint + std::vector> Rootings; + // Those values that - if live out from our parent basic block - are live + // at this safepoint. + std::vector> LiveIfLiveOut; + // We don't bother doing liveness on Allocas that were not mem2reg'ed. + // they just get directly sunk into the root array. 
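To make the dataflow step described in the comment above concrete, here is a minimal editorial sketch (not part of the patch) of the textbook backward liveness iteration, phrased in terms of the `BBState`/`State` fields defined in this file. The helper name is invented, all bit vectors are assumed to have been resized to `MaxPtrNumber + 1` beforehand, and the forward propagation of the `UnrootedIn`/`UnrootedOut` sets that the real pass also performs is omitted.
```
// Editorial sketch only: invented helper showing the classic backward
// liveness iteration over the per-block sets defined in BBState above.
// Assumes every BitVector was already resized to MaxPtrNumber + 1.
// (Needs llvm/ADT/PostOrderIterator.h for post_order and llvm/IR/CFG.h
// for successors.)
static void sketchComputeLiveness(Function &F, State &S)
{
    bool Changed = true;
    while (Changed) {
        Changed = false;
        // Post order lets most liveness flow backwards within one sweep.
        for (BasicBlock *BB : post_order(&F)) {
            BBState &BBS = S.BBStates[BB];
            // LiveOut = PhiOuts | union of successors' LiveIn
            BitVector NewLiveOut = BBS.PhiOuts;
            for (BasicBlock *Succ : successors(BB))
                NewLiveOut |= S.BBStates[Succ].LiveIn;
            if (NewLiveOut != BBS.LiveOut) {
                BBS.LiveOut = NewLiveOut;
                Changed = true;
            }
            // LiveIn = UpExposedUses | (LiveOut \ Defs)
            BitVector NewLiveIn = BBS.LiveOut;
            NewLiveIn.reset(BBS.Defs);      // values defined here are not live-in
            NewLiveIn |= BBS.UpExposedUses; // uses not preceded by a local def
            if (NewLiveIn != BBS.LiveIn) {
                BBS.LiveIn = NewLiveIn;
                Changed = true;
            }
        }
    }
}
```
Iterating blocks in post order lets most liveness information propagate backwards in a single sweep, so a loop of this shape typically converges after very few iterations.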
+ std::vector Allocas; + State() : MaxPtrNumber(-1), MaxSafepointNumber(-1) {} +}; + +namespace llvm { + void initializeLateLowerGCFramePass(PassRegistry &Registry); +} + +extern std::pair tbaa_make_child(const char *name, MDNode *parent=nullptr, bool isConstant=false); +struct LateLowerGCFrame: public FunctionPass { + static char ID; + LateLowerGCFrame() : FunctionPass(ID) + { + llvm::initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry()); + tbaa_gcframe = tbaa_make_child("jtbaa_gcframe").first; + } + +protected: + void getAnalysisUsage(AnalysisUsage &AU) const override { + FunctionPass::getAnalysisUsage(AU); + AU.addRequired(); + AU.addPreserved(); + AU.setPreservesCFG(); + } + +private: + Type *T_prjlvalue; + Type *T_ppjlvalue; + Type *T_size; + Type *T_int32; + MDNode *tbaa_gcframe; + Function *gc_kill_func; + Function *ptls_getter; + Function *gc_flush_func; + Function *pointer_from_objref_func; + CallInst *ptlsStates; + + void MaybeNoteDef(State &S, BBState &BBS, Value *Def, const std::vector &SafepointsSoFar); + void NoteUse(State &S, BBState &BBS, Value *V, BitVector &Uses); + void NoteUse(State &S, BBState &BBS, Value *V) { + NoteUse(S, BBS, V, BBS.UpExposedUses); + } + int LiftPhi(State &S, PHINode *Phi); + int Number(State &S, Value *V); + std::vector NumberVector(State &S, Value *Vec); + void NoteOperandUses(State &S, BBState &BBS, User &UI, BitVector &Uses); + void NoteOperandUses(State &S, BBState &BBS, User &UI){ + NoteOperandUses(S, BBS, UI, BBS.UpExposedUses); + } + State LocalScan(Function &F); + void ComputeLiveness(Function &F, State &S); + void ComputeLiveSets(Function &F, State &S); + void PushGCFrame(AllocaInst *gcframe, unsigned NRoots, Instruction *InsertAfter); + void PopGCFrame(AllocaInst *gcframe, Instruction *InsertBefore); + std::vector ColorRoots(const State &S); + void PlaceRootsAndUpdateCalls(Function &F, std::vector &Colors, State &S, std::map>); + bool doInitialization(Module &M) override; + bool runOnFunction(Function &F) override; + Instruction *get_pgcstack(Instruction *ptlsStates); + bool CleanupIR(Function &F); + void NoteUseChain(State &S, BBState &BBS, User *TheUser); +}; + +static unsigned getValueAddrSpace(Value *V) { + return cast(V->getType())->getAddressSpace(); +} + +static bool isSpecialPtr(Type *Ty) { + PointerType *PTy = dyn_cast(Ty); + if (!PTy) + return false; + unsigned AS = PTy->getAddressSpace(); + return AddressSpace::FirstSpecial <= AS && AS <= AddressSpace::LastSpecial; +} + +static bool isSpecialPtrVec(Type *Ty) { + auto *VTy = dyn_cast(Ty); + if (!VTy) + return false; + return isSpecialPtr(VTy->getElementType()); +} + +static bool isUnionRep(Type *Ty) { + return Ty->isStructTy() && cast(Ty)->getNumElements() == 2 && + isSpecialPtr(cast(Ty)->getTypeAtIndex((unsigned)0)); +} + +static Value *FindBaseValue(const State &S, Value *V, bool UseCache = true) { + Value *CurrentV = V; + while (true) { + if (UseCache) { + if (CurrentV->getType()->isPointerTy()) { + auto it = S.AllPtrNumbering.find(CurrentV); + if (it != S.AllPtrNumbering.end()) + return CurrentV; + } else { + auto it = S.AllVectorNumbering.find(CurrentV); + if (it != S.AllVectorNumbering.end()) + return CurrentV; + } + } + if (isa(CurrentV)) + CurrentV = cast(CurrentV)->getOperand(0); + else if (isa(CurrentV)) { + Value *NewV = cast(CurrentV)->getOperand(0); + if (getValueAddrSpace(NewV) == 0) + break; + CurrentV = NewV; + } + else if (isa(CurrentV)) + CurrentV = cast(CurrentV)->getOperand(0); + else if (isa(CurrentV)) { + Value *Operand = 
cast(CurrentV)->getOperand(0); + if (!isUnionRep(Operand->getType())) + break; + CurrentV = Operand; + continue; + } else + break; + } + assert(isa(CurrentV) || isa(CurrentV) || + isa(CurrentV) || isa(CurrentV) || + isa(CurrentV) || isa(CurrentV) || + isa(CurrentV) || isa(CurrentV) || + isa(CurrentV)); + return CurrentV; +} + +static Value *MaybeExtractUnion(Value *Val, Instruction *InsertBefore) { + if (isUnionRep(Val->getType())) { + Val = ExtractValueInst::Create(Val, {(unsigned)0}, "", InsertBefore); + } + return Val; +} + +static int LiftSelect(State &S, SelectInst *SI) { + Value *TrueBase = FindBaseValue(S, SI->getTrueValue(), false); + Value *FalseBase = FindBaseValue(S, SI->getFalseValue(), false); + TrueBase = MaybeExtractUnion(TrueBase, SI); + FalseBase = MaybeExtractUnion(FalseBase, SI); + if (getValueAddrSpace(TrueBase) != AddressSpace::Tracked) + TrueBase = ConstantPointerNull::get(cast(FalseBase->getType())); + else if (getValueAddrSpace(FalseBase) != AddressSpace::Tracked) + FalseBase = ConstantPointerNull::get(cast(TrueBase->getType())); + Value *SelectBase = SelectInst::Create(SI->getCondition(), + TrueBase, FalseBase, "gclift", SI); + int Number = ++S.MaxPtrNumber; + S.PtrNumbering[SelectBase] = S.AllPtrNumbering[SelectBase] = + S.AllPtrNumbering[SI] = Number; + S.ReversePtrNumbering[Number] = SelectBase; + return Number; +} + +int LateLowerGCFrame::LiftPhi(State &S, PHINode *Phi) +{ + PHINode *lift = PHINode::Create(T_prjlvalue, Phi->getNumIncomingValues(), "gclift", Phi); + for (unsigned i = 0; i < Phi->getNumIncomingValues(); ++i) { + Value *Incoming = Phi->getIncomingValue(i); + Value *Base = FindBaseValue(S, Incoming, false); + if (getValueAddrSpace(Base) != AddressSpace::Tracked) + Base = ConstantPointerNull::get(cast(T_prjlvalue)); + if (Base->getType() != T_prjlvalue) + Base = new BitCastInst(Base, T_prjlvalue, "", Phi->getIncomingBlock(i)->getTerminator()); + lift->addIncoming(Base, Phi->getIncomingBlock(i)); + } + int Number = ++S.MaxPtrNumber; + S.PtrNumbering[lift] = S.AllPtrNumbering[lift] = + S.AllPtrNumbering[Phi] = Number; + S.ReversePtrNumbering[Number] = lift; + return Number; +} + +int LateLowerGCFrame::Number(State &S, Value *V) { + assert(isSpecialPtr(V->getType()) || isUnionRep(V->getType())); + Value *CurrentV = FindBaseValue(S, V); + auto it = S.AllPtrNumbering.find(CurrentV); + if (it != S.AllPtrNumbering.end()) + return it->second; + int Number; + if (isa(CurrentV) || isa(CurrentV) || + ((isa(CurrentV) || isa(CurrentV)) && + getValueAddrSpace(CurrentV) != AddressSpace::Tracked)) { + // We know this is rooted in the parent + Number = -1; + } else if (isa(CurrentV) && getValueAddrSpace(CurrentV) != AddressSpace::Tracked) { + int Number = LiftSelect(S, cast(CurrentV)); + S.AllPtrNumbering[V] = Number; + return Number; + } else if (isa(CurrentV) && getValueAddrSpace(CurrentV) != AddressSpace::Tracked) { + int Number = LiftPhi(S, cast(CurrentV)); + S.AllPtrNumbering[V] = Number; + return Number; + } else if (isa(CurrentV) && !isUnionRep(CurrentV->getType())) { + assert(false && "TODO: Extract"); + abort(); + } else { + assert( + (CurrentV->getType()->isPointerTy() && + getValueAddrSpace(CurrentV) == AddressSpace::Tracked) || + isUnionRep(CurrentV->getType())); + Number = ++S.MaxPtrNumber; + S.ReversePtrNumbering[Number] = CurrentV; + } + S.PtrNumbering[CurrentV] = S.AllPtrNumbering[CurrentV] = S.AllPtrNumbering[V] = Number; + return Number; +} + +std::vector LateLowerGCFrame::NumberVector(State &S, Value *V) { + auto it = S.AllVectorNumbering.find(V); 
+ if (it != S.AllVectorNumbering.end()) + return it->second; + Value *CurrentV = FindBaseValue(S, V); + it = S.AllVectorNumbering.find(CurrentV); + if (it != S.AllVectorNumbering.end()) + return it->second; + if (isa(CurrentV) || + ((isa(CurrentV) || isa(CurrentV) || + isa(CurrentV)) && + getValueAddrSpace(CurrentV) != AddressSpace::Tracked)) { + S.AllVectorNumbering[V] = std::vector{}; + } + /* We (the frontend) don't insert either of these, but it would be legal - + though a bit strange, considering they're pointers) for the optimizer to + do so. All that's needed here is to NumberVector the previous vector/value + and lift the operation */ + else if (isa(CurrentV)) { + assert(false && "TODO Shuffle"); + } else if (isa(CurrentV)) { + assert(false && "TODO Insert"); + } else if (isa(CurrentV)) { + // This is simple, we can just number them sequentially + std::vector Numbers; + for (unsigned i = 0; i < cast(CurrentV->getType())->getNumElements(); ++i) { + int Num = ++S.MaxPtrNumber; + Numbers.push_back(Num); + S.ReversePtrNumbering[Num] = V; + } + S.AllVectorNumbering[V] = Numbers; + } + return S.AllVectorNumbering[CurrentV]; +} + +static void MaybeResize(BBState &BBS, unsigned Idx) { + if (BBS.Defs.size() <= Idx) { + BBS.Defs.resize(Idx + 1); + BBS.UpExposedUses.resize(Idx + 1); + BBS.UpExposedUsesUnrooted.resize(Idx + 1); + BBS.DownExposedUnrooted.resize(Idx + 1); + BBS.PhiOuts.resize(Idx + 1); + } +} + +static bool HasBitSet(const BitVector &BV, unsigned Bit) { + return Bit < BV.size() && BV[Bit]; +} + +static void NoteDef(State &S, BBState &BBS, int Num, const std::vector &SafepointsSoFar) { + assert(Num != -1); + MaybeResize(BBS, Num); + assert(BBS.Defs[Num] == 0 && "SSA Violation or misnumbering?"); + BBS.Defs[Num] = 1; + BBS.UpExposedUses[Num] = 0; + BBS.UpExposedUsesUnrooted[Num] = 0; + if (!BBS.HasSafepoint) + BBS.DownExposedUnrooted[Num] = 1; + else if (HasBitSet(S.LiveSets[BBS.TopmostSafepoint], Num)) { + S.Rootings[BBS.TopmostSafepoint].insert(Num); + } + // This value could potentially be live at any following safe point + // if it ends up live out, so add it to the LiveIfLiveOut lists for all + // following safepoints. + for (int Safepoint : SafepointsSoFar) { + S.LiveIfLiveOut[Safepoint].push_back(Num); + } +} + +void LateLowerGCFrame::MaybeNoteDef(State &S, BBState &BBS, Value *Def, const std::vector &SafepointsSoFar) { + int Num = -1; + Type *RT = Def->getType(); + if (isSpecialPtr(RT)) { + assert(getValueAddrSpace(Def) == AddressSpace::Tracked && + "Returned value of GC interest, but not tracked?"); + Num = Number(S, Def); + } + else if (isUnionRep(RT)) { + // Probably a union return. Find the extractvalue + Num = Number(S, Def); + } + else if (isSpecialPtrVec(RT)) { + std::vector Nums = NumberVector(S, Def); + for (int Num : Nums) { + NoteDef(S, BBS, Num, SafepointsSoFar); + } + return; + } + else { + return; + } + NoteDef(S, BBS, Num, SafepointsSoFar); +} + +static int NoteSafepoint(State &S, BBState &BBS, CallInst *CI) { + int Number = ++S.MaxSafepointNumber; + S.SafepointNumbering[CI] = Number; + // Note which pointers are upward exposed live here. They need to be + // considered live at this safepoint even when they have a def earlier + // in this BB (i.e. 
even when they don't participate in the dataflow + // computation) + BBS.UpExposedUses |= BBS.UpExposedUsesUnrooted; + BBS.UpExposedUsesUnrooted.reset(); + S.LiveSets.push_back(BBS.UpExposedUses); + S.Rootings.push_back(std::set{}); + S.LiveIfLiveOut.push_back(std::vector{}); + return Number; +} + +void LateLowerGCFrame::NoteUse(State &S, BBState &BBS, Value *V, BitVector &Uses) { + // Short circuit to have to avoid dealing specially with vectors of + // constants, etc. + if (isa(V)) + return; + else if (isSpecialPtrVec(V->getType())) { + std::vector Nums = NumberVector(S, V); + for (int Num : Nums) { + MaybeResize(BBS, Num); + Uses[Num] = 1; + } + } + else { + int Num = Number(S, V); + if (Num == -1) + return; + MaybeResize(BBS, Num); + Uses[Num] = 1; + } +} + +void LateLowerGCFrame::NoteOperandUses(State &S, BBState &BBS, User &UI, BitVector &Uses) { + for (Use &U : UI.operands()) { + Value *V = U; + if (!isSpecialPtr(V->getType())) + continue; + NoteUse(S, BBS, V, Uses); + } +} + +template +void RecursivelyVisit(callback f, Value *V) { + for (Use &VU : V->uses()) { + User *TheUser = VU.getUser(); + if (isa(TheUser)) + f(VU); + if (isa(TheUser) || isa(TheUser) || + isa(TheUser) || isa(TheUser) || + isa(TheUser)) + continue; + if (isa(TheUser) || isa(TheUser) || isa(TheUser)) { + RecursivelyVisit(f, TheUser); + continue; + } + V->dump(); + TheUser->dump(); + assert(false && "Unexpected instruction"); + } +} + +static void dumpBitVectorValues(State &S, BitVector &BV) { + bool first = true; + for (int Idx = BV.find_first(); Idx >= 0; Idx = BV.find_next(Idx)) { + if (!first) + dbgs() << ", "; + first = false; + S.ReversePtrNumbering[Idx]->printAsOperand(dbgs()); + } +} + +/* Debugging utility to dump liveness information */ +JL_USED_FUNC static void dumpLivenessState(Function &F, State &S) { + for (auto &BB : F) { + dbgs() << "Liveness analysis for BB " << BB.getName(); + dbgs() << "\n\tDefs: "; + dumpBitVectorValues(S, S.BBStates[&BB].Defs); + dbgs() << "\n\tPhiOuts: "; + dumpBitVectorValues(S, S.BBStates[&BB].PhiOuts); + dbgs() << "\n\tUpExposedUsesUnrooted: "; + dumpBitVectorValues(S, S.BBStates[&BB].UpExposedUsesUnrooted); + dbgs() << "\n\tUpExposedUses: "; + dumpBitVectorValues(S, S.BBStates[&BB].UpExposedUses); + dbgs() << "\n\tDownExposedUnrooted: "; + dumpBitVectorValues(S, S.BBStates[&BB].DownExposedUnrooted); + dbgs() << "\n\tLiveIn: "; + dumpBitVectorValues(S, S.BBStates[&BB].LiveIn); + dbgs() << "\n\tLiveOut: "; + dumpBitVectorValues(S, S.BBStates[&BB].LiveOut); + dbgs() << "\n\tUnrootedIn: "; + dumpBitVectorValues(S, S.BBStates[&BB].UnrootedIn); + dbgs() << "\n\tUnrootedOut: "; + dumpBitVectorValues(S, S.BBStates[&BB].UnrootedOut); + dbgs() << "\n"; + } +} + +State LateLowerGCFrame::LocalScan(Function &F) { + State S; + for (BasicBlock &BB : F) { + BBState &BBS = S.BBStates[&BB]; + for (auto it = BB.rbegin(); it != BB.rend(); ++it) { + Instruction &I = *it; + if (CallInst *CI = dyn_cast(&I)) { + if (isa(CI)) { + // Intrinsics are never GC uses/defs + continue; + } + MaybeNoteDef(S, BBS, CI, BBS.Safepoints); + NoteOperandUses(S, BBS, I); + for (Use &U : CI->operands()) { + Value *V = U; + if (isUnionRep(V->getType())) { + NoteUse(S, BBS, V); + continue; + } + } + int SafepointNumber = NoteSafepoint(S, BBS, CI); + BBS.HasSafepoint = true; + BBS.TopmostSafepoint = SafepointNumber; + BBS.Safepoints.push_back(SafepointNumber); + } else if (LoadInst *LI = dyn_cast(&I)) { + // Allocas get sunk into the gc frame, so don't generate defs + MaybeNoteDef(S, BBS, LI, BBS.Safepoints); + 
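+                // The load's result was noted as a def above (if tracked); its
+                // address operand only generates a use, noted below.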
NoteOperandUses(S, BBS, I, BBS.UpExposedUsesUnrooted);
+            } else if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
+                // We need to insert an extra select for the GC root
+                if (!isSpecialPtr(SI->getType()))
+                    continue;
+                if (getValueAddrSpace(SI) != AddressSpace::Tracked) {
+                    if (S.AllPtrNumbering.find(SI) != S.AllPtrNumbering.end())
+                        continue;
+                    LiftSelect(S, SI);
+                } else {
+                    MaybeNoteDef(S, BBS, SI, BBS.Safepoints);
+                    NoteOperandUses(S, BBS, I, BBS.UpExposedUsesUnrooted);
+                }
+            } else if (PHINode *Phi = dyn_cast<PHINode>(&I)) {
+                if (!isSpecialPtr(Phi->getType())) {
+                    continue;
+                }
+                // We need to insert an extra phi for the GC root
+                if (getValueAddrSpace(Phi) != AddressSpace::Tracked) {
+                    if (S.AllPtrNumbering.find(Phi) != S.AllPtrNumbering.end())
+                        continue;
+                    LiftPhi(S, Phi);
+                } else {
+                    MaybeNoteDef(S, BBS, Phi, BBS.Safepoints);
+                    for (unsigned i = 0; i < Phi->getNumIncomingValues(); ++i) {
+                        BBState &IncomingBBS = S.BBStates[Phi->getIncomingBlock(i)];
+                        NoteUse(S, IncomingBBS, Phi->getIncomingValue(i), IncomingBBS.PhiOuts);
+                    }
+                }
+            } else if (isa<StoreInst>(&I) || isa<ReturnInst>(&I)) {
+                NoteOperandUses(S, BBS, I, BBS.UpExposedUsesUnrooted);
+            } else if (auto *ASCI = dyn_cast<AddrSpaceCastInst>(&I)) {
+                if (getValueAddrSpace(ASCI) == AddressSpace::Tracked)
+                    MaybeNoteDef(S, BBS, ASCI, BBS.Safepoints);
+            } else if (auto *AI = dyn_cast<AllocaInst>(&I)) {
+                if (isSpecialPtr(AI->getAllocatedType()) && !AI->isArrayAllocation() &&
+                    cast<PointerType>(AI->getAllocatedType())->getAddressSpace() == AddressSpace::Tracked)
+                {
+                    S.Allocas.push_back(AI);
+                }
+            }
+        }
+        // Pre-seed the dataflow variables.
+        BBS.LiveIn = BBS.UpExposedUses;
+        BBS.LiveIn |= BBS.UpExposedUsesUnrooted;
+        BBS.UnrootedOut = BBS.DownExposedUnrooted;
+        BBS.Done = true;
+    }
+    return S;
+}
+
+/*
+ * DataFlow equations:
+ * LiveIn[BB] = UpExposedUses[BB] ∪ (LiveOut[BB] - Defs[BB])
+ * LiveOut[BB] = PhiUses[BB] ∪ ∪_{Succ} LiveIn[Succ]
+ * UnrootedOut[BB] = DownExposedUnrooted[BB] ∪ (HasSafepoint ? {} : UnrootedIn[BB])
+ * UnrootedIn[BB] = ∪_{Pred} UnrootedOut[Pred]
+ *
+ * We'll perform textbook iterative dataflow to compute this. There are better
+ * algorithms. If this starts becoming a problem, we should use one of them.
+ */
+void LateLowerGCFrame::ComputeLiveness(Function &F, State &S) {
+    bool Converged = false;
+    /* Liveness is a reverse problem. Our problem is slightly more general,
+     * because the Unrooted* variables are forward problems. Nevertheless,
+     * we use reverse postorder in an attempt to speed convergence of the Live*
+     * variables, in anticipation of the live ranges being larger than the
+     * unrooted ranges (since those terminate at any safe point).
+     */
+    ReversePostOrderTraversal<Function *> RPOT(&F);
+    while (!Converged) {
+        bool AnyChanged = false;
+        for (BasicBlock *BB : RPOT) {
+            // This could all be done more efficiently by only updating what
+            // changed - let's get it working first, though.
+            BBState &BBS = S.BBStates[BB];
+            BitVector NewLiveOut = BBS.PhiOuts;
+            for (BasicBlock *Succ : successors(BB)) {
+                NewLiveOut |= S.BBStates[Succ].LiveIn;
+            }
+            if (NewLiveOut != BBS.LiveOut) {
+                AnyChanged = true;
+                BBS.LiveOut = NewLiveOut;
+                MaybeResize(BBS, BBS.LiveOut.size() - 1);
+            }
+            BitVector NewLiveIn = BBS.LiveOut;
+            BitVector FlippedDefs = BBS.Defs;
+            FlippedDefs.flip();
+            NewLiveIn &= FlippedDefs;
+            NewLiveIn |= BBS.UpExposedUses;
+            NewLiveIn |= BBS.UpExposedUsesUnrooted;
+            if (NewLiveIn != BBS.LiveIn) {
+                AnyChanged = true;
+                BBS.LiveIn = NewLiveIn;
+            }
+            BitVector NewUnrootedIn;
+            for (BasicBlock *Pred : predecessors(BB))
+                NewUnrootedIn |= S.BBStates[Pred].UnrootedOut;
+            if (NewUnrootedIn != BBS.UnrootedIn) {
+                AnyChanged = true;
+                BBS.UnrootedIn = NewUnrootedIn;
+                if (!BBS.HasSafepoint) {
+                    BBS.UnrootedOut |= BBS.UnrootedIn;
+                }
+            }
+        }
+        Converged = !AnyChanged;
+    }
+    ComputeLiveSets(F, S);
+}
+
+void LateLowerGCFrame::ComputeLiveSets(Function &F, State &S) {
+    // Iterate over all safe points. Add to live sets all those variables that
+    // are live across their parent block.
+    for (auto it : S.SafepointNumbering) {
+        int idx = it.second;
+        Instruction *Safepoint = it.first;
+        BasicBlock *BB = Safepoint->getParent();
+        BBState &BBS = S.BBStates[BB];
+        BitVector LiveAcross = BBS.LiveIn;
+        LiveAcross &= BBS.LiveOut;
+        S.LiveSets[idx] |= LiveAcross;
+        for (int Live : S.LiveIfLiveOut[idx]) {
+            if (HasBitSet(BBS.LiveOut, Live))
+                S.LiveSets[idx][Live] = 1;
+        }
+    }
+    // Compute the interference graph
+    for (int i = 0; i <= S.MaxPtrNumber; ++i) {
+        std::vector<int> Neighbors;
+        for (auto it : S.SafepointNumbering) {
+            const BitVector &LS = S.LiveSets[it.second];
+            if ((unsigned)i >= LS.size() || !LS[i])
+                continue;
+            for (int Idx = LS.find_first(); Idx >= 0; Idx = LS.find_next(Idx)) {
+                // We explicitly let i be a neighbor of itself, to distinguish
+                // between being the only value live at a safepoint, vs not
+                // being live at any safepoint.
+                Neighbors.push_back(Idx);
+            }
+        }
+        S.Neighbors.push_back(Neighbors);
+    }
+    // Compute Rooting Locations
+    for (auto &BB : F) {
+        BBState &BBS = S.BBStates[&BB];
+        if (BBS.HasSafepoint) {
+            BitVector UnrootedIn = BBS.UnrootedIn;
+            // Only those values that have uses after a safepoint or are live
+            // across need to be rooted. N.B. We're explicitly not or-ing in
+            // UpExposedUsesUnrooted.
+            BitVector Mask = BBS.UpExposedUses;
+            Mask |= BBS.LiveOut;
+            Mask &= BBS.LiveIn;
+            UnrootedIn &= Mask;
+            for (int Idx = UnrootedIn.find_first(); Idx >= 0; Idx = UnrootedIn.find_next(Idx)) {
+                S.Rootings[BBS.TopmostSafepoint].insert(Idx);
+            }
+            // Backfill any interior rootings
+            BitVector Interior = BBS.UnrootedOut;
+            Interior.flip();
+            Interior &= BBS.LiveOut;
+            Interior &= BBS.Defs;
+            for (int Idx = Interior.find_first(); Idx >= 0; Idx = Interior.find_next(Idx)) {
+                // Needs to be rooted at the first safepoint after the def
+                Instruction *Def = cast<Instruction>(S.ReversePtrNumbering[Idx]);
+                auto it = ++BasicBlock::iterator(Def);
+                while (true) {
+                    auto sit = S.SafepointNumbering.find(&*it++);
+                    if (sit != S.SafepointNumbering.end()) {
+                        S.Rootings[sit->second].insert(Idx);
+                        break;
+                    }
+                    assert(it != Def->getParent()->end());
+                }
+            }
+        }
+    }
+}
+
+/* For chordal interference graphs, this class gives the vertices in a (reverse
+ * - depending on definition) perfect elimination ordering, in such a way that
+ * greedy coloring gives an optimal coloring. Since our roots are in SSA form,
+ * the interference should be chordal.
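+ * (In a perfect elimination ordering every vertex's later neighbors form a
+ * clique; the bucket queue below is essentially maximum cardinality search,
+ * which yields such an ordering, up to reversal, for chordal graphs.)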
+ */ +struct PEOIterator { + struct Element { + unsigned weight; + unsigned pos; + }; + std::vector Elements; + std::vector> Levels; + const std::vector> &Neighbors; + PEOIterator(const std::vector> &Neighbors) : Neighbors(Neighbors) { + // Initialize State + std::vector FirstLevel; + for (unsigned i = 0; i < Neighbors.size(); ++i) { + FirstLevel.push_back(i); + Element E{0, i}; + Elements.push_back(E); + } + Levels.push_back(FirstLevel); + } + int next() { + // Find the element in the highest bucket + int NextElement = -1; + while (NextElement == -1 && !Levels.empty()) { + std::vector &LastLevel = Levels.back(); + while (NextElement == -1 && !LastLevel.empty()) { + NextElement = LastLevel.back(); + LastLevel.pop_back(); + } + if (LastLevel.empty()) + Levels.pop_back(); + } + if (NextElement == -1) + return NextElement; + // Make sure not to try to re-use this later. + Elements[NextElement].weight = (unsigned)-1; + // Raise neighbors + for (int Neighbor : Neighbors[NextElement]) { + if (Neighbor == NextElement) + continue; + Element &NElement = Elements[Neighbor]; + // Already processed. Don't re-enqueue + if (NElement.weight == (unsigned)-1) + continue; + // Kill old queue position + Levels[NElement.weight][NElement.pos] = -1; + // Raise the neighbor to the next level. + NElement.weight += 1; + if (NElement.weight >= Levels.size()) + Levels.push_back(std::vector{}); + Levels[NElement.weight].push_back(Neighbor); + NElement.pos = Levels[NElement.weight].size()-1; + } + // As an enhancement, we might want to periodically compactify the whole + // data structure. This could be done here. + return NextElement; + } +}; + +std::vector LateLowerGCFrame::ColorRoots(const State &S) { + std::vector Colors; + Colors.resize(S.MaxPtrNumber + 1, -1); + PEOIterator Ordering(S.Neighbors); + /* Greedy coloring */ + int ActiveElement = 1; + int MaxAssignedColor = -1; + BitVector UsedColors; + while ((ActiveElement = Ordering.next()) != -1) { + assert(Colors[ActiveElement] == -1); + UsedColors.resize(MaxAssignedColor + 2, false); + UsedColors.reset(); + if (S.Neighbors[ActiveElement].empty()) { + // No need to color a value not live at any safe point + continue; + } + for (int Neighbor : S.Neighbors[ActiveElement]) { + if (Colors[Neighbor] == -1) + continue; + UsedColors[Colors[Neighbor]] = 1; + } + Colors[ActiveElement] = UsedColors.flip().find_first(); + if (Colors[ActiveElement] > MaxAssignedColor) + MaxAssignedColor = Colors[ActiveElement]; + } + return Colors; +} + +Instruction *LateLowerGCFrame::get_pgcstack(Instruction *ptlsStates) +{ + Constant *offset = ConstantInt::getSigned(T_int32, offsetof(jl_tls_states_t, pgcstack) / sizeof(void*)); + return GetElementPtrInst::Create(nullptr, + ptlsStates, + ArrayRef(offset), + "jl_pgcstack"); +} + +void LateLowerGCFrame::PushGCFrame(AllocaInst *gcframe, unsigned NRoots, Instruction *InsertAfter) { + IRBuilder<> builder(gcframe->getContext()); + builder.SetInsertPoint(&*(++BasicBlock::iterator(InsertAfter))); + Instruction *inst = + builder.CreateStore(ConstantInt::get(T_size, NRoots << 1), + builder.CreateBitCast(builder.CreateConstGEP1_32(gcframe, 0), T_size->getPointerTo())); + inst->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe); + Value *pgcstack = builder.Insert(get_pgcstack(ptlsStates)); + inst = builder.CreateStore(builder.CreateLoad(pgcstack), + builder.CreatePointerCast(builder.CreateConstGEP1_32(gcframe, 1), PointerType::get(T_ppjlvalue,0))); + inst->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe); + builder.CreateStore(gcframe, 
builder.CreateBitCast(pgcstack, + PointerType::get(PointerType::get(T_prjlvalue, 0), 0))); +} + +void LateLowerGCFrame::PopGCFrame(AllocaInst *gcframe, Instruction *InsertBefore) { + IRBuilder<> builder(InsertBefore->getContext()); + builder.SetInsertPoint(InsertBefore); // set insert *before* Ret + Instruction *gcpop = + (Instruction*)builder.CreateConstGEP1_32(gcframe, 1); + Instruction *inst = builder.CreateLoad(gcpop); + inst->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe); + inst = builder.CreateStore(inst, + builder.CreateBitCast( + builder.Insert(get_pgcstack(ptlsStates)), + PointerType::get(T_prjlvalue, 0))); + inst->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe); +} + +bool LateLowerGCFrame::CleanupIR(Function &F) { + bool ChangesMade = false; + // We create one alloca for all the jlcall frames that haven't been processed + // yet. LLVM would merge them anyway later, so might as well save it a bit + // of work + size_t maxframeargs = 0; + PointerType *T_pprjlvalue = T_prjlvalue->getPointerTo(); + Instruction *StartOff = &*(F.getEntryBlock().begin()); + AllocaInst *Frame = new AllocaInst(T_prjlvalue, ConstantInt::get(T_int32, maxframeargs), +#if JL_LLVM_VERSION >= 50000 + 0, +#endif + "", StartOff); + for (BasicBlock &BB : F) { + for (auto it = BB.begin(); it != BB.end();) { + auto *CI = dyn_cast(&*it); + if (!CI) { + ++it; + continue; + } + CallingConv::ID CC = CI->getCallingConv(); + if ((gc_kill_func != nullptr && CI->getCalledFunction() == gc_kill_func) || + (gc_flush_func != nullptr && CI->getCalledFunction() == gc_flush_func)) { + /* No replacement */ + } else if (pointer_from_objref_func != nullptr && + CI->getCalledFunction() == pointer_from_objref_func) { + auto *ASCI = new AddrSpaceCastInst(CI->getOperand(0), + CI->getType(), "", CI); + ASCI->takeName(CI); + CI->replaceAllUsesWith(ASCI); + } else if (CC == JLCALL_CC || + CC == JLCALL_F_CC) { + size_t nframeargs = CI->getNumArgOperands() - (CC == JLCALL_F_CC); + SmallVector ReplacementArgs; + auto it = CI->arg_begin(); + if (CC == JLCALL_F_CC) + ReplacementArgs.push_back(*(it++)); + maxframeargs = std::max(maxframeargs, nframeargs); + int slot = 0; + IRBuilder<> Builder (CI); + for (; it != CI->arg_end(); ++it) { + Builder.CreateStore(*it, Builder.CreateGEP(T_prjlvalue, Frame, + {ConstantInt::get(T_int32, slot++)})); + } + ReplacementArgs.push_back(nframeargs == 0 ? + (llvm::Value*)ConstantPointerNull::get(T_pprjlvalue) : + (llvm::Value*)Frame); + ReplacementArgs.push_back(ConstantInt::get(T_int32, nframeargs)); + FunctionType *FTy = CC == JLCALL_F_CC ? 
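+                    // JLCALL_F_CC passes the boxed function object as the leading
+                    // argument; JLCALL_CC omits it, so the two lowered signatures
+                    // below differ only in that first parameter.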
+ FunctionType::get(T_prjlvalue, {T_prjlvalue, + T_pprjlvalue, T_int32}, false) : + FunctionType::get(T_prjlvalue, + {T_pprjlvalue, T_int32}, false); + Value *newFptr = Builder.CreateBitCast(CI->getCalledValue(), + FTy->getPointerTo()); + CallInst *NewCall = CallInst::Create(newFptr, ReplacementArgs, "", CI); + NewCall->setTailCallKind(CI->getTailCallKind()); + NewCall->setAttributes(CI->getAttributes()); + NewCall->setDebugLoc(CI->getDebugLoc()); + CI->replaceAllUsesWith(NewCall); + } else if (CI->getNumArgOperands() == CI->getNumOperands()) { + /* No operand bundle to lower */ + ++it; + continue; + } else { + CallInst *NewCall = CallInst::Create(CI, None, CI); + NewCall->takeName(CI); + CI->replaceAllUsesWith(NewCall); + } + it = CI->eraseFromParent(); + ChangesMade = true; + } + } + if (maxframeargs == 0) + Frame->eraseFromParent(); + else + Frame->setOperand(0, ConstantInt::get(T_int32, maxframeargs)); + return ChangesMade; +} + +static Value *GetPtrForNumber(State &S, unsigned Num, Instruction *InsertionPoint) +{ + Value *Val = S.ReversePtrNumbering[Num]; + if (isSpecialPtrVec(Val->getType())) { + const std::vector &AllNums = S.AllVectorNumbering[Val]; + unsigned Idx = 0; + for (; Idx < AllNums.size(); ++Idx) { + if ((unsigned)AllNums[Idx] == Num) + break; + } + Val = ExtractElementInst::Create(Val, ConstantInt::get( + Type::getInt32Ty(Val->getContext()), Idx), "", InsertionPoint); + } + return Val; +} + +void LateLowerGCFrame::PlaceRootsAndUpdateCalls(Function &F, std::vector &Colors, State &S, std::map>) { + auto &DT = getAnalysis().getDomTree(); + int MaxColor = -1; + for (auto C : Colors) + if (C > MaxColor) + MaxColor = C; + // Insert instructions for the actual gc frame + if (MaxColor != -1 || S.Allocas.size() != 0) { + unsigned NRoots = MaxColor + 1 + S.Allocas.size(); + // Create GC Frame + AllocaInst *gcframe = new AllocaInst(T_prjlvalue, +#if JL_LLVM_VERSION >= 50000 + 0, +#endif + ConstantInt::get(T_int32, NRoots+2), "gcframe"); + gcframe->insertBefore(&*F.getEntryBlock().begin()); + // Zero out gcframe + BitCastInst *tempSlot_i8 = new BitCastInst(gcframe, Type::getInt8PtrTy(F.getContext()), ""); + tempSlot_i8->insertAfter(gcframe); + Type *argsT[2] = {tempSlot_i8->getType(), T_int32}; + Function *memset = Intrinsic::getDeclaration(F.getParent(), Intrinsic::memset, makeArrayRef(argsT)); + Value *args[5] = { + tempSlot_i8, // dest + ConstantInt::get(Type::getInt8Ty(F.getContext()), 0), // val + ConstantInt::get(T_int32, sizeof(jl_value_t*)*(NRoots+2)), // len + ConstantInt::get(T_int32, 0), // align + ConstantInt::get(Type::getInt1Ty(F.getContext()), 0)}; // volatile + CallInst *zeroing = CallInst::Create(memset, makeArrayRef(args)); + zeroing->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_gcframe); + zeroing->insertAfter(tempSlot_i8); + // Push GC Frame + PushGCFrame(gcframe, NRoots, ptlsStates); + // Replace Allocas + unsigned AllocaSlot = 2; + for (AllocaInst *AI : S.Allocas) { + Value *args[1] = { + ConstantInt::get(T_int32, AllocaSlot++) + }; + GetElementPtrInst *gep = GetElementPtrInst::Create(T_prjlvalue, gcframe, makeArrayRef(args)); + gep->insertAfter(gcframe); + gep->takeName(AI); + // Check for lifetime intrinsics on this alloca, we can't keep them + // because we're changing the semantics + std::vector ToDelete; + RecursivelyVisit([&](Use &VU) { + IntrinsicInst *II = cast(VU.getUser()); + if ((II->getIntrinsicID() != Intrinsic::lifetime_start && + II->getIntrinsicID() != Intrinsic::lifetime_end)) + return; + ToDelete.push_back(II); + }, AI); + for (CallInst *II : 
ToDelete) + II->eraseFromParent(); + AI->replaceAllUsesWith(gep); + AI->eraseFromParent(); + } + unsigned MinColorRoot = AllocaSlot; + // Insert GC frame stores + for (auto it : S.SafepointNumbering) { + const std::set &Rooting = S.Rootings[it.second]; + for (int R : Rooting) { + if (Colors[R] != -1) { + Instruction *InsertionPoint = it.first; + Value *Val = S.ReversePtrNumbering[R]; + /* + * Generally we like doing the rooting late, because it lets + * us avoid doing so on paths that have no safe points. + * However, it is possible for the first safepoint to not + * be dominated by the definition. In that case, just start + * rooting it right after the definition. + */ + if (isa(Val) && !DT.dominates(cast(Val), InsertionPoint)) { + InsertionPoint = &*(++(cast(Val)->getIterator())); + // No need to root this anywhere else any more + Colors[R] = -1; + } + Val = GetPtrForNumber(S, R, InsertionPoint); + Value *args[1] = { + ConstantInt::get(T_int32, Colors[R]+MinColorRoot) + }; + GetElementPtrInst *gep = GetElementPtrInst::Create(T_prjlvalue, gcframe, makeArrayRef(args)); + gep->insertBefore(InsertionPoint); + Val = MaybeExtractUnion(Val, InsertionPoint); + // Pointee types don't have semantics, so the optimizer is + // free to rewrite them if convenient. We need to change + // it back here for the store. + if (Val->getType() != T_prjlvalue) + Val = new BitCastInst(Val, T_prjlvalue, "", InsertionPoint); + new StoreInst(Val, gep, InsertionPoint); + } + } + } + // Insert GCFrame pops + for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { + if (isa(I->getTerminator())) { + PopGCFrame(gcframe, I->getTerminator()); + } + } + } +} + +bool LateLowerGCFrame::doInitialization(Module &M) { + ptls_getter = M.getFunction("jl_get_ptls_states"); + gc_kill_func = M.getFunction("julia.gc_root_kill"); + gc_flush_func = M.getFunction("julia.gcroot_flush"); + pointer_from_objref_func = M.getFunction("julia.pointer_from_objref"); + return false; +} + +bool LateLowerGCFrame::runOnFunction(Function &F) { + DEBUG(dbgs() << "GC ROOT PLACEMENT: Processing function " << F.getName() << "\n"); + if (ptls_getter) { + auto functype = ptls_getter->getFunctionType(); + T_ppjlvalue = + cast(functype->getReturnType())->getElementType(); + auto T_pjlvalue = cast(T_ppjlvalue)->getElementType(); + T_prjlvalue = PointerType::get(cast(T_pjlvalue)->getElementType(), AddressSpace::Tracked); + } else { + return CleanupIR(F); + } + T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext()); + T_int32 = Type::getInt32Ty(F.getContext()); + ptlsStates = nullptr; + for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); + ptls_getter && I != E; ++I) { + if (CallInst *callInst = dyn_cast(&*I)) { + if (callInst->getCalledValue() == ptls_getter) { + ptlsStates = callInst; + break; + } + } + } + if (!ptlsStates) + return CleanupIR(F); + State S = LocalScan(F); + ComputeLiveness(F, S); + std::vector Colors = ColorRoots(S); + std::map> CallFrames; // = OptimizeCallFrames(S, Ordering); + PlaceRootsAndUpdateCalls(F, Colors, S, CallFrames); + CleanupIR(F); + return true; +} + +char LateLowerGCFrame::ID = 0; +static RegisterPass X("LateLowerGCFrame", "Late Lower GCFrame Pass", false, false); + +Pass *createLateLowerGCFramePass() { + return new LateLowerGCFrame(); +} diff --git a/src/llvm-propagate-addrspaces.cpp b/src/llvm-propagate-addrspaces.cpp new file mode 100644 index 0000000000000..6e6d4a5f3032d --- /dev/null +++ b/src/llvm-propagate-addrspaces.cpp @@ -0,0 +1,205 @@ +// This file is a part of Julia. 
License is MIT: https://julialang.org/license
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "llvm-version.h"
+#include "codegen_shared.h"
+#include "julia.h"
+
+#define DEBUG_TYPE "propagate_julia_addrspaces"
+
+using namespace llvm;
+
+/* This pass performs propagation of addrspace information that is legal given
+   the frontend's invariants, but not deducible from general IR semantics. In
+   particular, this includes:
+   - Changing the address space of a load/store if the base pointer is
+     in an untracked address space
+   - Commuting GEPs and addrspace casts
+
+   This is most useful for removing superfluous casts that can inhibit LLVM
+   optimizations.
+*/
+
+struct PropagateJuliaAddrspaces : public FunctionPass, public InstVisitor<PropagateJuliaAddrspaces> {
+    static char ID;
+    DenseMap<Value *, Value *> LiftingMap;
+    SmallPtrSet<Value *, 4> Visited;
+    std::vector<Instruction *> ToDelete;
+    std::vector<std::pair<Instruction *, Instruction *>> ToInsert;
+    PropagateJuliaAddrspaces() : FunctionPass(ID) {};
+
+public:
+    bool runOnFunction(Function &F) override;
+    Value *LiftPointer(Value *V, Type *LocTy = nullptr, Instruction *InsertPt=nullptr);
+    void visitStoreInst(StoreInst &SI);
+    void visitLoadInst(LoadInst &LI);
+    void visitMemSetInst(MemSetInst &MI);
+    void visitMemTransferInst(MemTransferInst &MTI);
+};
+
+bool PropagateJuliaAddrspaces::runOnFunction(Function &F) {
+    visit(F);
+    for (auto it : ToInsert)
+        it.first->insertBefore(it.second);
+    for (Instruction *I : ToDelete)
+        I->eraseFromParent();
+    ToInsert.clear();
+    ToDelete.clear();
+    LiftingMap.clear();
+    Visited.clear();
+    return true;
+}
+
+static unsigned getValueAddrSpace(Value *V) {
+    return cast<PointerType>(V->getType())->getAddressSpace();
+}
+
+static bool isSpecialAS(unsigned AS) {
+    return AddressSpace::FirstSpecial <= AS && AS <= AddressSpace::LastSpecial;
+}
+
+Value *PropagateJuliaAddrspaces::LiftPointer(Value *V, Type *LocTy, Instruction *InsertPt) {
+    SmallVector<Value *, 4> Stack;
+    Value *CurrentV = V;
+    // Follow pointer casts back, see if we're based on a pointer in
+    // an untracked address space, in which case we're allowed to drop
+    // intermediate addrspace casts.
+    while (true) {
+        Stack.push_back(CurrentV);
+        if (isa<BitCastInst>(CurrentV))
+            CurrentV = cast<BitCastInst>(CurrentV)->getOperand(0);
+        else if (isa<AddrSpaceCastInst>(CurrentV)) {
+            CurrentV = cast<AddrSpaceCastInst>(CurrentV)->getOperand(0);
+            if (!isSpecialAS(getValueAddrSpace(CurrentV)))
+                break;
+        }
+        else if (isa<GetElementPtrInst>(CurrentV)) {
+            if (LiftingMap.count(CurrentV)) {
+                CurrentV = LiftingMap[CurrentV];
+                break;
+            } else if (Visited.count(CurrentV)) {
+                return nullptr;
+            }
+            Visited.insert(CurrentV);
+            CurrentV = cast<GetElementPtrInst>(CurrentV)->getOperand(0);
+        } else
+            break;
+    }
+    if (!CurrentV->getType()->isPointerTy())
+        return nullptr;
+    if (isSpecialAS(getValueAddrSpace(CurrentV)))
+        return nullptr;
+    // OK, we're allowed to change the address space of this load, so go back
+    // and reconstitute any GEPs in the new address space.
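+    // The stack was built from the use back towards the base, so walk it in
+    // reverse (base first) and point each cloned GEP at the value rewritten
+    // just before it.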
+ for (Value *V : llvm::reverse(Stack)) { + GetElementPtrInst *GEP = dyn_cast(V); + if (!GEP) + continue; + if (LiftingMap.count(GEP)) { + CurrentV = LiftingMap[GEP]; + continue; + } + GetElementPtrInst *NewGEP = cast(GEP->clone()); + ToInsert.push_back(std::make_pair(NewGEP, GEP)); + Type *GEPTy = GEP->getSourceElementType(); + Type *NewRetTy = cast(GEP->getType())->getElementType()->getPointerTo(getValueAddrSpace(CurrentV)); + NewGEP->mutateType(NewRetTy); + if (cast(CurrentV->getType())->getElementType() != GEPTy) { + auto *BCI = new BitCastInst(CurrentV, GEPTy->getPointerTo()); + ToInsert.push_back(std::make_pair(BCI, NewGEP)); + CurrentV = BCI; + } + NewGEP->setOperand(GetElementPtrInst::getPointerOperandIndex(), CurrentV); + LiftingMap[GEP] = NewGEP; + CurrentV = NewGEP; + } + if (LocTy && cast(CurrentV->getType())->getElementType() != LocTy) { + auto *BCI = new BitCastInst(CurrentV, LocTy->getPointerTo()); + ToInsert.push_back(std::make_pair(BCI, InsertPt)); + CurrentV = BCI; + } + return CurrentV; +} + +void PropagateJuliaAddrspaces::visitLoadInst(LoadInst &LI) { + unsigned AS = LI.getPointerAddressSpace(); + if (!isSpecialAS(AS)) + return; + Value *Replacement = LiftPointer(LI.getPointerOperand(), LI.getType(), &LI); + if (!Replacement) + return; + LI.setOperand(LoadInst::getPointerOperandIndex(), Replacement); +} + +void PropagateJuliaAddrspaces::visitStoreInst(StoreInst &SI) { + unsigned AS = SI.getPointerAddressSpace(); + if (!isSpecialAS(AS)) + return; + Value *Replacement = LiftPointer(SI.getPointerOperand(), SI.getValueOperand()->getType(), &SI); + if (!Replacement) + return; + SI.setOperand(StoreInst::getPointerOperandIndex(), Replacement); +} + +void PropagateJuliaAddrspaces::visitMemSetInst(MemSetInst &MI) { + unsigned AS = MI.getDestAddressSpace(); + if (!isSpecialAS(AS)) + return; + Value *Replacement = LiftPointer(MI.getRawDest()); + if (!Replacement) + return; + Value *TheFn = Intrinsic::getDeclaration(MI.getModule(), Intrinsic::memset, + {Replacement->getType(), MI.getOperand(1)->getType()}); + MI.setCalledFunction(TheFn); + MI.setArgOperand(0, Replacement); +} + +void PropagateJuliaAddrspaces::visitMemTransferInst(MemTransferInst &MTI) { + unsigned DestAS = MTI.getDestAddressSpace(); + unsigned SrcAS = MTI.getSourceAddressSpace(); + if (!isSpecialAS(DestAS) && !isSpecialAS(SrcAS)) + return; + Value *Dest = MTI.getRawDest(); + if (isSpecialAS(DestAS)) { + Value *Replacement = LiftPointer(Dest, cast(Dest->getType())->getElementType(), &MTI); + if (Replacement) + Dest = Replacement; + } + Value *Src = MTI.getRawSource(); + if (isSpecialAS(SrcAS)) { + Value *Replacement = LiftPointer(Src, cast(Src->getType())->getElementType(), &MTI); + if (Replacement) + Src = Replacement; + } + if (Dest == MTI.getRawDest() && Src == MTI.getRawSource()) + return; + Value *TheFn = Intrinsic::getDeclaration(MTI.getModule(), MTI.getIntrinsicID(), + {Dest->getType(), Src->getType(), + MTI.getOperand(2)->getType()}); + MTI.setCalledFunction(TheFn); + MTI.setArgOperand(0, Dest); + MTI.setArgOperand(1, Src); +} + +char PropagateJuliaAddrspaces::ID = 0; +static RegisterPass X("PropagateJuliaAddrspaces", "Propagate (non-)rootedness information", false, false); + +Pass *createPropagateJuliaAddrspaces() { + return new PropagateJuliaAddrspaces(); +} diff --git a/test/llvmpasses/gcroots.ll b/test/llvmpasses/gcroots.ll new file mode 100644 index 0000000000000..db2c22543936b --- /dev/null +++ b/test/llvmpasses/gcroots.ll @@ -0,0 +1,176 @@ +; RUN: opt -load libjulia.so -LateLowerGCFrame -S %s | 
FileCheck %s + +%jl_value_t = type opaque + +declare void @boxed_simple(%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)*) +declare %jl_value_t addrspace(10)* @jl_box_int64(i64) +declare %jl_value_t*** @jl_get_ptls_states() +declare %jl_value_t addrspace(10)* @jl_apply_generic(%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32) + +define void @simple(i64 %a, i64 %b) { +top: +; CHECK-LABEL: @simple + %ptls = call %jl_value_t*** @jl_get_ptls_states() +; CHECK: %gcframe = alloca %jl_value_t addrspace(10)*, i32 4 +; CHECK: call %jl_value_t addrspace(10)* @jl_box_int64 + %aboxed = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %a) +; CHECK: [[GEP0:%.*]] = getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; CHECK-NEXT: store %jl_value_t addrspace(10)* %aboxed, %jl_value_t addrspace(10)** [[GEP0]] + %bboxed = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %b) +; CHECK-NEXT: %bboxed = +; Make sure the same gc slot isn't re-used +; CHECK-NOT: getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %gcframe, i32 [[GEPSLOT0]] +; CHECK: [[GEP1:%.*]] = getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]] +; CHECK-NEXT: store %jl_value_t addrspace(10)* %bboxed, %jl_value_t addrspace(10)** [[GEP1]] +; CHECK-NEXT: call void @boxed_simple + call void @boxed_simple(%jl_value_t addrspace(10)* %aboxed, + %jl_value_t addrspace(10)* %bboxed) + ret void +} + +define void @leftover_alloca(%jl_value_t addrspace(10)*%a) { +; If this pass encounters an alloca, it'll just sink it into the gcframe, +; relying on mem2reg to catch simple cases such as this earlier +; CHECK-LABEL: @leftover_alloca +; CHECK: %var = getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %gcframe + %ptls = call %jl_value_t*** @jl_get_ptls_states() + %var = alloca %jl_value_t addrspace(10)* + store %jl_value_t addrspace(10)* %a, %jl_value_t addrspace(10)** %var + %b = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %var + call void @boxed_simple(%jl_value_t addrspace(10)* %a, + %jl_value_t addrspace(10)* %b) + ret void +} + +declare {%jl_value_t addrspace(10)*, i8} @union_ret() +declare void @union_arg({%jl_value_t addrspace(10)*, i8}) + +define void @simple_union() { +; CHECK-LABEL: @simple_union + %ptls = call %jl_value_t*** @jl_get_ptls_states() +; CHECK: %a = call { %jl_value_t addrspace(10)*, i8 } @union_ret() + %a = call { %jl_value_t addrspace(10)*, i8 } @union_ret() +; CHECK: [[GEP0:%.*]] = getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; CHECK-NEXT: [[EXTRACT:%.*]] = extractvalue { %jl_value_t addrspace(10)*, i8 } %a, 0 +; CHECK-NEXT: store %jl_value_t addrspace(10)* [[EXTRACT]], %jl_value_t addrspace(10)** [[GEP0]] + call void @union_arg({%jl_value_t addrspace(10)*, i8} %a) + ret void +} + +declare void @one_arg_boxed(%jl_value_t addrspace(10)*) + +define void @select_simple(i64 %a, i64 %b) { +; CHECK-LABEL: @select_simple + %ptls = call %jl_value_t*** @jl_get_ptls_states() + %aboxed = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %a) + %bboxed = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %b) + %cmp = icmp eq i64 %a, %b + %selectb = select i1 %cmp, %jl_value_t addrspace(10)* %aboxed, %jl_value_t addrspace(10)* %bboxed + call void @one_arg_boxed(%jl_value_t addrspace(10)* %selectb) + ret void +} + +define void @phi_simple(i64 %a, i64 %b) { +top: +; CHECK-LABEL: 
@phi_simple +; CHECK: %gcframe = alloca %jl_value_t addrspace(10)*, i32 3 + %ptls = call %jl_value_t*** @jl_get_ptls_states() + %cmp = icmp eq i64 %a, %b + br i1 %cmp, label %alabel, label %blabel +alabel: + %aboxed = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %a) + br label %common +blabel: + %bboxed = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %b) + br label %common +common: + %phi = phi %jl_value_t addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ] +; CHECK: [[GEP:%.*]] = getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %gcframe, i32 2 +; CHECK: store %jl_value_t addrspace(10)* %phi, %jl_value_t addrspace(10)** [[GEP]] + call void @one_arg_boxed(%jl_value_t addrspace(10)* %phi) + ret void +} + +declare void @one_arg_decayed(i64 addrspace(12)*) + +define void @select_lift(i64 %a, i64 %b) { +; CHECK-LABEL: @select_lift +; CHECK: %gcframe = alloca %jl_value_t addrspace(10)*, i32 3 + %ptls = call %jl_value_t*** @jl_get_ptls_states() + %aboxed = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %a) + %adecayed = addrspacecast %jl_value_t addrspace(10)* %aboxed to i64 addrspace(12)* + %bboxed = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %b) + %bdecayed = addrspacecast %jl_value_t addrspace(10)* %bboxed to i64 addrspace(12)* + %cmp = icmp eq i64 %a, %b +; CHECK: %gclift = select i1 %cmp, %jl_value_t addrspace(10)* %aboxed, %jl_value_t addrspace(10)* %bboxed + %selectb = select i1 %cmp, i64 addrspace(12)* %adecayed, i64 addrspace(12)* %bdecayed + call void @one_arg_decayed(i64 addrspace(12)* %selectb) + ret void +} + +define void @phi_lift(i64 %a, i64 %b) { +top: +; CHECK-LABEL: @phi_lift + %ptls = call %jl_value_t*** @jl_get_ptls_states() + %cmp = icmp eq i64 %a, %b + br i1 %cmp, label %alabel, label %blabel +alabel: + %aboxed = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %a) + %adecayed = addrspacecast %jl_value_t addrspace(10)* %aboxed to i64 addrspace(12)* + br label %common +blabel: + %bboxed = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %b) + %bdecayed = addrspacecast %jl_value_t addrspace(10)* %bboxed to i64 addrspace(12)* + br label %common +common: + %phi = phi i64 addrspace(12)* [ %adecayed, %alabel ], [ %bdecayed, %blabel ] + call void @one_arg_decayed(i64 addrspace(12)* %phi) + ret void +} + +define void @live_if_live_out(i64 %a, i64 %b) { +; CHECK-LABEL: @live_if_live_out +top: +; CHECK: %gcframe = alloca %jl_value_t addrspace(10)*, i32 4 + %ptls = call %jl_value_t*** @jl_get_ptls_states() +; The failure case is failing to realize that `aboxed` is live across the first +; one_arg_boxed safepoint and putting bboxed in the same root slot + %aboxed = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %a) + %bboxed = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %b) + call void @one_arg_boxed(%jl_value_t addrspace(10)* %bboxed) + br label %succ +succ: + call void @one_arg_boxed(%jl_value_t addrspace(10)* %aboxed) + ret void +} + +; A ret is a use - make sure the value is kept alive for any intervening +; safepoint +define %jl_value_t addrspace(10)* @ret_use(i64 %a, i64 %b) { +; CHECK-LABEL: @ret_use +; CHECK: %gcframe = alloca %jl_value_t addrspace(10)*, i32 3 + %ptls = call %jl_value_t*** @jl_get_ptls_states() + %aboxed = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %a) +; CHECK: store %jl_value_t addrspace(10)* %aboxed + %bboxed = call %jl_value_t addrspace(10)* @jl_box_int64(i64 signext %b) + ret %jl_value_t addrspace(10)* %aboxed +} + +define 
i8 @nosafepoint(%jl_value_t addrspace(10)* dereferenceable(16)) { +; CHECK-LABEL: @nosafepoint +; CHECK-NOT: %gcframe +top: + %1 = call %jl_value_t*** @jl_get_ptls_states() + %2 = bitcast %jl_value_t*** %1 to %jl_value_t addrspace(10)** + %3 = getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %2, i64 3 + %4 = bitcast %jl_value_t addrspace(10)** %3 to i64** + %5 = load i64*, i64** %4 + %6 = bitcast %jl_value_t addrspace(10)* %0 to i8 addrspace(10)* + %7 = addrspacecast i8 addrspace(10)* %6 to i8 addrspace(11)* + %8 = getelementptr i8, i8 addrspace(11)* %7, i64 0 + %9 = load i8, i8 addrspace(11)* %8 + %10 = trunc i8 %9 to i1 + %11 = zext i1 %10 to i8 + %12 = xor i8 %11, 1 + ret i8 %12 +} diff --git a/test/llvmpasses/propagate-addrspace.ll b/test/llvmpasses/propagate-addrspace.ll new file mode 100644 index 0000000000000..80ee269616d8c --- /dev/null +++ b/test/llvmpasses/propagate-addrspace.ll @@ -0,0 +1,21 @@ +; RUN: opt -load libjulia.so -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s + +define i64 @simple() { +; CHECK-LABEL: @simple +; CHECK-NOT: addrspace(11) + %stack = alloca i64 + %casted = addrspacecast i64 *%stack to i64 addrspace(11)* + %loaded = load i64, i64 addrspace(11)* %casted + ret i64 %loaded +} + +define i64 @twogeps() { +; CHECK-LABEL: @twogeps +; CHECK-NOT: addrspace(11) + %stack = alloca i64 + %casted = addrspacecast i64 *%stack to i64 addrspace(11)* + %gep1 = getelementptr i64, i64 addrspace(11)* %casted, i64 1 + %gep2 = getelementptr i64, i64 addrspace(11)* %gep1, i64 1 + %loaded = load i64, i64 addrspace(11)* %gep2 + ret i64 %loaded +}
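+
+; The checks above only exercise the load path. A minimal extra case for the
+; store path handled by visitStoreInst could look like the following sketch
+; (not part of the original test set; it assumes the same RUN line applies):
+define void @store_simple(i64 %v) {
+; CHECK-LABEL: @store_simple
+; CHECK-NOT: addrspace(11)
+  %stack = alloca i64
+  %casted = addrspacecast i64 *%stack to i64 addrspace(11)*
+  store i64 %v, i64 addrspace(11)* %casted
+  ret void
+}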