From 8ebb35efb7fa5a11ead47a35e060c1fff5e60472 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Sat, 23 Nov 2024 08:56:34 +0000 Subject: [PATCH] Extend `invoke` to accept CodeInstance This is an alternative mechanism to #56650 that largely achieves the same result, but by hooking into `invoke` rather than a generated function. They are orthogonal mechanisms, and it's possible we want both. However, in #56650, both Jameson and Valentin were skeptical of the generated function signature bottleneck. This PR is sort of a hybrid of the mechanism in #52964 and what I proposed in https://github.com/JuliaLang/julia/pull/56650#issuecomment-2493800877. In particular, this PR: 1. Extends `invoke` to support a CodeInstance in place of its usual `types` argument. 2. Adds a new `typeinf` optimized generic. The semantics of this optimized generic allow the compiler to instead call a companion `typeinf_edge` function, allowing a mid-inference interpreter switch (like #52964), without being forced through a concrete signature bottleneck. However, if calling `typeinf_edge` does not work (e.g. because the compiler version is mismatched), this still has well-defined semantics, you just don't get inference support. The additional benefit of the `typeinf` optimized generic is that it lets custom cache owners tell the runtime how to "cure" code instances that have lost their native code. Currently the runtime only knows how to do that for `owner == nothing` CodeInstances (by re-running inference). This extension is not implemented, but the idea is that the runtime would be permitted to call the `typeinf` optimized generic on the dead CodeInstance's `owner` and `def` fields to obtain a cured CodeInstance (or a user-actionable error from the plugin). This PR includes an implementation of `with_new_compiler` from #56650. That said, this PR does not yet include the compiler optimization that implements the semantics of the optimized generic, which will be in a follow-up PR. 
--- .../extras/CompilerDevTools/Manifest.toml | 15 ++++++ Compiler/extras/CompilerDevTools/Project.toml | 5 ++ .../CompilerDevTools/src/CompilerDevTools.jl | 45 +++++++++++++++++ Compiler/src/abstractinterpretation.jl | 48 +++++++++++++++---- Compiler/src/abstractlattice.jl | 2 +- Compiler/src/bootstrap.jl | 10 +++- Compiler/src/stmtinfo.jl | 11 +++++ Compiler/src/utilities.jl | 4 +- NEWS.md | 2 + base/docs/basedocs.jl | 17 +++++++ base/optimized_generics.jl | 27 +++++++++++ src/builtins.c | 22 +++++++++ src/interpreter.c | 24 +++++++++- test/core.jl | 14 ++++++ 14 files changed, 231 insertions(+), 15 deletions(-) create mode 100644 Compiler/extras/CompilerDevTools/Manifest.toml create mode 100644 Compiler/extras/CompilerDevTools/Project.toml create mode 100644 Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl diff --git a/Compiler/extras/CompilerDevTools/Manifest.toml b/Compiler/extras/CompilerDevTools/Manifest.toml new file mode 100644 index 00000000000000..bcc78f1ded34a0 --- /dev/null +++ b/Compiler/extras/CompilerDevTools/Manifest.toml @@ -0,0 +1,15 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.12.0-DEV" +manifest_format = "2.0" +project_hash = "84f495a1bf065c95f732a48af36dd0cd2cefb9d5" + +[[deps.Compiler]] +path = "../.." +uuid = "807dbc54-b67e-4c79-8afb-eafe4df6f2e1" +version = "0.0.2" + +[[deps.CompilerDevTools]] +path = "." 
+uuid = "92b2d91f-d2bd-4c05-9214-4609ac33433f" +version = "0.0.0" diff --git a/Compiler/extras/CompilerDevTools/Project.toml b/Compiler/extras/CompilerDevTools/Project.toml new file mode 100644 index 00000000000000..a2749a9a56a84b --- /dev/null +++ b/Compiler/extras/CompilerDevTools/Project.toml @@ -0,0 +1,5 @@ +name = "CompilerDevTools" +uuid = "92b2d91f-d2bd-4c05-9214-4609ac33433f" + +[deps] +Compiler = "807dbc54-b67e-4c79-8afb-eafe4df6f2e1" diff --git a/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl b/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl new file mode 100644 index 00000000000000..e019e893d24c22 --- /dev/null +++ b/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl @@ -0,0 +1,45 @@ +__precompile__(false) +module CompilerDevTools + +using Compiler +using Core.IR + +include(joinpath(dirname(pathof(Compiler)), "..", "test", "newinterp.jl")) + +@newinterp SplitCacheInterp +struct SplitCacheOwner; end + +import Core.OptimizedGenerics.CompilerPlugins: typeinf, typeinf_edge + +Compiler.cache_owner(::SplitCacheInterp) = SplitCacheOwner() +let typeinf_world_age = Base.tls_world_age() + @eval @noinline typeinf(::SplitCacheOwner, mi::MethodInstance, source_mode::UInt8) = + Base.invoke_in_world($typeinf_world_age, Compiler.typeinf_ext, SplitCacheInterp(; world=Base.tls_world_age()), mi, source_mode) + + @eval @noinline function typeinf_edge(::SplitCacheOwner, mi::MethodInstance, parent_frame::Compiler.InferenceState, world::UInt, source_mode::UInt8) + # TODO: This isn't quite right, we're just sketching things for now + interp = SplitCacheInterp(; world) + Compiler.typeinf_edge(interp, mi.def, mi.specTypes, Core.svec(), parent_frame, false, false) + end +end + +# TODO: This needs special compiler support to properly case split for multiple +# method matches, etc. This annotation is not sound, but just for demo purposes. 
+@noinline function mi_for_tt(tt, world=Base.tls_world_age()) + interp = SplitCacheInterp(; world) + match, _ = Compiler.findsup(tt, Compiler.method_table(interp)) + Base.specialize_method(match) +end + +function with_new_compiler(f, args...) + tt = Base.signature_type(f, typeof(args)) + world = Base.tls_world_age() + new_compiler_ci = Core.OptimizedGenerics.CompilerPlugins.typeinf( + SplitCacheOwner(), mi_for_tt(tt, world), Compiler.SOURCE_MODE_ABI + ) + invoke(f, new_compiler_ci, args...) +end + +export with_new_compiler + +end diff --git a/Compiler/src/abstractinterpretation.jl b/Compiler/src/abstractinterpretation.jl index 5946adf80ad526..fe30d0320ea46d 100644 --- a/Compiler/src/abstractinterpretation.jl +++ b/Compiler/src/abstractinterpretation.jl @@ -2218,16 +2218,46 @@ function abstract_invoke(interp::AbstractInterpreter, arginfo::ArgInfo, si::Stmt ft = widenconst(ft′) ft === Bottom && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())) types = argtype_by_index(argtypes, 3) - if types isa Const && types.val isa Method - method = types.val::Method - types = method # argument value - lookupsig = method.sig # edge kind - argtype = argtypes_to_type(pushfirst!(argtype_tail(argtypes, 4), ft)) - nargtype = typeintersect(lookupsig, argtype) - nargtype === Bottom && return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo())) - nargtype isa DataType || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) # other cases are not implemented below + if types isa Const && types.val isa Union{Method, CodeInstance} + method_or_ci = types.val + if isa(method_or_ci, CodeInstance) + our_world = sv.world.this + argtype = argtypes_to_type(pushfirst!(argtype_tail(argtypes, 4), ft)) + sig = method_or_ci.def.specTypes + exct = method_or_ci.exctype + if !hasintersect(argtype, sig) + return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo())) + elseif !(argtype <: sig) + exct = Union{exct, TypeError} + end + callee_valid_range = 
WorldRange(method_or_ci.min_world, method_or_ci.max_world) + if !(our_world in callee_valid_range) + if our_world < first(callee_valid_range) + update_valid_age!(sv, WorldRange(first(sv.world.valid_worlds), first(callee_valid_range)-1)) + else + update_valid_age!(sv, WorldRange(last(callee_valid_range)+1, last(sv.world.valid_worlds))) + end + return Future(CallMeta(Bottom, ErrorException, EFFECTS_THROWS, NoCallInfo())) + end + # Only native (owner === nothing) Method-backed CodeInstances have a runtime fallback when no native code exists. TODO: When we add curing, we may want to assume this is nothrow + if !(method_or_ci.owner === nothing && method_or_ci.def.def isa Method) + exct = Union{exct, ErrorException} + end + update_valid_age!(sv, callee_valid_range) + return Future(CallMeta(method_or_ci.rettype, exct, Effects(decode_effects(method_or_ci.ipo_purity_bits), nothrow=(exct===Bottom)), + InvokeCICallInfo(method_or_ci))) + else + method = method_or_ci + types = method # argument value + lookupsig = method.sig # edge kind + argtype = argtypes_to_type(pushfirst!(argtype_tail(argtypes, 4), ft)) + nargtype = typeintersect(lookupsig, argtype) + nargtype === Bottom && return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo())) + nargtype isa DataType || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) # other cases are not implemented below + # Fall through to generic invoke handling + end else - widenconst(types) >: Method && return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) + hasintersect(widenconst(types), Union{Method, CodeInstance}) && return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) (types, isexact, isconcrete, istype) = instanceof_tfunc(argtype_by_index(argtypes, 3), false) isexact || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) unwrapped = unwrap_unionall(types) diff --git a/Compiler/src/abstractlattice.jl b/Compiler/src/abstractlattice.jl index c1f30507391701..7a9cff89181758 100644 --- a/Compiler/src/abstractlattice.jl +++ b/Compiler/src/abstractlattice.jl @@ -229,7 +229,7 @@ end if isa(t, Const) # don't consider mutable 
values useful constants val = t.val - return isa(val, Symbol) || isa(val, Type) || isa(val, Method) || !ismutable(val) + return isa(val, Symbol) || isa(val, Type) || isa(val, Method) || isa(val, CodeInstance) || !ismutable(val) end isa(t, PartialTypeVar) && return false # this isn't forwardable return is_const_prop_profitable_arg(widenlattice(𝕃), t) diff --git a/Compiler/src/bootstrap.jl b/Compiler/src/bootstrap.jl index 7ee439cc7ac67c..475c53e3171527 100644 --- a/Compiler/src/bootstrap.jl +++ b/Compiler/src/bootstrap.jl @@ -5,7 +5,15 @@ # especially try to make sure any recursive and leaf functions have concrete signatures, # since we won't be able to specialize & infer them at runtime -activate_codegen!() = ccall(:jl_set_typeinf_func, Cvoid, (Any,), typeinf_ext_toplevel) +function activate_codegen!() + ccall(:jl_set_typeinf_func, Cvoid, (Any,), typeinf_ext_toplevel) + Core.eval(Compiler, quote + let typeinf_world_age = Base.tls_world_age() + @eval Core.OptimizedGenerics.CompilerPlugins.typeinf(::Nothing, mi::MethodInstance, source_mode::UInt8) = + Base.invoke_in_world($(Expr(:$, :typeinf_world_age)), typeinf_ext_toplevel, mi, Base.tls_world_age(), source_mode) + end + end) +end function bootstrap!() let time() = ccall(:jl_clock_now, Float64, ()) diff --git a/Compiler/src/stmtinfo.jl b/Compiler/src/stmtinfo.jl index 830bfa02d2d99b..9f0f1f38d4c8a7 100644 --- a/Compiler/src/stmtinfo.jl +++ b/Compiler/src/stmtinfo.jl @@ -268,6 +268,17 @@ end add_edges_impl(edges::Vector{Any}, info::UnionSplitApplyCallInfo) = for split in info.infos; add_edges!(edges, split); end +""" + info::InvokeCICallInfo + +Represents a resolved call to `Core.invoke` targeting a `Core.CodeInstance` +""" +struct InvokeCICallInfo <: CallInfo + edge::CodeInstance +end +add_edges_impl(edges::Vector{Any}, info::InvokeCICallInfo) = + add_one_edge!(edges, info.edge) + """ info::InvokeCallInfo diff --git a/Compiler/src/utilities.jl b/Compiler/src/utilities.jl index 29f3dfa4afd4a2..da20f9aafbfb27 100644 
--- a/Compiler/src/utilities.jl +++ b/Compiler/src/utilities.jl @@ -54,8 +54,8 @@ function count_const_size(@nospecialize(x), count_self::Bool = true) # No definite size (isa(x, GenericMemory) || isa(x, String) || isa(x, SimpleVector)) && return MAX_INLINE_CONST_SIZE + 1 - if isa(x, Module) || isa(x, Method) - # We allow modules and methods, because we already assume they are externally + if isa(x, Module) || isa(x, Method) || isa(x, CodeInstance) + # We allow modules, methods and CodeInstance, because we already assume they are externally # rooted, so we count their contents as 0 size. return sizeof(Ptr{Cvoid}) end diff --git a/NEWS.md b/NEWS.md index 61bad831e261c5..3ab50ca2ee4ff4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -120,6 +120,8 @@ New library features * New `ltruncate`, `rtruncate` and `ctruncate` functions for truncating strings to text width, accounting for char widths ([#55351]) * `isless` (and thus `cmp`, sorting, etc.) is now supported for zero-dimensional `AbstractArray`s ([#55772]) * `invoke` now supports passing a Method instead of a type signature making this interface somewhat more flexible for certain uncommon use cases ([#56692]). +* `invoke` now supports passing a CodeInstance instead of a type, which can enable +certain compiler plugin workflows ([#56660]). Standard library changes ------------------------ diff --git a/base/docs/basedocs.jl b/base/docs/basedocs.jl index 5119ceaf2164a6..141950f5e92ffd 100644 --- a/base/docs/basedocs.jl +++ b/base/docs/basedocs.jl @@ -2031,6 +2031,7 @@ applicable """ invoke(f, argtypes::Type, args...; kwargs...) invoke(f, argtypes::Method, args...; kwargs...) + invoke(f, argtypes::CodeInstance, args...; kwargs...) Invoke a method for the given generic function `f` matching the specified types `argtypes` on the specified arguments `args` and passing the keyword arguments `kwargs`. 
The arguments `args` must @@ -2056,6 +2057,22 @@ Note in particular that the specified `Method` may be entirely unreachable from If the method is part of the ordinary method table, this call behaves similar to `invoke(f, method.sig, args...)`. +!!! compat "Julia 1.12" + Passing a `Method` requires Julia 1.12. + +# Passing a `CodeInstance` instead of a signature +The `argtypes` argument may be a `CodeInstance`, bypassing both method lookup and specialization. +The semantics of this invocation are similar to a function pointer call of the `CodeInstance`'s +`invoke` pointer. It is an error to invoke a `CodeInstance` with arguments that do not match its +parent MethodInstance or from a world age not included in the `min_world`/`max_world` range. +It is undefined behavior to invoke a CodeInstance whose behavior does not match the constraints +specified in its fields. For some code instances with `owner !== nothing` (i.e. those generated +by external compilers), it may be an error to invoke them after passing through precompilation. +This is an advanced interface intended for use with external compiler plugins. + +!!! compat "Julia 1.12" + Passing a `CodeInstance` requires Julia 1.12. + # Examples ```jldoctest julia> f(x::Real) = x^2; diff --git a/base/optimized_generics.jl b/base/optimized_generics.jl index 86b54a294564dd..c0b953777ca94c 100644 --- a/base/optimized_generics.jl +++ b/base/optimized_generics.jl @@ -54,4 +54,31 @@ module KeyValue function get end end +# Compiler-recognized intrinsics for compiler plugins +""" + module CompilerPlugins + +Implements a pair of functions `typeinf`/`typeinf_edge`. When the optimizer sees +a call to `typeinf`, it has license to instead call `typeinf_edge`, supplying the +current inference stack in `parent_frame` (but otherwise supplying the arguments +to `typeinf`). typeinf_edge will return the `CodeInstance` that `typeinf` would +have returned at runtime. 
The optimizer may perform a non-IPO replacement of +the call to `typeinf` by the result of `typeinf_edge`. In addition, the IPO-safe +fields of the `CodeInstance` may be propagated in IPO mode. +""" +module CompilerPlugins + """ + typeinf(owner, mi, source_mode)::CodeInstance + + Return a `CodeInstance` for the given `mi` whose valid world range includes at + least the current tls world and that satisfies the requirements of `source_mode`. + """ + function typeinf end + + """ + typeinf_edge(owner, mi, parent_frame, world, source_mode)::CodeInstance + """ + function typeinf_edge end +end + end diff --git a/src/builtins.c b/src/builtins.c index c6b0bf130550ba..3f555da9d2a836 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -1587,6 +1587,28 @@ JL_CALLABLE(jl_f_invoke) if (!jl_tuple1_isa(args[0], &args[2], nargs - 1, (jl_datatype_t*)m->sig)) jl_type_error("invoke: argument type error", argtypes, arg_tuple(args[0], &args[2], nargs - 1)); return jl_gf_invoke_by_method(m, args[0], &args[2], nargs - 1); + } else if (jl_is_code_instance(argtypes)) { + jl_code_instance_t *codeinst = (jl_code_instance_t*)args[1]; + jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke); + if (!jl_tuple1_isa(args[0], &args[2], nargs - 1, (jl_datatype_t*)codeinst->def->specTypes)) { + jl_type_error("invoke: argument type error", codeinst->def->specTypes, arg_tuple(args[0], &args[2], nargs - 1)); + } + if (jl_atomic_load_relaxed(&codeinst->min_world) > jl_current_task->world_age || + jl_current_task->world_age > jl_atomic_load_relaxed(&codeinst->max_world)) { + jl_error("invoke: CodeInstance not valid for this world"); + } + if (!invoke) { + jl_compile_codeinst(codeinst); + invoke = jl_atomic_load_acquire(&codeinst->invoke); + } + if (invoke) { + return invoke(args[0], &args[2], nargs - 2, codeinst); + } else { + if (codeinst->owner != jl_nothing || !jl_is_method(codeinst->def->def.value)) { + jl_error("Failed to invoke or compile external codeinst"); + } + return 
jl_gf_invoke_by_method(codeinst->def->def.method, args[0], &args[2], nargs - 1); + } } if (!jl_is_tuple_type(jl_unwrap_unionall(argtypes))) jl_type_error("invoke", (jl_value_t*)jl_anytuple_type_type, argtypes); diff --git a/src/interpreter.c b/src/interpreter.c index 49a3afed14f0c0..2dc1c9ed5a0c47 100644 --- a/src/interpreter.c +++ b/src/interpreter.c @@ -137,8 +137,28 @@ static jl_value_t *do_invoke(jl_value_t **args, size_t nargs, interpreter_state argv[i-1] = eval_value(args[i], s); jl_value_t *c = args[0]; assert(jl_is_code_instance(c) || jl_is_method_instance(c)); - jl_method_instance_t *meth = jl_is_method_instance(c) ? (jl_method_instance_t*)c : ((jl_code_instance_t*)c)->def; - jl_value_t *result = jl_invoke(argv[0], nargs == 2 ? NULL : &argv[1], nargs - 2, meth); + jl_value_t *result = NULL; + if (jl_is_code_instance(c)) { + jl_code_instance_t *codeinst = (jl_code_instance_t*)c; + assert(jl_atomic_load_relaxed(&codeinst->min_world) <= jl_current_task->world_age && + jl_current_task->world_age <= jl_atomic_load_relaxed(&codeinst->max_world)); + jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke); + if (!invoke) { + jl_compile_codeinst(codeinst); + invoke = jl_atomic_load_acquire(&codeinst->invoke); + } + if (invoke) { + result = invoke(argv[0], nargs == 2 ? NULL : &argv[1], nargs - 2, codeinst); + + } else { + if (codeinst->owner != jl_nothing) { + jl_error("Failed to invoke or compile external codeinst"); + } + result = jl_invoke(argv[0], nargs == 2 ? NULL : &argv[1], nargs - 2, codeinst->def); + } + } else { + result = jl_invoke(argv[0], nargs == 2 ? 
NULL : &argv[1], nargs - 2, (jl_method_instance_t*)c); + } JL_GC_POP(); return result; } diff --git a/test/core.jl b/test/core.jl index 39d02d5d567c9b..63952e8728e1e9 100644 --- a/test/core.jl +++ b/test/core.jl @@ -8353,9 +8353,23 @@ end @test eval(Expr(:toplevel, :(@define_call(f_macro_defined1)))) == 1 @test @define_call(f_macro_defined2) == 1 +# `invoke` of `Method` let m = which(+, (Int, Int)) @eval f56692(i) = invoke(+, $m, i, 4) global g56692() = f56692(5) == 9 ? "true" : false end @test @inferred(f56692(3)) == 7 @test @inferred(g56692()) == "true" + +# `invoke` of `CodeInstance` +f_invalidate_me() = return 1 +f_invoke_me() = return f_invalidate_me() +@test f_invoke_me() == 1 +const f_invoke_me_ci = Base.specialize_method(Base._which(Tuple{typeof(f_invoke_me)})).cache +f_call_me() = invoke(f_invoke_me, f_invoke_me_ci) +@test invoke(f_invoke_me, f_invoke_me_ci) == 1 +@test f_call_me() == 1 +@test_throws TypeError invoke(f_invoke_me, f_invoke_me_ci, 1) +f_invalidate_me() = 2 +@test_throws ErrorException invoke(f_invoke_me, f_invoke_me_ci) +@test_throws ErrorException f_call_me()