diff --git a/.gitignore b/.gitignore
index 524a12d066c4d..80bdd67619454 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,6 +34,7 @@
.DS_Store
.idea/*
.vscode/*
+.zed/*
*.heapsnapshot
.cache
# Buildkite: Ignore the entire .buildkite directory
diff --git a/Makefile b/Makefile
index 735d342a79eb5..4fd8b878c5d1f 100644
--- a/Makefile
+++ b/Makefile
@@ -82,7 +82,7 @@ julia-deps: | $(DIRS) $(build_datarootdir)/julia/base $(build_datarootdir)/julia
julia-stdlib: | $(DIRS) julia-deps
@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/stdlib
-julia-base: julia-deps $(build_sysconfdir)/julia/startup.jl $(build_man1dir)/julia.1 $(build_datarootdir)/julia/julia-config.jl
+julia-base: julia-deps $(build_sysconfdir)/julia/startup.jl $(build_man1dir)/julia.1 $(build_datarootdir)/julia/julia-config.jl $(build_datarootdir)/julia/juliac.jl $(build_datarootdir)/julia/juliac-buildscript.jl
@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/base
julia-libccalltest: julia-deps
@@ -181,7 +181,7 @@ $(build_sysconfdir)/julia/startup.jl: $(JULIAHOME)/etc/startup.jl | $(build_sysc
@echo Creating usr/etc/julia/startup.jl
@cp $< $@
-$(build_datarootdir)/julia/julia-config.jl: $(JULIAHOME)/contrib/julia-config.jl | $(build_datarootdir)/julia
+$(build_datarootdir)/julia/%: $(JULIAHOME)/contrib/% | $(build_datarootdir)/julia
$(INSTALL_M) $< $(dir $@)
$(build_depsbindir)/stringreplace: $(JULIAHOME)/contrib/stringreplace.c | $(build_depsbindir)
@@ -410,7 +410,7 @@ endif
$(INSTALL_F) $(JULIAHOME)/contrib/julia.appdata.xml $(DESTDIR)$(datarootdir)/metainfo/
# Install terminal info database
ifneq ($(WITH_TERMINFO),0)
- cp -R -L $(build_datarootdir)/terminfo $(DESTDIR)$(datarootdir)
+ cp -R -L $(build_datarootdir)/julia/terminfo $(DESTDIR)$(datarootdir)/julia/
endif
# Update RPATH entries and JL_SYSTEM_IMAGE_PATH if $(private_libdir_rel) != $(build_private_libdir_rel)
diff --git a/NEWS.md b/NEWS.md
index 9ecdd87f0c2bb..fb1fcf381cc7f 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,6 +4,8 @@ Julia v1.12 Release Notes
New language features
---------------------
+- New option `--trim` for building "trimmed" binaries, where code not provably reachable from entry points
+ is removed. Entry points can be marked using `Base.Experimental.entrypoint` ([#55047]).
- A new keyword argument `usings::Bool` has been added to `names`. By using this, we can now
find all the names available in module `A` by `names(A; all=true, imported=true, usings=true)`. ([#54609])
- the `@atomic(...)` macro family supports now the reference assignment syntax, e.g.
@@ -35,6 +37,10 @@ Language changes
expression within a given `:toplevel` expression to make use of macros
defined earlier in the same `:toplevel` expression. ([#53515])
+ - Trivial infinite loops (like `while true; end`) are no longer undefined
+ behavior. Infinite loops that actually do things (e.g. have side effects
+ or sleep) were never and are still not undefined behavior. ([#52999])
+
Compiler/Runtime improvements
-----------------------------
@@ -57,6 +63,7 @@ variables. ([#53742]).
* New `--trace-compile-timing` option to report how long each method reported by `--trace-compile` took
to compile, in ms. ([#54662])
* `--trace-compile` now prints recompiled methods in yellow or with a trailing comment if color is not supported ([#55763])
+* New `--trace-dispatch` option to report methods that are dynamically dispatched ([#55848]).
Multi-threading changes
-----------------------
diff --git a/base/Base.jl b/base/Base.jl
index 10a8dd1532f92..23633f0b5138b 100644
--- a/base/Base.jl
+++ b/base/Base.jl
@@ -306,7 +306,6 @@ end
include("hashing.jl")
include("rounding.jl")
include("div.jl")
-include("rawbigints.jl")
include("float.jl")
include("twiceprecision.jl")
include("complex.jl")
diff --git a/base/abstractarray.jl b/base/abstractarray.jl
index 754ab20660ab8..e877a87c2cdd1 100644
--- a/base/abstractarray.jl
+++ b/base/abstractarray.jl
@@ -1101,11 +1101,8 @@ function copyto_unaliased!(deststyle::IndexStyle, dest::AbstractArray, srcstyle:
end
else
# Dual-iterator implementation
- ret = iterate(iterdest)
- @inbounds for a in src
- idx, state = ret::NTuple{2,Any}
- dest[idx] = a
- ret = iterate(iterdest, state)
+ for (Idest, Isrc) in zip(iterdest, itersrc)
+ @inbounds dest[Idest] = src[Isrc]
end
end
end
diff --git a/base/abstractdict.jl b/base/abstractdict.jl
index 62a5b3ee9e1b0..85a726b4cdbf4 100644
--- a/base/abstractdict.jl
+++ b/base/abstractdict.jl
@@ -392,6 +392,10 @@ Dict{String, Float64} with 3 entries:
julia> ans == mergewith(+)(a, b)
true
+
+julia> mergewith(-, Dict(), Dict(:a=>1)) # Combining function only used if key is present in both
+Dict{Any, Any} with 1 entry:
+ :a => 1
```
"""
mergewith(combine, d::AbstractDict, others::AbstractDict...) =
diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
index 68b8394b72c3d..c8a25be422637 100644
--- a/base/compiler/abstractinterpretation.jl
+++ b/base/compiler/abstractinterpretation.jl
@@ -47,223 +47,210 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
matches = find_method_matches(interp, argtypes, atype; max_methods)
if isa(matches, FailedMethodMatch)
add_remark!(interp, sv, matches.reason)
- return CallMeta(Any, Any, Effects(), NoCallInfo())
+ return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
end
(; valid_worlds, applicable, info) = matches
update_valid_age!(sv, valid_worlds)
- napplicable = length(applicable)
+
+ # final result
+ gfresult = Future{CallMeta}()
+ # intermediate work for computing gfresult
rettype = exctype = Bottom
edges = MethodInstance[]
conditionals = nothing # keeps refinement information of call argument types when the return type is boolean
- seen = 0 # number of signatures actually inferred
+ seenall = true
const_results = nothing # or const_results::Vector{Union{Nothing,ConstResult}} if any const results are available
- multiple_matches = napplicable > 1
fargs = arginfo.fargs
all_effects = EFFECTS_TOTAL
slotrefinements = nothing # keeps refinement information on slot types obtained from call signature
- for i in 1:napplicable
- match = applicable[i]::MethodMatch
- method = match.method
- sig = match.spec_types
- if bail_out_toplevel_call(interp, InferenceLoopState(sig, rettype, all_effects), sv)
- # only infer concrete call sites in top-level expressions
- add_remark!(interp, sv, "Refusing to infer non-concrete call site in top-level expression")
- break
- end
- this_rt = Bottom
- this_exct = Bottom
- splitunions = false
- # TODO: this used to trigger a bug in inference recursion detection, and is unmaintained now
- # sigtuple = unwrap_unionall(sig)::DataType
- # splitunions = 1 < unionsplitcost(sigtuple.parameters) * napplicable <= InferenceParams(interp).max_union_splitting
- if splitunions
- splitsigs = switchtupleunion(sig)
- for sig_n in splitsigs
- result = abstract_call_method(interp, method, sig_n, svec(), multiple_matches, si, sv)
- (; rt, exct, edge, effects, volatile_inf_result) = result
+ # split the for loop off into a function, so that we can pause and restart it at will
+ i::Int = 1
+ f = Core.Box(f)
+ atype = Core.Box(atype)
+ function infercalls(interp, sv)
+ napplicable = length(applicable)
+ multiple_matches = napplicable > 1
+ while i <= napplicable
+ match = applicable[i]::MethodMatch
+ method = match.method
+ sig = match.spec_types
+ if bail_out_toplevel_call(interp, InferenceLoopState(sig, rettype, all_effects), sv)
+ # only infer concrete call sites in top-level expressions
+ add_remark!(interp, sv, "Refusing to infer non-concrete call site in top-level expression")
+ seenall = false
+ break
+ end
+ # TODO: this is unmaintained now as it didn't seem to improve things, though it does avoid hard-coding the union split at the higher level,
+ # it also can hurt infer-ability of some constrained parameter types (e.g. quacks like a duck)
+ # sigtuple = unwrap_unionall(sig)::DataType
+ # splitunions = 1 < unionsplitcost(sigtuple.parameters) * napplicable <= InferenceParams(interp).max_union_splitting
+ #if splitunions
+ # splitsigs = switchtupleunion(sig)
+ # for sig_n in splitsigs
+ # result = abstract_call_method(interp, method, sig_n, svec(), multiple_matches, si, sv)::Future
+ # handle1(...)
+ # end
+ #end
+ mresult = abstract_call_method(interp, method, sig, match.sparams, multiple_matches, si, sv)::Future
+ function handle1(interp, sv)
+ local (; rt, exct, edge, effects, volatile_inf_result) = mresult[]
+ this_conditional = ignorelimited(rt)
+ this_rt = widenwrappedconditional(rt)
+ this_exct = exct
+ # try constant propagation with argtypes for this match
+ # this is in preparation for inlining, or improving the return result
this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[i]
this_arginfo = ArgInfo(fargs, this_argtypes)
const_call_result = abstract_call_method_with_const_args(interp,
- result, f, this_arginfo, si, match, sv)
+ mresult[], f.contents, this_arginfo, si, match, sv)
const_result = volatile_inf_result
if const_call_result !== nothing
- if const_call_result.rt ⊑ₚ rt
- rt = const_call_result.rt
+ this_const_conditional = ignorelimited(const_call_result.rt)
+ this_const_rt = widenwrappedconditional(const_call_result.rt)
+ if this_const_rt ⊑ₚ this_rt
+ # As long as the const-prop result we have is not *worse* than
+ # what we found out on types, we'd like to use it. Even if the
+ # end result is exactly equivalent, it is likely that the IR
+ # we produced while constproping is better than that with
+ # generic types.
+ # Return type of const-prop' inference can be wider than that of non const-prop' inference
+ # e.g. in cases when there are cycles but cached result is still accurate
+ this_conditional = this_const_conditional
+ this_rt = this_const_rt
(; effects, const_result, edge) = const_call_result
elseif is_better_effects(const_call_result.effects, effects)
(; effects, const_result, edge) = const_call_result
else
add_remark!(interp, sv, "[constprop] Discarded because the result was wider than inference")
end
- if const_call_result.exct ⋤ exct
- (; exct, const_result, edge) = const_call_result
+ # Treat the exception type separately. Currently, constprop often cannot determine the exception type
+ # because consistent-cy does not apply to exceptions.
+ if const_call_result.exct ⋤ this_exct
+ this_exct = const_call_result.exct
+ (; const_result, edge) = const_call_result
else
add_remark!(interp, sv, "[constprop] Discarded exception type because result was wider than inference")
end
end
+
all_effects = merge_effects(all_effects, effects)
if const_result !== nothing
if const_results === nothing
- const_results = fill!(Vector{Union{Nothing,ConstResult}}(undef, #=TODO=#napplicable), nothing)
+ const_results = fill!(Vector{Union{Nothing,ConstResult}}(undef, napplicable), nothing)
end
const_results[i] = const_result
end
edge === nothing || push!(edges, edge)
- this_rt = this_rt ⊔ₚ rt
- this_exct = this_exct ⊔ₚ exct
- if bail_out_call(interp, this_rt, sv)
- break
+ @assert !(this_conditional isa Conditional || this_rt isa MustAlias) "invalid lattice element returned from inter-procedural context"
+ if can_propagate_conditional(this_conditional, argtypes)
+ # The only case where we need to keep this in rt is where
+ # we can directly propagate the conditional to a slot argument
+ # that is not one of our arguments, otherwise we keep all the
+ # relevant information in `conditionals` below.
+ this_rt = this_conditional
end
- end
- this_conditional = ignorelimited(this_rt)
- this_rt = widenwrappedconditional(this_rt)
- else
- result = abstract_call_method(interp, method, sig, match.sparams, multiple_matches, si, sv)
- (; rt, exct, edge, effects, volatile_inf_result) = result
- this_conditional = ignorelimited(rt)
- this_rt = widenwrappedconditional(rt)
- this_exct = exct
- # try constant propagation with argtypes for this match
- # this is in preparation for inlining, or improving the return result
- this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[i]
- this_arginfo = ArgInfo(fargs, this_argtypes)
- const_call_result = abstract_call_method_with_const_args(interp,
- result, f, this_arginfo, si, match, sv)
- const_result = volatile_inf_result
- if const_call_result !== nothing
- this_const_conditional = ignorelimited(const_call_result.rt)
- this_const_rt = widenwrappedconditional(const_call_result.rt)
- if this_const_rt ⊑ₚ this_rt
- # As long as the const-prop result we have is not *worse* than
- # what we found out on types, we'd like to use it. Even if the
- # end result is exactly equivalent, it is likely that the IR
- # we produced while constproping is better than that with
- # generic types.
- # Return type of const-prop' inference can be wider than that of non const-prop' inference
- # e.g. in cases when there are cycles but cached result is still accurate
- this_conditional = this_const_conditional
- this_rt = this_const_rt
- (; effects, const_result, edge) = const_call_result
- elseif is_better_effects(const_call_result.effects, effects)
- (; effects, const_result, edge) = const_call_result
- else
- add_remark!(interp, sv, "[constprop] Discarded because the result was wider than inference")
+
+ rettype = rettype ⊔ₚ this_rt
+ exctype = exctype ⊔ₚ this_exct
+ if has_conditional(𝕃ₚ, sv) && this_conditional !== Bottom && is_lattice_bool(𝕃ₚ, rettype) && fargs !== nothing
+ if conditionals === nothing
+ conditionals = Any[Bottom for _ in 1:length(argtypes)],
+ Any[Bottom for _ in 1:length(argtypes)]
+ end
+ for i = 1:length(argtypes)
+ cnd = conditional_argtype(𝕃ᵢ, this_conditional, sig, argtypes, i)
+ conditionals[1][i] = conditionals[1][i] ⊔ᵢ cnd.thentype
+ conditionals[2][i] = conditionals[2][i] ⊔ᵢ cnd.elsetype
+ end
end
- # Treat the exception type separately. Currently, constprop often cannot determine the exception type
- # because consistent-cy does not apply to exceptions.
- if const_call_result.exct ⋤ this_exct
- this_exct = const_call_result.exct
- (; const_result, edge) = const_call_result
- else
- add_remark!(interp, sv, "[constprop] Discarded exception type because result was wider than inference")
+ if i < napplicable && bail_out_call(interp, InferenceLoopState(sig, rettype, all_effects), sv)
+ add_remark!(interp, sv, "Call inference reached maximally imprecise information. Bailing on.")
+ seenall = false
+ i = napplicable # break in outer function
end
+ i += 1
+ return true
end
- all_effects = merge_effects(all_effects, effects)
- if const_result !== nothing
- if const_results === nothing
- const_results = fill!(Vector{Union{Nothing,ConstResult}}(undef, napplicable), nothing)
- end
- const_results[i] = const_result
+ if isready(mresult) && handle1(interp, sv)
+ continue
+ else
+ push!(sv.tasks, handle1)
+ return false
end
- edge === nothing || push!(edges, edge)
- end
- @assert !(this_conditional isa Conditional || this_rt isa MustAlias) "invalid lattice element returned from inter-procedural context"
- seen += 1
+ end # while
- if can_propagate_conditional(this_conditional, argtypes)
- # The only case where we need to keep this in rt is where
- # we can directly propagate the conditional to a slot argument
- # that is not one of our arguments, otherwise we keep all the
- # relevant information in `conditionals` below.
- this_rt = this_conditional
+ if const_results !== nothing
+ @assert napplicable == nmatches(info) == length(const_results)
+ info = ConstCallInfo(info, const_results)
end
- rettype = rettype ⊔ₚ this_rt
- exctype = exctype ⊔ₚ this_exct
- if has_conditional(𝕃ₚ, sv) && this_conditional !== Bottom && is_lattice_bool(𝕃ₚ, rettype) && fargs !== nothing
- if conditionals === nothing
- conditionals = Any[Bottom for _ in 1:length(argtypes)],
- Any[Bottom for _ in 1:length(argtypes)]
+ if seenall
+ if !fully_covering(matches) || any_ambig(matches)
+ # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature.
+ all_effects = Effects(all_effects; nothrow=false)
+ exctype = exctype ⊔ₚ MethodError
end
- for i = 1:length(argtypes)
- cnd = conditional_argtype(𝕃ᵢ, this_conditional, sig, argtypes, i)
- conditionals[1][i] = conditionals[1][i] ⊔ᵢ cnd.thentype
- conditionals[2][i] = conditionals[2][i] ⊔ᵢ cnd.elsetype
+ if sv isa InferenceState && fargs !== nothing
+ slotrefinements = collect_slot_refinements(𝕃ᵢ, applicable, argtypes, fargs, sv)
end
- end
- if bail_out_call(interp, InferenceLoopState(sig, rettype, all_effects), sv)
- add_remark!(interp, sv, "Call inference reached maximally imprecise information. Bailing on.")
- break
- end
- end
-
- if const_results !== nothing
- @assert napplicable == nmatches(info) == length(const_results)
- info = ConstCallInfo(info, const_results)
- end
-
- if seen ≠ napplicable
- # there is unanalyzed candidate, widen type and effects to the top
- rettype = exctype = Any
- all_effects = Effects()
- else
- if !fully_covering(matches) || any_ambig(matches)
- # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature.
- all_effects = Effects(all_effects; nothrow=false)
- exctype = exctype ⊔ₚ MethodError
- end
- if sv isa InferenceState && fargs !== nothing
- slotrefinements = collect_slot_refinements(𝕃ᵢ, applicable, argtypes, fargs, sv)
- end
- end
-
- rettype = from_interprocedural!(interp, rettype, sv, arginfo, conditionals)
-
- # Also considering inferring the compilation signature for this method, so
- # it is available to the compiler in case it ends up needing it.
- if (isa(sv, InferenceState) && infer_compilation_signature(interp) &&
- (1 == seen == napplicable) && rettype !== Any && rettype !== Bottom &&
- !is_removable_if_unused(all_effects))
- match = applicable[1]::MethodMatch
- method = match.method
- sig = match.spec_types
- mi = specialize_method(match; preexisting=true)
- if mi !== nothing && !const_prop_methodinstance_heuristic(interp, mi, arginfo, sv)
- csig = get_compileable_sig(method, sig, match.sparams)
- if csig !== nothing && csig !== sig
- abstract_call_method(interp, method, csig, match.sparams, multiple_matches, StmtInfo(false), sv)
+ else
+ # there is unanalyzed candidate, widen type and effects to the top
+ rettype = exctype = Any
+ all_effects = Effects()
+ end
+
+ rettype = from_interprocedural!(interp, rettype, sv, arginfo, conditionals)
+
+ # Also considering inferring the compilation signature for this method, so
+ # it is available to the compiler in case it ends up needing it.
+ if (isa(sv, InferenceState) && infer_compilation_signature(interp) &&
+ (seenall && 1 == napplicable) && rettype !== Any && rettype !== Bottom &&
+ !is_removable_if_unused(all_effects))
+ match = applicable[1]::MethodMatch
+ method = match.method
+ sig = match.spec_types
+ mi = specialize_method(match; preexisting=true)
+ if mi !== nothing && !const_prop_methodinstance_heuristic(interp, mi, arginfo, sv)
+ csig = get_compileable_sig(method, sig, match.sparams)
+ if csig !== nothing && csig !== sig
+ abstract_call_method(interp, method, csig, match.sparams, multiple_matches, StmtInfo(false), sv)::Future
+ end
end
end
- end
- if call_result_unused(si) && !(rettype === Bottom)
- add_remark!(interp, sv, "Call result type was widened because the return value is unused")
- # We're mainly only here because the optimizer might want this code,
- # but we ourselves locally don't typically care about it locally
- # (beyond checking if it always throws).
- # So avoid adding an edge, since we don't want to bother attempting
- # to improve our result even if it does change (to always throw),
- # and avoid keeping track of a more complex result type.
- rettype = Any
- end
- any_slot_refined = slotrefinements !== nothing
- add_call_backedges!(interp, rettype, all_effects, any_slot_refined, edges, matches, atype, sv)
- if isa(sv, InferenceState)
- # TODO (#48913) implement a proper recursion handling for irinterp:
- # This works just because currently the `:terminate` condition guarantees that
- # irinterp doesn't fail into unresolved cycles, but it's not a good solution.
- # We should revisit this once we have a better story for handling cycles in irinterp.
- if !isempty(sv.pclimitations) # remove self, if present
- delete!(sv.pclimitations, sv)
- for caller in callers_in_cycle(sv)
- delete!(sv.pclimitations, caller)
+ if call_result_unused(si) && !(rettype === Bottom)
+ add_remark!(interp, sv, "Call result type was widened because the return value is unused")
+ # We're mainly only here because the optimizer might want this code,
+ # but we ourselves locally don't typically care about it locally
+ # (beyond checking if it always throws).
+ # So avoid adding an edge, since we don't want to bother attempting
+ # to improve our result even if it does change (to always throw),
+ # and avoid keeping track of a more complex result type.
+ rettype = Any
+ end
+ any_slot_refined = slotrefinements !== nothing
+ add_call_backedges!(interp, rettype, all_effects, any_slot_refined, edges, matches, atype.contents, sv)
+ if isa(sv, InferenceState)
+ # TODO (#48913) implement a proper recursion handling for irinterp:
+ # This works just because currently the `:terminate` condition guarantees that
+ # irinterp doesn't fail into unresolved cycles, but it's not a good solution.
+ # We should revisit this once we have a better story for handling cycles in irinterp.
+ if !isempty(sv.pclimitations) # remove self, if present
+ delete!(sv.pclimitations, sv)
+ for caller in callers_in_cycle(sv)
+ delete!(sv.pclimitations, caller)
+ end
end
end
- end
- return CallMeta(rettype, exctype, all_effects, info, slotrefinements)
+ gfresult[] = CallMeta(rettype, exctype, all_effects, info, slotrefinements)
+ return true
+ end # infercalls
+ # start making progress on the first call
+ infercalls(interp, sv) || push!(sv.tasks, infercalls)
+ return gfresult
end
struct FailedMethodMatch
@@ -607,9 +594,9 @@ function abstract_call_method(interp::AbstractInterpreter,
hardlimit::Bool, si::StmtInfo, sv::AbsIntState)
sigtuple = unwrap_unionall(sig)
sigtuple isa DataType ||
- return MethodCallResult(Any, Any, false, false, nothing, Effects())
+ return Future(MethodCallResult(Any, Any, false, false, nothing, Effects()))
all(@nospecialize(x) -> valid_as_lattice(unwrapva(x), true), sigtuple.parameters) ||
- return MethodCallResult(Union{}, Any, false, false, nothing, EFFECTS_THROWS) # catch bad type intersections early
+ return Future(MethodCallResult(Union{}, Any, false, false, nothing, EFFECTS_THROWS)) # catch bad type intersections early
if is_nospecializeinfer(method)
sig = get_nospecializeinfer_sig(method, sig, sparams)
@@ -634,7 +621,7 @@ function abstract_call_method(interp::AbstractInterpreter,
# we have a self-cycle in the call-graph, but not in the inference graph (typically):
# break this edge now (before we record it) by returning early
# (non-typically, this means that we lose the ability to detect a guaranteed StackOverflow in some cases)
- return MethodCallResult(Any, Any, true, true, nothing, Effects())
+ return Future(MethodCallResult(Any, Any, true, true, nothing, Effects()))
end
topmost = nothing
edgecycle = true
@@ -689,7 +676,7 @@ function abstract_call_method(interp::AbstractInterpreter,
# since it's very unlikely that we'll try to inline this,
# or want make an invoke edge to its calling convention return type.
# (non-typically, this means that we lose the ability to detect a guaranteed StackOverflow in some cases)
- return MethodCallResult(Any, Any, true, true, nothing, Effects())
+ return Future(MethodCallResult(Any, Any, true, true, nothing, Effects()))
end
add_remark!(interp, sv, washardlimit ? RECURSION_MSG_HARDLIMIT : RECURSION_MSG)
# TODO (#48913) implement a proper recursion handling for irinterp:
@@ -745,31 +732,7 @@ function abstract_call_method(interp::AbstractInterpreter,
sparams = recomputed[2]::SimpleVector
end
- (; rt, exct, edge, effects, volatile_inf_result) = typeinf_edge(interp, method, sig, sparams, sv)
-
- if edge === nothing
- edgecycle = edgelimited = true
- end
-
- # we look for the termination effect override here as well, since the :terminates effect
- # may have been tainted due to recursion at this point even if it's overridden
- if is_effect_overridden(sv, :terminates_globally)
- # this frame is known to terminate
- effects = Effects(effects, terminates=true)
- elseif is_effect_overridden(method, :terminates_globally)
- # this edge is known to terminate
- effects = Effects(effects; terminates=true)
- elseif edgecycle
- # Some sort of recursion was detected.
- if edge !== nothing && !edgelimited && !is_edge_recursed(edge, sv)
- # no `MethodInstance` cycles -- don't taint :terminate
- else
- # we cannot guarantee that the call will terminate
- effects = Effects(effects; terminates=false)
- end
- end
-
- return MethodCallResult(rt, exct, edgecycle, edgelimited, edge, effects, volatile_inf_result)
+ return typeinf_edge(interp, method, sig, sparams, sv, edgecycle, edgelimited)
end
function edge_matches_sv(interp::AbstractInterpreter, frame::AbsIntState,
@@ -1319,7 +1282,7 @@ function semi_concrete_eval_call(interp::AbstractInterpreter,
effects = Effects(effects; noub=ALWAYS_TRUE)
end
exct = refine_exception_type(result.exct, effects)
- return ConstCallResults(rt, exct, SemiConcreteResult(mi, ir, effects), effects, mi)
+ return ConstCallResults(rt, exct, SemiConcreteResult(mi, ir, effects, spec_info(irsv)), effects, mi)
end
end
end
@@ -1331,7 +1294,7 @@ const_prop_result(inf_result::InferenceResult) =
inf_result.ipo_effects, inf_result.linfo)
# return cached result of constant analysis
-return_cached_result(::AbstractInterpreter, inf_result::InferenceResult, ::AbsIntState) =
+return_localcache_result(::AbstractInterpreter, inf_result::InferenceResult, ::AbsIntState) =
const_prop_result(inf_result)
function compute_forwarded_argtypes(interp::AbstractInterpreter, arginfo::ArgInfo, sv::AbsIntState)
@@ -1361,7 +1324,7 @@ function const_prop_call(interp::AbstractInterpreter,
return nothing
end
@assert inf_result.linfo === mi "MethodInstance for cached inference result does not match"
- return return_cached_result(interp, inf_result, sv)
+ return return_localcache_result(interp, inf_result, sv)
end
overridden_by_const = falses(length(argtypes))
for i = 1:length(argtypes)
@@ -1375,7 +1338,7 @@ function const_prop_call(interp::AbstractInterpreter,
end
# perform fresh constant prop'
inf_result = InferenceResult(mi, argtypes, overridden_by_const)
- frame = InferenceState(inf_result, #=cache_mode=#:local, interp)
+ frame = InferenceState(inf_result, #=cache_mode=#:local, interp) # TODO: this should also be converted to a stackless Future
if frame === nothing
add_remark!(interp, sv, "[constprop] Could not retrieve the source")
return nothing # this is probably a bad generated function (unsound), but just ignore it
@@ -1517,9 +1480,9 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft)
widet = typ.typ
if isa(widet, DataType)
if widet.name === Tuple.name
- return AbstractIterationResult(typ.fields, nothing)
+ return Future(AbstractIterationResult(typ.fields, nothing))
elseif widet.name === _NAMEDTUPLE_NAME
- return AbstractIterationResult(typ.fields, nothing)
+ return Future(AbstractIterationResult(typ.fields, nothing))
end
end
end
@@ -1527,7 +1490,7 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft)
if isa(typ, Const)
val = typ.val
if isa(val, SimpleVector) || isa(val, Tuple) || isa(val, NamedTuple)
- return AbstractIterationResult(Any[ Const(val[i]) for i in 1:length(val) ], nothing) # avoid making a tuple Generator here!
+ return Future(AbstractIterationResult(Any[ Const(val[i]) for i in 1:length(val) ], nothing)) # avoid making a tuple Generator here!
end
end
@@ -1544,18 +1507,18 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft)
# refine the Union to remove elements that are not valid tags for objects
filter!(@nospecialize(x) -> valid_as_lattice(x, true), utis)
if length(utis) == 0
- return AbstractIterationResult(Any[], nothing) # oops, this statement was actually unreachable
+ return Future(AbstractIterationResult(Any[], nothing)) # oops, this statement was actually unreachable
elseif length(utis) == 1
tti = utis[1]
tti0 = rewrap_unionall(tti, tti0)
else
if any(@nospecialize(t) -> !isa(t, DataType) || !(t <: Tuple) || !isknownlength(t), utis)
- return AbstractIterationResult(Any[Vararg{Any}], nothing, Effects())
+ return Future(AbstractIterationResult(Any[Vararg{Any}], nothing, Effects()))
end
ltp = length((utis[1]::DataType).parameters)
for t in utis
if length((t::DataType).parameters) != ltp
- return AbstractIterationResult(Any[Vararg{Any}], nothing)
+ return Future(AbstractIterationResult(Any[Vararg{Any}], nothing))
end
end
result = Any[ Union{} for _ in 1:ltp ]
@@ -1566,14 +1529,14 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft)
result[j] = tmerge(result[j], rewrap_unionall(tps[j], tti0))
end
end
- return AbstractIterationResult(result, nothing)
+ return Future(AbstractIterationResult(result, nothing))
end
end
if tti0 <: Tuple
if isa(tti0, DataType)
- return AbstractIterationResult(Any[ p for p in tti0.parameters ], nothing)
+ return Future(AbstractIterationResult(Any[ p for p in tti0.parameters ], nothing))
elseif !isa(tti, DataType)
- return AbstractIterationResult(Any[Vararg{Any}], nothing)
+ return Future(AbstractIterationResult(Any[Vararg{Any}], nothing))
else
len = length(tti.parameters)
last = tti.parameters[len]
@@ -1586,17 +1549,17 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft)
elts[len] = Vararg{elts[len]}
end
end
- return AbstractIterationResult(elts, nothing)
+ return Future(AbstractIterationResult(elts, nothing))
end
elseif tti0 === SimpleVector
- return AbstractIterationResult(Any[Vararg{Any}], nothing)
+ return Future(AbstractIterationResult(Any[Vararg{Any}], nothing))
elseif tti0 === Any
- return AbstractIterationResult(Any[Vararg{Any}], nothing, Effects())
+ return Future(AbstractIterationResult(Any[Vararg{Any}], nothing, Effects()))
elseif tti0 <: Array || tti0 <: GenericMemory
if eltype(tti0) === Union{}
- return AbstractIterationResult(Any[], nothing)
+ return Future(AbstractIterationResult(Any[], nothing))
end
- return AbstractIterationResult(Any[Vararg{eltype(tti0)}], nothing)
+ return Future(AbstractIterationResult(Any[Vararg{eltype(tti0)}], nothing))
else
return abstract_iteration(interp, itft, typ, sv)
end
@@ -1607,95 +1570,144 @@ function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @n
if isa(itft, Const)
iteratef = itft.val
else
- return AbstractIterationResult(Any[Vararg{Any}], nothing, Effects())
+ return Future(AbstractIterationResult(Any[Vararg{Any}], nothing, Effects()))
end
@assert !isvarargtype(itertype)
- call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[itft, itertype]), StmtInfo(true), sv)
- stateordonet = call.rt
- info = call.info
- # Return Bottom if this is not an iterator.
- # WARNING: Changes to the iteration protocol must be reflected here,
- # this is not just an optimization.
- # TODO: this doesn't realize that Array, GenericMemory, SimpleVector, Tuple, and NamedTuple do not use the iterate protocol
- stateordonet === Bottom && return AbstractIterationResult(Any[Bottom], AbstractIterationInfo(CallMeta[CallMeta(Bottom, Any, call.effects, info)], true))
- valtype = statetype = Bottom
- ret = Any[]
- calls = CallMeta[call]
- stateordonet_widened = widenconst(stateordonet)
- 𝕃ᵢ = typeinf_lattice(interp)
- # Try to unroll the iteration up to max_tuple_splat, which covers any finite
- # length iterators, or interesting prefix
- while true
- if stateordonet_widened === Nothing
- return AbstractIterationResult(ret, AbstractIterationInfo(calls, true))
- end
- if Nothing <: stateordonet_widened || length(ret) >= InferenceParams(interp).max_tuple_splat
- break
- end
- if !isa(stateordonet_widened, DataType) || !(stateordonet_widened <: Tuple) || isvatuple(stateordonet_widened) || length(stateordonet_widened.parameters) != 2
- break
- end
- nstatetype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(2))
- # If there's no new information in this statetype, don't bother continuing,
- # the iterator won't be finite.
- if ⊑(𝕃ᵢ, nstatetype, statetype)
- return AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), EFFECTS_THROWS)
- end
- valtype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(1))
- push!(ret, valtype)
- statetype = nstatetype
- call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true), sv)
- stateordonet = call.rt
+ iterateresult = Future{AbstractIterationResult}()
+ call1future = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[itft, itertype]), StmtInfo(true), sv)::Future
+ function inferiterate(interp, sv)
+ call1 = call1future[]
+ stateordonet = call1.rt
+ # Return Bottom if this is not an iterator.
+ # WARNING: Changes to the iteration protocol must be reflected here,
+ # this is not just an optimization.
+ # TODO: this doesn't realize that Array, GenericMemory, SimpleVector, Tuple, and NamedTuple do not use the iterate protocol
+ if stateordonet === Bottom
+ iterateresult[] = AbstractIterationResult(Any[Bottom], AbstractIterationInfo(CallMeta[CallMeta(Bottom, Any, call1.effects, call1.info)], true))
+ return true
+ end
stateordonet_widened = widenconst(stateordonet)
- push!(calls, call)
- end
- # From here on, we start asking for results on the widened types, rather than
- # the precise (potentially const) state type
- # statetype and valtype are reinitialized in the first iteration below from the
- # (widened) stateordonet, which has not yet been fully analyzed in the loop above
- valtype = statetype = Bottom
- may_have_terminated = Nothing <: stateordonet_widened
- while valtype !== Any
- nounion = typeintersect(stateordonet_widened, Tuple{Any,Any})
- if nounion !== Union{} && !isa(nounion, DataType)
- # nounion is of a type we cannot handle
- valtype = Any
- break
- end
- if nounion === Union{} || (nounion.parameters[1] <: valtype && nounion.parameters[2] <: statetype)
- # reached a fixpoint or iterator failed/gave invalid answer
- if !hasintersect(stateordonet_widened, Nothing)
- # ... but cannot terminate
- if !may_have_terminated
- # ... and cannot have terminated prior to this loop
- return AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), Effects())
- else
- # iterator may have terminated prior to this loop, but not during it
- valtype = Bottom
+ calls = CallMeta[call1]
+ valtype = statetype = Bottom
+ ret = Any[]
+ 𝕃ᵢ = typeinf_lattice(interp)
+ may_have_terminated = false
+ local call2future::Future{CallMeta}
+
+ nextstate::UInt8 = 0x0
+ function inferiterate_2arg(interp, sv)
+ if nextstate === 0x1
+ nextstate = 0xff
+ @goto state1
+ elseif nextstate === 0x2
+ nextstate = 0xff
+ @goto state2
+ else
+ @assert nextstate === 0x0
+ nextstate = 0xff
+ end
+
+ # Try to unroll the iteration up to max_tuple_splat, which covers any finite
+ # length iterators, or interesting prefix
+ while true
+ if stateordonet_widened === Nothing
+ iterateresult[] = AbstractIterationResult(ret, AbstractIterationInfo(calls, true))
+ return true
+ end
+ if Nothing <: stateordonet_widened || length(ret) >= InferenceParams(interp).max_tuple_splat
+ break
+ end
+ if !isa(stateordonet_widened, DataType) || !(stateordonet_widened <: Tuple) || isvatuple(stateordonet_widened) || length(stateordonet_widened.parameters) != 2
+ break
+ end
+ nstatetype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(2))
+ # If there's no new information in this statetype, don't bother continuing,
+ # the iterator won't be finite.
+ if ⊑(𝕃ᵢ, nstatetype, statetype)
+ iterateresult[] = AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), EFFECTS_THROWS)
+ return true
+ end
+ valtype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(1))
+ push!(ret, valtype)
+ statetype = nstatetype
+ call2future = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true), sv)::Future
+ if !isready(call2future)
+ nextstate = 0x1
+ return false
+ @label state1
+ end
+ let call = call2future[]
+ push!(calls, call)
+ stateordonet = call.rt
+ stateordonet_widened = widenconst(stateordonet)
end
end
- break
- end
- valtype = tmerge(valtype, nounion.parameters[1])
- statetype = tmerge(statetype, nounion.parameters[2])
- call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true), sv)
- push!(calls, call)
- stateordonet = call.rt
- stateordonet_widened = widenconst(stateordonet)
- end
- if valtype !== Union{}
- push!(ret, Vararg{valtype})
+ # From here on, we start asking for results on the widened types, rather than
+ # the precise (potentially const) state type
+ # statetype and valtype are reinitialized in the first iteration below from the
+ # (widened) stateordonet, which has not yet been fully analyzed in the loop above
+ valtype = statetype = Bottom
+ may_have_terminated = Nothing <: stateordonet_widened
+ while valtype !== Any
+ nounion = typeintersect(stateordonet_widened, Tuple{Any,Any})
+ if nounion !== Union{} && !isa(nounion, DataType)
+ # nounion is of a type we cannot handle
+ valtype = Any
+ break
+ end
+ if nounion === Union{} || (nounion.parameters[1] <: valtype && nounion.parameters[2] <: statetype)
+ # reached a fixpoint or iterator failed/gave invalid answer
+ if !hasintersect(stateordonet_widened, Nothing)
+ # ... but cannot terminate
+ if may_have_terminated
+ # ... and iterator may have terminated prior to this loop, but not during it
+ valtype = Bottom
+ else
+ # ... or cannot have terminated prior to this loop
+ iterateresult[] = AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), Effects())
+ return true
+ end
+ end
+ break
+ end
+ valtype = tmerge(valtype, nounion.parameters[1])
+ statetype = tmerge(statetype, nounion.parameters[2])
+ call2future = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true), sv)::Future
+ if !isready(call2future)
+ nextstate = 0x2
+ return false
+ @label state2
+ end
+ let call = call2future[]
+ push!(calls, call)
+ stateordonet = call.rt
+ stateordonet_widened = widenconst(stateordonet)
+ end
+ end
+ if valtype !== Union{}
+ push!(ret, Vararg{valtype})
+ end
+ iterateresult[] = AbstractIterationResult(ret, AbstractIterationInfo(calls, false))
+ return true
+ end # inferiterate_2arg
+ # continue making progress as much as possible, on iterate(arg, state)
+ inferiterate_2arg(interp, sv) || push!(sv.tasks, inferiterate_2arg)
+ return true
+ end # inferiterate
+ # continue making progress as soon as possible, on iterate(arg)
+ if !(isready(call1future) && inferiterate(interp, sv))
+ push!(sv.tasks, inferiterate)
end
- return AbstractIterationResult(ret, AbstractIterationInfo(calls, false))
+ return iterateresult
end
# do apply(af, fargs...), where af is a function value
function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo,
sv::AbsIntState, max_methods::Int=get_max_methods(interp, sv))
- itft = argtype_by_index(argtypes, 2)
+ itft = Core.Box(argtype_by_index(argtypes, 2))
aft = argtype_by_index(argtypes, 3)
- (itft === Bottom || aft === Bottom) && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
+ (itft.contents === Bottom || aft === Bottom) && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
aargtypes = argtype_tail(argtypes, 4)
aftw = widenconst(aft)
if !isa(aft, Const) && !isa(aft, PartialOpaque) && (!isType(aftw) || has_free_typevars(aftw))
@@ -1703,100 +1715,155 @@ function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, si::
add_remark!(interp, sv, "Core._apply_iterate called on a function of a non-concrete type")
# bail now, since it seems unlikely that abstract_call will be able to do any better after splitting
# this also ensures we don't call abstract_call_gf_by_type below on an IntrinsicFunction or Builtin
- return CallMeta(Any, Any, Effects(), NoCallInfo())
+ return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
end
end
res = Union{}
- nargs = length(aargtypes)
splitunions = 1 < unionsplitcost(typeinf_lattice(interp), aargtypes) <= InferenceParams(interp).max_apply_union_enum
- ctypes = [Any[aft]]
- infos = Vector{MaybeAbstractIterationInfo}[MaybeAbstractIterationInfo[]]
- effects = EFFECTS_TOTAL
- for i = 1:nargs
- ctypes´ = Vector{Any}[]
- infos′ = Vector{MaybeAbstractIterationInfo}[]
- for ti in (splitunions ? uniontypes(aargtypes[i]) : Any[aargtypes[i]])
- if !isvarargtype(ti)
- (;cti, info, ai_effects) = precise_container_type(interp, itft, ti, sv)
- else
- (;cti, info, ai_effects) = precise_container_type(interp, itft, unwrapva(ti), sv)
- # We can't represent a repeating sequence of the same types,
- # so tmerge everything together to get one type that represents
- # everything.
- argt = cti[end]
- if isvarargtype(argt)
- argt = unwrapva(argt)
+ ctypes::Vector{Vector{Any}} = [Any[aft]]
+ infos::Vector{Vector{MaybeAbstractIterationInfo}} = Vector{MaybeAbstractIterationInfo}[MaybeAbstractIterationInfo[]]
+ all_effects::Effects = EFFECTS_TOTAL
+ retinfos = ApplyCallInfo[]
+ retinfo = UnionSplitApplyCallInfo(retinfos)
+ exctype = Union{}
+ ctypes´ = Vector{Any}[]
+ infos´ = Vector{MaybeAbstractIterationInfo}[]
+ local ti, argtypesi
+ local ctfuture::Future{AbstractIterationResult}
+ local callfuture::Future{CallMeta}
+
+ applyresult = Future{CallMeta}()
+ # split the rest into a resumable state machine
+ i::Int = 1
+ j::Int = 1
+ nextstate::UInt8 = 0x0
+ function infercalls(interp, sv)
+ # n.b. Remember that variables will lose their values across restarts,
+ # so be sure to manually hoist any values that must be preserved and do
+ # not rely on program order.
+ # This is a little more complex than the closure continuations often used elsewhere, but avoids needing to manage all of that indentation
+ if nextstate === 0x1
+ nextstate = 0xff
+ @goto state1
+ elseif nextstate === 0x2
+ nextstate = 0xff
+ @goto state2
+ elseif nextstate === 0x3
+ nextstate = 0xff
+ @goto state3
+ else
+ @assert nextstate === 0x0
+ nextstate = 0xff
+ end
+ while i <= length(aargtypes)
+ argtypesi = (splitunions ? uniontypes(aargtypes[i]) : Any[aargtypes[i]])
+ i += 1
+ j = 1
+ while j <= length(argtypesi)
+ ti = argtypesi[j]
+ j += 1
+ if !isvarargtype(ti)
+ ctfuture = precise_container_type(interp, itft.contents, ti, sv)::Future
+ if !isready(ctfuture)
+ nextstate = 0x1
+ return false
+ @label state1
+ end
+ (;cti, info, ai_effects) = ctfuture[]
+ else
+ ctfuture = precise_container_type(interp, itft.contents, unwrapva(ti), sv)::Future
+ if !isready(ctfuture)
+ nextstate = 0x2
+ return false
+ @label state2
+ end
+ (;cti, info, ai_effects) = ctfuture[]
+ # We can't represent a repeating sequence of the same types,
+ # so tmerge everything together to get one type that represents
+ # everything.
+ argt = cti[end]
+ if isvarargtype(argt)
+ argt = unwrapva(argt)
+ end
+ for k in 1:(length(cti)-1)
+ argt = tmerge(argt, cti[k])
+ end
+ cti = Any[Vararg{argt}]
end
- for i in 1:(length(cti)-1)
- argt = tmerge(argt, cti[i])
+ all_effects = merge_effects(all_effects, ai_effects)
+ if info !== nothing
+ for call in info.each
+ all_effects = merge_effects(all_effects, call.effects)
+ end
+ end
+ if any(@nospecialize(t) -> t === Bottom, cti)
+ continue
+ end
+ for k = 1:length(ctypes)
+ ct = ctypes[k]
+ if isvarargtype(ct[end])
+ # This is vararg, we're not gonna be able to do any inlining,
+ # drop the info
+ info = nothing
+ tail = tuple_tail_elem(typeinf_lattice(interp), unwrapva(ct[end]), cti)
+ push!(ctypes´, push!(ct[1:(end - 1)], tail))
+ else
+ push!(ctypes´, append!(ct[:], cti))
+ end
+ push!(infos´, push!(copy(infos[k]), info))
end
- cti = Any[Vararg{argt}]
end
- effects = merge_effects(effects, ai_effects)
- if info !== nothing
- for call in info.each
- effects = merge_effects(effects, call.effects)
+ # swap for the new array and empty the temporary one
+ ctypes´, ctypes = ctypes, ctypes´
+ infos´, infos = infos, infos´
+ empty!(ctypes´)
+ empty!(infos´)
+ end
+ all_effects.nothrow || (exctype = Any)
+
+ i = 1
+ while i <= length(ctypes)
+ ct = ctypes[i]
+ lct = length(ct)
+ # truncate argument list at the first Vararg
+ for k = 1:lct-1
+ cti = ct[k]
+ if isvarargtype(cti)
+ ct[k] = tuple_tail_elem(typeinf_lattice(interp), unwrapva(cti), ct[(k+1):lct])
+ resize!(ct, k)
+ break
end
end
- if any(@nospecialize(t) -> t === Bottom, cti)
- continue
+ callfuture = abstract_call(interp, ArgInfo(nothing, ct), si, sv, max_methods)::Future
+ if !isready(callfuture)
+ nextstate = 0x3
+ return false
+ @label state3
end
- for j = 1:length(ctypes)
- ct = ctypes[j]::Vector{Any}
- if isvarargtype(ct[end])
- # This is vararg, we're not gonna be able to do any inlining,
- # drop the info
- info = nothing
- tail = tuple_tail_elem(typeinf_lattice(interp), unwrapva(ct[end]), cti)
- push!(ctypes´, push!(ct[1:(end - 1)], tail))
- else
- push!(ctypes´, append!(ct[:], cti))
+ let (; info, rt, exct, effects) = callfuture[]
+ push!(retinfos, ApplyCallInfo(info, infos[i]))
+ res = tmerge(typeinf_lattice(interp), res, rt)
+ exctype = tmerge(typeinf_lattice(interp), exctype, exct)
+ all_effects = merge_effects(all_effects, effects)
+ if i < length(ctypes) && bail_out_apply(interp, InferenceLoopState(ctypes[i], res, all_effects), sv)
+ add_remark!(interp, sv, "_apply_iterate inference reached maximally imprecise information. Bailing on.")
+ # there is unanalyzed candidate, widen type and effects to the top
+ let retinfo = NoCallInfo() # NOTE this is necessary to prevent the inlining processing
+ applyresult[] = CallMeta(Any, Any, Effects(), retinfo)
+ return true
+ end
end
- push!(infos′, push!(copy(infos[j]), info))
end
+ i += 1
end
- ctypes = ctypes´
- infos = infos′
- end
- retinfos = ApplyCallInfo[]
- retinfo = UnionSplitApplyCallInfo(retinfos)
- napplicable = length(ctypes)
- seen = 0
- exct = effects.nothrow ? Union{} : Any
- for i = 1:napplicable
- ct = ctypes[i]
- arginfo = infos[i]
- lct = length(ct)
- # truncate argument list at the first Vararg
- for i = 1:lct-1
- cti = ct[i]
- if isvarargtype(cti)
- ct[i] = tuple_tail_elem(typeinf_lattice(interp), unwrapva(cti), ct[(i+1):lct])
- resize!(ct, i)
- break
- end
- end
- call = abstract_call(interp, ArgInfo(nothing, ct), si, sv, max_methods)
- seen += 1
- push!(retinfos, ApplyCallInfo(call.info, arginfo))
- res = tmerge(typeinf_lattice(interp), res, call.rt)
- exct = tmerge(typeinf_lattice(interp), exct, call.exct)
- effects = merge_effects(effects, call.effects)
- if bail_out_apply(interp, InferenceLoopState(ct, res, effects), sv)
- add_remark!(interp, sv, "_apply_iterate inference reached maximally imprecise information. Bailing on.")
- break
- end
- end
- if seen ≠ napplicable
- # there is unanalyzed candidate, widen type and effects to the top
- res = Any
- exct = Any
- effects = Effects()
- retinfo = NoCallInfo() # NOTE this is necessary to prevent the inlining processing
+ # TODO: Add a special info type to capture all the iteration info.
+ # For now, only propagate info if we don't also union-split the iteration
+ applyresult[] = CallMeta(res, exctype, all_effects, retinfo)
+ return true
end
- # TODO: Add a special info type to capture all the iteration info.
- # For now, only propagate info if we don't also union-split the iteration
- return CallMeta(res, exct, effects, retinfo)
+ # start making progress on the first call
+ infercalls(interp, sv) || push!(sv.tasks, infercalls)
+ return applyresult
end
function argtype_by_index(argtypes::Vector{Any}, i::Int)
@@ -2135,66 +2202,69 @@ function abstract_invoke(interp::AbstractInterpreter, arginfo::ArgInfo, si::Stmt
argtypes = arginfo.argtypes
ft′ = argtype_by_index(argtypes, 2)
ft = widenconst(ft′)
- ft === Bottom && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
+ ft === Bottom && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
(types, isexact, isconcrete, istype) = instanceof_tfunc(argtype_by_index(argtypes, 3), false)
- isexact || return CallMeta(Any, Any, Effects(), NoCallInfo())
+ isexact || return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
unwrapped = unwrap_unionall(types)
- types === Bottom && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
+ types === Bottom && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
if !(unwrapped isa DataType && unwrapped.name === Tuple.name)
- return CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo())
+ return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo()))
end
argtype = argtypes_to_type(argtype_tail(argtypes, 4))
nargtype = typeintersect(types, argtype)
- nargtype === Bottom && return CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo())
- nargtype isa DataType || return CallMeta(Any, Any, Effects(), NoCallInfo()) # other cases are not implemented below
- isdispatchelem(ft) || return CallMeta(Any, Any, Effects(), NoCallInfo()) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below
+ nargtype === Bottom && return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo()))
+ nargtype isa DataType || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) # other cases are not implemented below
+ isdispatchelem(ft) || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below
ft = ft::DataType
lookupsig = rewrap_unionall(Tuple{ft, unwrapped.parameters...}, types)::Type
nargtype = Tuple{ft, nargtype.parameters...}
argtype = Tuple{ft, argtype.parameters...}
match, valid_worlds = findsup(lookupsig, method_table(interp))
- match === nothing && return CallMeta(Any, Any, Effects(), NoCallInfo())
+ match === nothing && return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
update_valid_age!(sv, valid_worlds)
method = match.method
tienv = ccall(:jl_type_intersection_with_env, Any, (Any, Any), nargtype, method.sig)::SimpleVector
- ti = tienv[1]; env = tienv[2]::SimpleVector
- result = abstract_call_method(interp, method, ti, env, false, si, sv)
- (; rt, exct, edge, effects, volatile_inf_result) = result
+ ti = tienv[1]
+ env = tienv[2]::SimpleVector
+ mresult = abstract_call_method(interp, method, ti, env, false, si, sv)::Future
match = MethodMatch(ti, env, method, argtype <: method.sig)
- res = nothing
- sig = match.spec_types
- argtypes′ = invoke_rewrite(argtypes)
- fargs = arginfo.fargs
- fargs′ = fargs === nothing ? nothing : invoke_rewrite(fargs)
- arginfo = ArgInfo(fargs′, argtypes′)
- # # typeintersect might have narrowed signature, but the accuracy gain doesn't seem worth the cost involved with the lattice comparisons
- # for i in 1:length(argtypes′)
- # t, a = ti.parameters[i], argtypes′[i]
- # argtypes′[i] = t ⊑ a ? t : a
- # end
- 𝕃ₚ = ipo_lattice(interp)
- ⊑, ⋤, ⊔ = partialorder(𝕃ₚ), strictneqpartialorder(𝕃ₚ), join(𝕃ₚ)
- f = singleton_type(ft′)
- invokecall = InvokeCall(types, lookupsig)
- const_call_result = abstract_call_method_with_const_args(interp,
- result, f, arginfo, si, match, sv, invokecall)
- const_result = volatile_inf_result
- if const_call_result !== nothing
- if const_call_result.rt ⊑ rt
- (; rt, effects, const_result, edge) = const_call_result
+ return Future{CallMeta}(mresult, interp, sv) do result, interp, sv
+ (; rt, exct, edge, effects, volatile_inf_result) = result
+ res = nothing
+ sig = match.spec_types
+ argtypes′ = invoke_rewrite(argtypes)
+ fargs = arginfo.fargs
+ fargs′ = fargs === nothing ? nothing : invoke_rewrite(fargs)
+ arginfo = ArgInfo(fargs′, argtypes′)
+ # # typeintersect might have narrowed signature, but the accuracy gain doesn't seem worth the cost involved with the lattice comparisons
+ # for i in 1:length(argtypes′)
+ # t, a = ti.parameters[i], argtypes′[i]
+ # argtypes′[i] = t ⊑ a ? t : a
+ # end
+ 𝕃ₚ = ipo_lattice(interp)
+ ⊑, ⋤, ⊔ = partialorder(𝕃ₚ), strictneqpartialorder(𝕃ₚ), join(𝕃ₚ)
+ f = singleton_type(ft′)
+ invokecall = InvokeCall(types, lookupsig)
+ const_call_result = abstract_call_method_with_const_args(interp,
+ result, f, arginfo, si, match, sv, invokecall)
+ const_result = volatile_inf_result
+ if const_call_result !== nothing
+ if const_call_result.rt ⊑ rt
+ (; rt, effects, const_result, edge) = const_call_result
+ end
+ if const_call_result.exct ⋤ exct
+ (; exct, const_result, edge) = const_call_result
+ end
end
- if const_call_result.exct ⋤ exct
- (; exct, const_result, edge) = const_call_result
+ rt = from_interprocedural!(interp, rt, sv, arginfo, sig)
+ info = InvokeCallInfo(match, const_result)
+ edge !== nothing && add_invoke_backedge!(sv, lookupsig, edge)
+ if !match.fully_covers
+ effects = Effects(effects; nothrow=false)
+ exct = exct ⊔ TypeError
end
+ return CallMeta(rt, exct, effects, info)
end
- rt = from_interprocedural!(interp, rt, sv, arginfo, sig)
- info = InvokeCallInfo(match, const_result)
- edge !== nothing && add_invoke_backedge!(sv, lookupsig, edge)
- if !match.fully_covers
- effects = Effects(effects; nothrow=false)
- exct = exct ⊔ TypeError
- end
- return CallMeta(rt, exct, effects, info)
end
function invoke_rewrite(xs::Vector{Any})
@@ -2207,10 +2277,12 @@ end
function abstract_finalizer(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::AbsIntState)
if length(argtypes) == 3
finalizer_argvec = Any[argtypes[2], argtypes[3]]
- call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), StmtInfo(false), sv, #=max_methods=#1)
- return CallMeta(Nothing, Any, Effects(), FinalizerInfo(call.info, call.effects))
+ call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), StmtInfo(false), sv, #=max_methods=#1)::Future
+ return Future{CallMeta}(call, interp, sv) do call, interp, sv
+ return CallMeta(Nothing, Any, Effects(), FinalizerInfo(call.info, call.effects))
+ end
end
- return CallMeta(Nothing, Any, Effects(), NoCallInfo())
+ return Future(CallMeta(Nothing, Any, Effects(), NoCallInfo()))
end
function abstract_throw(interp::AbstractInterpreter, argtypes::Vector{Any}, ::AbsIntState)
@@ -2228,7 +2300,7 @@ function abstract_throw(interp::AbstractInterpreter, argtypes::Vector{Any}, ::Ab
else
exct = ArgumentError
end
- return CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo())
+ return Future(CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo()))
end
function abstract_throw_methoderror(interp::AbstractInterpreter, argtypes::Vector{Any}, ::AbsIntState)
@@ -2240,7 +2312,7 @@ function abstract_throw_methoderror(interp::AbstractInterpreter, argtypes::Vecto
⊔ = join(typeinf_lattice(interp))
MethodError ⊔ ArgumentError
end
- return CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo())
+ return Future(CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo()))
end
# call where the function is known exactly
@@ -2285,60 +2357,70 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
end
end
end
- return CallMeta(rt, exct, effects, NoCallInfo(), refinements)
+ return Future(CallMeta(rt, exct, effects, NoCallInfo(), refinements))
elseif isa(f, Core.OpaqueClosure)
# calling an OpaqueClosure about which we have no information returns no information
- return CallMeta(typeof(f).parameters[2], Any, Effects(), NoCallInfo())
+ return Future(CallMeta(typeof(f).parameters[2], Any, Effects(), NoCallInfo()))
elseif f === TypeVar && !isvarargtype(argtypes[end])
# Manually look through the definition of TypeVar to
# make sure to be able to get `PartialTypeVar`s out.
- 2 ≤ la ≤ 4 || return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
- n = argtypes[2]
- ub_var = Const(Any)
- lb_var = Const(Union{})
- if la == 4
- ub_var = argtypes[4]
- lb_var = argtypes[3]
- elseif la == 3
- ub_var = argtypes[3]
- end
+ 2 ≤ la ≤ 4 || return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
# make sure generic code is prepared for inlining if needed later
- call = let T = Any[Type{TypeVar}, Any, Any, Any]
+ let T = Any[Type{TypeVar}, Any, Any, Any]
resize!(T, la)
atype = Tuple{T...}
T[1] = Const(TypeVar)
- abstract_call_gf_by_type(interp, f, ArgInfo(nothing, T), si, atype, sv, max_methods)
- end
- pT = typevar_tfunc(𝕃ᵢ, n, lb_var, ub_var)
- typevar_argtypes = Any[n, lb_var, ub_var]
- effects = builtin_effects(𝕃ᵢ, Core._typevar, typevar_argtypes, pT)
- if effects.nothrow
- exct = Union{}
- else
- exct = builtin_exct(𝕃ᵢ, Core._typevar, typevar_argtypes, pT)
+ let call = abstract_call_gf_by_type(interp, f, ArgInfo(nothing, T), si, atype, sv, max_methods)::Future
+ return Future{CallMeta}(call, interp, sv) do call, interp, sv
+ n = argtypes[2]
+ ub_var = Const(Any)
+ lb_var = Const(Union{})
+ if la == 4
+ ub_var = argtypes[4]
+ lb_var = argtypes[3]
+ elseif la == 3
+ ub_var = argtypes[3]
+ end
+ pT = typevar_tfunc(𝕃ᵢ, n, lb_var, ub_var)
+ typevar_argtypes = Any[n, lb_var, ub_var]
+ effects = builtin_effects(𝕃ᵢ, Core._typevar, typevar_argtypes, pT)
+ if effects.nothrow
+ exct = Union{}
+ else
+ exct = builtin_exct(𝕃ᵢ, Core._typevar, typevar_argtypes, pT)
+ end
+ return CallMeta(pT, exct, effects, call.info)
+ end
+ end
end
- return CallMeta(pT, exct, effects, call.info)
elseif f === UnionAll
- call = abstract_call_gf_by_type(interp, f, ArgInfo(nothing, Any[Const(UnionAll), Any, Any]), si, Tuple{Type{UnionAll}, Any, Any}, sv, max_methods)
- return abstract_call_unionall(interp, argtypes, call)
+ let call = abstract_call_gf_by_type(interp, f, ArgInfo(nothing, Any[Const(UnionAll), Any, Any]), si, Tuple{Type{UnionAll}, Any, Any}, sv, max_methods)::Future
+ return Future{CallMeta}(call, interp, sv) do call, interp, sv
+ return abstract_call_unionall(interp, argtypes, call)
+ end
+ end
elseif f === Tuple && la == 2
aty = argtypes[2]
ty = isvarargtype(aty) ? unwrapva(aty) : widenconst(aty)
if !isconcretetype(ty)
- return CallMeta(Tuple, Any, EFFECTS_UNKNOWN, NoCallInfo())
+ return Future(CallMeta(Tuple, Any, EFFECTS_UNKNOWN, NoCallInfo()))
end
elseif is_return_type(f)
return return_type_tfunc(interp, argtypes, si, sv)
elseif la == 3 && f === Core.:(!==)
# mark !== as exactly a negated call to ===
- call = abstract_call_gf_by_type(interp, f, ArgInfo(fargs, Any[Const(f), Any, Any]), si, Tuple{typeof(f), Any, Any}, sv, max_methods)
- rty = abstract_call_known(interp, (===), arginfo, si, sv, max_methods).rt
- if isa(rty, Conditional)
- return CallMeta(Conditional(rty.slot, rty.elsetype, rty.thentype), Bottom, EFFECTS_TOTAL, NoCallInfo()) # swap if-else
- elseif isa(rty, Const)
- return CallMeta(Const(rty.val === false), Bottom, EFFECTS_TOTAL, MethodResultPure())
- end
- return call
+ let callfuture = abstract_call_gf_by_type(interp, f, ArgInfo(fargs, Any[Const(f), Any, Any]), si, Tuple{typeof(f), Any, Any}, sv, max_methods)::Future,
+ rtfuture = abstract_call_known(interp, (===), arginfo, si, sv, max_methods)::Future
+ return Future{CallMeta}(isready(callfuture) && isready(rtfuture), interp, sv) do interp, sv
+ local rty = rtfuture[].rt
+ if isa(rty, Conditional)
+ return CallMeta(Conditional(rty.slot, rty.elsetype, rty.thentype), Bottom, EFFECTS_TOTAL, NoCallInfo()) # swap if-else
+ elseif isa(rty, Const)
+ return CallMeta(Const(rty.val === false), Bottom, EFFECTS_TOTAL, MethodResultPure())
+ end
+ return callfuture[]
+ end
+ end
elseif la == 3 && f === Core.:(>:)
# mark issupertype as a exact alias for issubtype
# swap T1 and T2 arguments and call <:
@@ -2350,12 +2432,12 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
argtypes = Any[typeof(<:), argtypes[3], argtypes[2]]
return abstract_call_known(interp, <:, ArgInfo(fargs, argtypes), si, sv, max_methods)
elseif la == 2 && f === Core.typename
- return CallMeta(typename_static(argtypes[2]), Bottom, EFFECTS_TOTAL, MethodResultPure())
+ return Future(CallMeta(typename_static(argtypes[2]), Bottom, EFFECTS_TOTAL, MethodResultPure()))
elseif f === Core._hasmethod
- return _hasmethod_tfunc(interp, argtypes, sv)
+ return Future(_hasmethod_tfunc(interp, argtypes, sv))
end
atype = argtypes_to_type(argtypes)
- return abstract_call_gf_by_type(interp, f, arginfo, si, atype, sv, max_methods)
+ return abstract_call_gf_by_type(interp, f, arginfo, si, atype, sv, max_methods)::Future
end
function abstract_call_opaque_closure(interp::AbstractInterpreter,
@@ -2364,40 +2446,44 @@ function abstract_call_opaque_closure(interp::AbstractInterpreter,
tt = closure.typ
ocargsig = rewrap_unionall((unwrap_unionall(tt)::DataType).parameters[1], tt)
ocargsig′ = unwrap_unionall(ocargsig)
- ocargsig′ isa DataType || return CallMeta(Any, Any, Effects(), NoCallInfo())
+ ocargsig′ isa DataType || return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
ocsig = rewrap_unionall(Tuple{Tuple, ocargsig′.parameters...}, ocargsig)
- hasintersect(sig, ocsig) || return CallMeta(Union{}, Union{MethodError,TypeError}, EFFECTS_THROWS, NoCallInfo())
+ hasintersect(sig, ocsig) || return Future(CallMeta(Union{}, Union{MethodError,TypeError}, EFFECTS_THROWS, NoCallInfo()))
ocmethod = closure.source::Method
- result = abstract_call_method(interp, ocmethod, sig, Core.svec(), false, si, sv)
- (; rt, exct, edge, effects, volatile_inf_result) = result
match = MethodMatch(sig, Core.svec(), ocmethod, sig <: ocsig)
- 𝕃ₚ = ipo_lattice(interp)
- ⊑, ⋤, ⊔ = partialorder(𝕃ₚ), strictneqpartialorder(𝕃ₚ), join(𝕃ₚ)
- const_result = volatile_inf_result
- if !result.edgecycle
- const_call_result = abstract_call_method_with_const_args(interp, result,
- nothing, arginfo, si, match, sv)
- if const_call_result !== nothing
- if const_call_result.rt ⊑ rt
- (; rt, effects, const_result, edge) = const_call_result
- end
- if const_call_result.exct ⋤ exct
- (; exct, const_result, edge) = const_call_result
+ mresult = abstract_call_method(interp, ocmethod, sig, Core.svec(), false, si, sv)
+ ocsig_box = Core.Box(ocsig)
+ return Future{CallMeta}(mresult, interp, sv) do result, interp, sv
+ (; rt, exct, edge, effects, volatile_inf_result, edgecycle) = result
+ 𝕃ₚ = ipo_lattice(interp)
+ ⊑, ⋤, ⊔ = partialorder(𝕃ₚ), strictneqpartialorder(𝕃ₚ), join(𝕃ₚ)
+ const_result = volatile_inf_result
+ if !edgecycle
+ const_call_result = abstract_call_method_with_const_args(interp, result,
+ nothing, arginfo, si, match, sv)
+ if const_call_result !== nothing
+ if const_call_result.rt ⊑ rt
+ (; rt, effects, const_result, edge) = const_call_result
+ end
+ if const_call_result.exct ⋤ exct
+ (; exct, const_result, edge) = const_call_result
+ end
end
end
- end
- if check # analyze implicit type asserts on argument and return type
- rty = (unwrap_unionall(tt)::DataType).parameters[2]
- rty = rewrap_unionall(rty isa TypeVar ? rty.ub : rty, tt)
- if !(rt ⊑ rty && sig ⊑ ocsig)
- effects = Effects(effects; nothrow=false)
- exct = exct ⊔ TypeError
+ if check # analyze implicit type asserts on argument and return type
+ ftt = closure.typ
+ rty = (unwrap_unionall(ftt)::DataType).parameters[2]
+ rty = rewrap_unionall(rty isa TypeVar ? rty.ub : rty, ftt)
+ if !(rt ⊑ rty && sig ⊑ ocsig_box.contents)
+ effects = Effects(effects; nothrow=false)
+ exct = exct ⊔ TypeError
+ end
end
+ rt = from_interprocedural!(interp, rt, sv, arginfo, match.spec_types)
+ info = OpaqueClosureCallInfo(match, const_result)
+ edge !== nothing && add_backedge!(sv, edge)
+ return CallMeta(rt, exct, effects, info)
end
- rt = from_interprocedural!(interp, rt, sv, arginfo, match.spec_types)
- info = OpaqueClosureCallInfo(match, const_result)
- edge !== nothing && add_backedge!(sv, edge)
- return CallMeta(rt, exct, effects, info)
end
function most_general_argtypes(closure::PartialOpaque)
@@ -2422,17 +2508,17 @@ function abstract_call_unknown(interp::AbstractInterpreter, @nospecialize(ft),
wft = widenconst(ft)
if hasintersect(wft, Builtin)
add_remark!(interp, sv, "Could not identify method table for call")
- return CallMeta(Any, Any, Effects(), NoCallInfo())
+ return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
elseif hasintersect(wft, Core.OpaqueClosure)
uft = unwrap_unionall(wft)
if isa(uft, DataType)
- return CallMeta(rewrap_unionall(uft.parameters[2], wft), Any, Effects(), NoCallInfo())
+ return Future(CallMeta(rewrap_unionall(uft.parameters[2], wft), Any, Effects(), NoCallInfo()))
end
- return CallMeta(Any, Any, Effects(), NoCallInfo())
+ return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
end
# non-constant function, but the number of arguments is known and the `f` is not a builtin or intrinsic
atype = argtypes_to_type(arginfo.argtypes)
- return abstract_call_gf_by_type(interp, nothing, arginfo, si, atype, sv, max_methods)
+ return abstract_call_gf_by_type(interp, nothing, arginfo, si, atype, sv, max_methods)::Future
end
# call where the function is any lattice element
@@ -2503,7 +2589,7 @@ function abstract_eval_cfunction(interp::AbstractInterpreter, e::Expr, vtypes::U
# this may be the wrong world for the call,
# but some of the result is likely to be valid anyways
# and that may help generate better codegen
- abstract_call(interp, ArgInfo(nothing, at), StmtInfo(false), sv)
+ abstract_call(interp, ArgInfo(nothing, at), StmtInfo(false), sv)::Future
rt = e.args[1]
isa(rt, Type) || (rt = Any)
return RTEffects(rt, Any, EFFECTS_UNKNOWN)
@@ -2544,6 +2630,7 @@ function abstract_eval_value_expr(interp::AbstractInterpreter, e::Expr, sv::AbsI
# TODO: We still have non-linearized cglobal
@assert e.args[1] === Core.tuple || e.args[1] === GlobalRef(Core, :tuple)
else
+ @assert e.head !== :(=)
# Some of our tests expect us to handle invalid IR here and error later
# - permit that for now.
# @assert false "Unexpected EXPR head in value position"
@@ -2592,8 +2679,13 @@ function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, sv::Infere
add_curr_ssaflag!(sv, IR_FLAG_UNUSED)
end
si = StmtInfo(!unused)
- call = abstract_call(interp, arginfo, si, sv)
- sv.stmt_info[sv.currpc] = call.info
+ call = abstract_call(interp, arginfo, si, sv)::Future
+ Future{Nothing}(call, interp, sv) do call, interp, sv
+ # this only is needed for the side-effect, sequenced before any task tries to consume the return value,
+ # which this will do even without returning this Future
+ sv.stmt_info[sv.currpc] = call.info
+ nothing
+ end
return call
end
@@ -2602,11 +2694,14 @@ function abstract_eval_call(interp::AbstractInterpreter, e::Expr, vtypes::Union{
ea = e.args
argtypes = collect_argtypes(interp, ea, vtypes, sv)
if argtypes === nothing
- return RTEffects(Bottom, Any, Effects())
+ return Future(RTEffects(Bottom, Any, Effects()))
end
arginfo = ArgInfo(ea, argtypes)
- (; rt, exct, effects, refinements) = abstract_call(interp, arginfo, sv)
- return RTEffects(rt, exct, effects, refinements)
+ call = abstract_call(interp, arginfo, sv)::Future
+ return Future{RTEffects}(call, interp, sv) do call, interp, sv
+ (; rt, exct, effects, refinements) = call
+ return RTEffects(rt, exct, effects, refinements)
+ end
end
function abstract_eval_new(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing},
@@ -2736,12 +2831,15 @@ function abstract_eval_new_opaque_closure(interp::AbstractInterpreter, e::Expr,
argtypes = most_general_argtypes(rt)
pushfirst!(argtypes, rt.env)
callinfo = abstract_call_opaque_closure(interp, rt,
- ArgInfo(nothing, argtypes), StmtInfo(true), sv, #=check=#false)
- sv.stmt_info[sv.currpc] = OpaqueClosureCreateInfo(callinfo)
+ ArgInfo(nothing, argtypes), StmtInfo(true), sv, #=check=#false)::Future
+ Future{Nothing}(callinfo, interp, sv) do callinfo, interp, sv
+ sv.stmt_info[sv.currpc] = OpaqueClosureCreateInfo(callinfo)
+ nothing
+ end
end
end
end
- return RTEffects(rt, Any, effects)
+ return Future(RTEffects(rt, Any, effects))
end
function abstract_eval_copyast(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing},
@@ -2837,7 +2935,7 @@ function abstract_eval_static_parameter(::AbstractInterpreter, e::Expr, sv::AbsI
end
function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing},
- sv::AbsIntState)
+ sv::AbsIntState)::Future{RTEffects}
ehead = e.head
if ehead === :call
return abstract_eval_call(interp, e, vtypes, sv)
@@ -2935,43 +3033,7 @@ function stmt_taints_inbounds_consistency(sv::AbsIntState)
return has_curr_ssaflag(sv, IR_FLAG_INBOUNDS)
end
-function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e), vtypes::VarTable, sv::InferenceState)
- if !isa(e, Expr)
- if isa(e, PhiNode)
- add_curr_ssaflag!(sv, IR_FLAGS_REMOVABLE)
- # Implement convergence for PhiNodes. In particular, PhiNodes need to tmerge over
- # the incoming values from all iterations, but `abstract_eval_phi` will only tmerge
- # over the first and last iterations. By tmerging in the current old_rt, we ensure that
- # we will not lose an intermediate value.
- rt = abstract_eval_phi(interp, e, vtypes, sv)
- old_rt = sv.ssavaluetypes[sv.currpc]
- rt = old_rt === NOT_FOUND ? rt : tmerge(typeinf_lattice(interp), old_rt, rt)
- return RTEffects(rt, Union{}, EFFECTS_TOTAL)
- end
- (; rt, exct, effects, refinements) = abstract_eval_special_value(interp, e, vtypes, sv)
- else
- (; rt, exct, effects, refinements) = abstract_eval_statement_expr(interp, e, vtypes, sv)
- if effects.noub === NOUB_IF_NOINBOUNDS
- if has_curr_ssaflag(sv, IR_FLAG_INBOUNDS)
- effects = Effects(effects; noub=ALWAYS_FALSE)
- elseif !propagate_inbounds(sv)
- # The callee read our inbounds flag, but unless we propagate inbounds,
- # we ourselves don't read our parent's inbounds.
- effects = Effects(effects; noub=ALWAYS_TRUE)
- end
- end
- e = e::Expr
- @assert !isa(rt, TypeVar) "unhandled TypeVar"
- rt = maybe_singleton_const(rt)
- if !isempty(sv.pclimitations)
- if rt isa Const || rt === Union{}
- empty!(sv.pclimitations)
- else
- rt = LimitedAccuracy(rt, sv.pclimitations)
- sv.pclimitations = IdSet{InferenceState}()
- end
- end
- end
+function merge_override_effects!(interp::AbstractInterpreter, effects::Effects, sv::InferenceState)
# N.B.: This only applies to the effects of the statement itself.
# It is possible for arguments (GlobalRef/:static_parameter) to throw,
# but these will be recomputed during SSA construction later.
@@ -2979,8 +3041,11 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
effects = override_effects(effects, override)
set_curr_ssaflag!(sv, flags_for_effects(effects), IR_FLAGS_EFFECTS)
merge_effects!(interp, sv, effects)
+ return effects
+end
- return RTEffects(rt, exct, effects, refinements)
+function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e), vtypes::VarTable, sv::InferenceState)
+ @assert !isa(e, Union{Expr, PhiNode, NewvarNode})
end
function override_effects(effects::Effects, override::EffectsOverride)
@@ -3226,60 +3291,6 @@ function handle_control_backedge!(interp::AbstractInterpreter, frame::InferenceS
return nothing
end
-struct BasicStmtChange
- changes::Union{Nothing,StateUpdate}
- rt::Any # extended lattice element or `nothing` - `nothing` if this statement may not be used as an SSA Value
- exct::Any
- # TODO effects::Effects
- refinements # ::Union{Nothing,SlotRefinement,Vector{Any}}
- function BasicStmtChange(changes::Union{Nothing,StateUpdate}, rt::Any, exct::Any,
- refinements=nothing)
- @nospecialize rt exct refinements
- return new(changes, rt, exct, refinements)
- end
-end
-
-@inline function abstract_eval_basic_statement(interp::AbstractInterpreter,
- @nospecialize(stmt), pc_vartable::VarTable, frame::InferenceState)
- if isa(stmt, NewvarNode)
- changes = StateUpdate(stmt.slot, VarState(Bottom, true))
- return BasicStmtChange(changes, nothing, Union{})
- elseif !isa(stmt, Expr)
- (; rt, exct) = abstract_eval_statement(interp, stmt, pc_vartable, frame)
- return BasicStmtChange(nothing, rt, exct)
- end
- changes = nothing
- hd = stmt.head
- if hd === :(=)
- (; rt, exct, refinements) = abstract_eval_statement(interp, stmt.args[2], pc_vartable, frame)
- if rt === Bottom
- return BasicStmtChange(nothing, Bottom, exct, refinements)
- end
- lhs = stmt.args[1]
- if isa(lhs, SlotNumber)
- changes = StateUpdate(lhs, VarState(rt, false))
- elseif isa(lhs, GlobalRef)
- handle_global_assignment!(interp, frame, lhs, rt)
- elseif !isa(lhs, SSAValue)
- merge_effects!(interp, frame, EFFECTS_UNKNOWN)
- end
- return BasicStmtChange(changes, rt, exct, refinements)
- elseif hd === :method
- fname = stmt.args[1]
- if isa(fname, SlotNumber)
- changes = StateUpdate(fname, VarState(Any, false))
- end
- return BasicStmtChange(changes, nothing, Union{})
- elseif (hd === :code_coverage_effect || (
- hd !== :boundscheck && # :boundscheck can be narrowed to Bool
- is_meta_expr(stmt)))
- return BasicStmtChange(nothing, Nothing, Bottom)
- else
- (; rt, exct, refinements) = abstract_eval_statement(interp, stmt, pc_vartable, frame)
- return BasicStmtChange(nothing, rt, exct, refinements)
- end
-end
-
function update_bbstate!(𝕃ᵢ::AbstractLattice, frame::InferenceState, bb::Int, vartable::VarTable)
bbtable = frame.bb_vartables[bb]
if bbtable === nothing
@@ -3379,27 +3390,45 @@ function update_cycle_worklists!(callback, frame::InferenceState)
end
# make as much progress on `frame` as possible (without handling cycles)
-function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
+struct CurrentState
+ result::Future
+ currstate::VarTable
+ bbstart::Int
+ bbend::Int
+ CurrentState(result::Future, currstate::VarTable, bbstart::Int, bbend::Int) = new(result, currstate, bbstart, bbend)
+ CurrentState() = new()
+end
+function typeinf_local(interp::AbstractInterpreter, frame::InferenceState, nextresult::CurrentState)
@assert !is_inferred(frame)
W = frame.ip
ssavaluetypes = frame.ssavaluetypes
bbs = frame.cfg.blocks
nbbs = length(bbs)
𝕃ᵢ = typeinf_lattice(interp)
-
+ states = frame.bb_vartables
currbb = frame.currbb
+ currpc = frame.currpc
+
+ if isdefined(nextresult, :result)
+ # for reasons that are fairly unclear, some state is arbitrarily on the stack instead in the InferenceState as normal
+ bbstart = nextresult.bbstart
+ bbend = nextresult.bbend
+ currstate = nextresult.currstate
+ @goto injectresult
+ end
+
if currbb != 1
currbb = frame.currbb = _bits_findnext(W.bits, 1)::Int # next basic block
end
-
- states = frame.bb_vartables
currstate = copy(states[currbb]::VarTable)
while currbb <= nbbs
delete!(W, currbb)
bbstart = first(bbs[currbb].stmts)
bbend = last(bbs[currbb].stmts)
- for currpc in bbstart:bbend
+ currpc = bbstart - 1
+ while currpc < bbend
+ currpc += 1
frame.currpc = currpc
empty_backedges!(frame, currpc)
stmt = frame.src.code[currpc]
@@ -3511,14 +3540,14 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
return caller.ssavaluetypes[caller_pc] !== Any
end
end
- ssavaluetypes[frame.currpc] = Any
+ ssavaluetypes[currpc] = Any
@goto find_next_bb
elseif isa(stmt, EnterNode)
ssavaluetypes[currpc] = Any
add_curr_ssaflag!(frame, IR_FLAG_NOTHROW)
if isdefined(stmt, :scope)
scopet = abstract_eval_value(interp, stmt.scope, currstate, frame)
- handler = gethandler(frame, frame.currpc+1)::TryCatchFrame
+ handler = gethandler(frame, currpc + 1)::TryCatchFrame
@assert handler.scopet !== nothing
if !⊑(𝕃ᵢ, scopet, handler.scopet)
handler.scopet = tmerge(𝕃ᵢ, scopet, handler.scopet)
@@ -3537,8 +3566,91 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
# Fall through terminator - treat as regular stmt
end
# Process non control-flow statements
- (; changes, rt, exct, refinements) = abstract_eval_basic_statement(interp,
- stmt, currstate, frame)
+ @assert isempty(frame.tasks)
+ rt = nothing
+ exct = Bottom
+ changes = nothing
+ refinements = nothing
+ effects = nothing
+ if isa(stmt, NewvarNode)
+ changes = StateUpdate(stmt.slot, VarState(Bottom, true))
+ elseif isa(stmt, PhiNode)
+ add_curr_ssaflag!(frame, IR_FLAGS_REMOVABLE)
+ # Implement convergence for PhiNodes. In particular, PhiNodes need to tmerge over
+ # the incoming values from all iterations, but `abstract_eval_phi` will only tmerge
+ # over the first and last iterations. By tmerging in the current old_rt, we ensure that
+ # we will not lose an intermediate value.
+ rt = abstract_eval_phi(interp, stmt, currstate, frame)
+ old_rt = frame.ssavaluetypes[currpc]
+ rt = old_rt === NOT_FOUND ? rt : tmerge(typeinf_lattice(interp), old_rt, rt)
+ else
+ lhs = nothing
+ if isexpr(stmt, :(=))
+ lhs = stmt.args[1]
+ stmt = stmt.args[2]
+ end
+ if !isa(stmt, Expr)
+ (; rt, exct, effects, refinements) = abstract_eval_special_value(interp, stmt, currstate, frame)
+ else
+ hd = stmt.head
+ if hd === :method
+ fname = stmt.args[1]
+ if isa(fname, SlotNumber)
+ changes = StateUpdate(fname, VarState(Any, false))
+ end
+ elseif (hd === :code_coverage_effect || (
+ hd !== :boundscheck && # :boundscheck can be narrowed to Bool
+ is_meta_expr(stmt)))
+ rt = Nothing
+ else
+ result = abstract_eval_statement_expr(interp, stmt, currstate, frame)::Future
+ if !isready(result) || !isempty(frame.tasks)
+ return CurrentState(result, currstate, bbstart, bbend)
+ @label injectresult
+ # reload local variables
+ stmt = frame.src.code[currpc]
+ changes = nothing
+ lhs = nothing
+ if isexpr(stmt, :(=))
+ lhs = stmt.args[1]
+ stmt = stmt.args[2]
+ end
+ result = nextresult.result::Future{RTEffects}
+ end
+ result = result[]
+ (; rt, exct, effects, refinements) = result
+ if effects.noub === NOUB_IF_NOINBOUNDS
+ if has_curr_ssaflag(frame, IR_FLAG_INBOUNDS)
+ effects = Effects(effects; noub=ALWAYS_FALSE)
+ elseif !propagate_inbounds(frame)
+ # The callee read our inbounds flag, but unless we propagate inbounds,
+ # we ourselves don't read our parent's inbounds.
+ effects = Effects(effects; noub=ALWAYS_TRUE)
+ end
+ end
+ @assert !isa(rt, TypeVar) "unhandled TypeVar"
+ rt = maybe_singleton_const(rt)
+ if !isempty(frame.pclimitations)
+ if rt isa Const || rt === Union{}
+ empty!(frame.pclimitations)
+ else
+ rt = LimitedAccuracy(rt, frame.pclimitations)
+ frame.pclimitations = IdSet{InferenceState}()
+ end
+ end
+ end
+ end
+ effects === nothing || merge_override_effects!(interp, effects, frame)
+ if lhs !== nothing && rt !== Bottom
+ if isa(lhs, SlotNumber)
+ changes = StateUpdate(lhs, VarState(rt, false))
+ elseif isa(lhs, GlobalRef)
+ handle_global_assignment!(interp, frame, lhs, rt)
+ elseif !isa(lhs, SSAValue)
+ merge_effects!(interp, frame, EFFECTS_UNKNOWN)
+ end
+ end
+ end
if !has_curr_ssaflag(frame, IR_FLAG_NOTHROW)
if exct !== Union{}
update_exc_bestguess!(interp, exct, frame)
@@ -3601,7 +3713,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
end
end # while currbb <= nbbs
- nothing
+ return CurrentState()
end
function apply_refinement!(𝕃ᵢ::AbstractLattice, slot::SlotNumber, @nospecialize(newtyp),
@@ -3652,31 +3764,81 @@ function condition_object_change(currstate::VarTable, condt::Conditional,
end
# make as much progress on `frame` as possible (by handling cycles)
-function typeinf_nocycle(interp::AbstractInterpreter, frame::InferenceState)
- typeinf_local(interp, frame)
- @assert isempty(frame.ip)
+warnlength::Int = 2500
+function typeinf(interp::AbstractInterpreter, frame::InferenceState)
callstack = frame.callstack::Vector{AbsIntState}
- frame.cycleid == length(callstack) && return true
-
- no_active_ips_in_callers = false
- while true
- # If the current frame is not the top part of a cycle, continue to the top of the cycle before resuming work
- frame.cycleid == frame.frameid || return false
- # If done, return and finalize this cycle
- no_active_ips_in_callers && return true
- # Otherwise, do at least one iteration over the entire current cycle
- no_active_ips_in_callers = true
- for i = reverse(frame.cycleid:length(callstack))
- caller = callstack[i]::InferenceState
- if !isempty(caller.ip)
- # Note that `typeinf_local(interp, caller)` can potentially modify the other frames
- # `frame.cycleid`, which is why making incremental progress requires the
- # outer while loop.
- typeinf_local(interp, caller)
- no_active_ips_in_callers = false
- end
- update_valid_age!(caller, frame.valid_worlds)
+ nextstates = CurrentState[]
+ takenext = frame.frameid
+ minwarn = warnlength
+ takeprev = 0
+ while takenext >= frame.frameid
+ callee = takenext == 0 ? frame : callstack[takenext]::InferenceState
+ if !isempty(callstack)
+ if length(callstack) - frame.frameid >= minwarn
+ topmethod = callstack[1].linfo
+ topmethod.def isa Method || (topmethod = callstack[2].linfo)
+ print(Core.stderr, "info: inference of ", topmethod, " exceeding ", length(callstack), " frames (may be slow).\n")
+ minwarn *= 2
+ end
+ topcallee = (callstack[end]::InferenceState)
+ if topcallee.cycleid != callee.cycleid
+ callee = topcallee
+ takenext = length(callstack)
+ end
+ end
+ nextstateid = takenext + 1 - frame.frameid
+ while length(nextstates) < nextstateid
+ push!(nextstates, CurrentState())
+ end
+ if doworkloop(interp, callee)
+ # First drain the workloop. Note that since some scheduled work doesn't
+ # affect the result (e.g. cfunction or abstract_call_method on
+ # get_compileable_sig), but still must be finished up since it may see and
+ # change the local variables of the InferenceState at currpc, we do this
+ # even if the nextresult status is already completed.
+ continue
+ elseif isdefined(nextstates[nextstateid], :result) || !isempty(callee.ip)
+ # Next make progress on this frame
+ prev = length(callee.tasks) + 1
+ nextstates[nextstateid] = typeinf_local(interp, callee, nextstates[nextstateid])
+ reverse!(callee.tasks, prev)
+ elseif callee.cycleid == length(callstack)
+ # With no active ip's and no cycles, frame is done
+ finish_nocycle(interp, callee)
+ callee.frameid == 0 && break
+ takenext = length(callstack)
+ nextstateid = takenext + 1 - frame.frameid
+ #@assert length(nextstates) == nextstateid + 1
+ #@assert all(i -> !isdefined(nextstates[i], :result), nextstateid+1:length(nextstates))
+ resize!(nextstates, nextstateid)
+ elseif callee.cycleid == callee.frameid
+ # If the current frame is the top part of a cycle, check if the whole cycle
+ # is done, and if not, pick the next item to work on.
+ no_active_ips_in_cycle = true
+ for i = callee.cycleid:length(callstack)
+ caller = callstack[i]::InferenceState
+ @assert caller.cycleid == callee.cycleid
+ if !isempty(caller.tasks) || isdefined(nextstates[i+1-frame.frameid], :result) || !isempty(caller.ip)
+ no_active_ips_in_cycle = false
+ break
+ end
+ end
+ if no_active_ips_in_cycle
+ finish_cycle(interp, callstack, callee.cycleid)
+ end
+ takenext = length(callstack)
+ nextstateid = takenext + 1 - frame.frameid
+ if no_active_ips_in_cycle
+ #@assert all(i -> !isdefined(nextstates[i], :result), nextstateid+1:length(nextstates))
+ resize!(nextstates, nextstateid)
+ else
+ #@assert length(nextstates) == nextstateid
+ end
+ else
+ # Continue to the next frame in this cycle
+ takenext = takenext - 1
end
end
- return true
+ #@assert all(nextresult -> !isdefined(nextresult, :result), nextstates)
+ return is_inferred(frame)
end
diff --git a/base/compiler/inferencestate.jl b/base/compiler/inferencestate.jl
index 6953dea5b9bd7..5f8fb82caaa34 100644
--- a/base/compiler/inferencestate.jl
+++ b/base/compiler/inferencestate.jl
@@ -236,7 +236,7 @@ mutable struct InferenceState
slottypes::Vector{Any}
src::CodeInfo
cfg::CFG
- method_info::MethodInfo
+ spec_info::SpecInfo
#= intermediate states for local abstract interpretation =#
currbb::Int
@@ -251,6 +251,7 @@ mutable struct InferenceState
stmt_info::Vector{CallInfo}
#= intermediate states for interprocedural abstract interpretation =#
+ tasks::Vector{WorkThunk}
pclimitations::IdSet{InferenceState} # causes of precision restrictions (LimitedAccuracy) on currpc ssavalue
limitations::IdSet{InferenceState} # causes of precision restrictions (LimitedAccuracy) on return
cycle_backedges::Vector{Tuple{InferenceState, Int}} # call-graph backedges connecting from callee to caller
@@ -293,7 +294,7 @@ mutable struct InferenceState
sptypes = sptypes_from_meth_instance(mi)
code = src.code::Vector{Any}
cfg = compute_basic_blocks(code)
- method_info = MethodInfo(src)
+ spec_info = SpecInfo(src)
currbb = currpc = 1
ip = BitSet(1) # TODO BitSetBoundedMinPrioritySet(1)
@@ -328,6 +329,7 @@ mutable struct InferenceState
limitations = IdSet{InferenceState}()
cycle_backedges = Vector{Tuple{InferenceState,Int}}()
callstack = AbsIntState[]
+ tasks = WorkThunk[]
valid_worlds = WorldRange(1, get_world_counter())
bestguess = Bottom
@@ -349,9 +351,9 @@ mutable struct InferenceState
restrict_abstract_call_sites = isa(def, Module)
this = new(
- mi, world, mod, sptypes, slottypes, src, cfg, method_info,
+ mi, world, mod, sptypes, slottypes, src, cfg, spec_info,
currbb, currpc, ip, handler_info, ssavalue_uses, bb_vartables, ssavaluetypes, stmt_edges, stmt_info,
- pclimitations, limitations, cycle_backedges, callstack, 0, 0, 0,
+ tasks, pclimitations, limitations, cycle_backedges, callstack, 0, 0, 0,
result, unreachable, valid_worlds, bestguess, exc_bestguess, ipo_effects,
restrict_abstract_call_sites, cache_mode, insert_coverage,
interp)
@@ -789,7 +791,7 @@ end
# TODO add `result::InferenceResult` and put the irinterp result into the inference cache?
mutable struct IRInterpretationState
- const method_info::MethodInfo
+ const spec_info::SpecInfo
const ir::IRCode
const mi::MethodInstance
const world::UInt
@@ -800,13 +802,14 @@ mutable struct IRInterpretationState
const ssa_refined::BitSet
const lazyreachability::LazyCFGReachability
valid_worlds::WorldRange
+ const tasks::Vector{WorkThunk}
const edges::Vector{Any}
callstack #::Vector{AbsIntState}
frameid::Int
parentid::Int
function IRInterpretationState(interp::AbstractInterpreter,
- method_info::MethodInfo, ir::IRCode, mi::MethodInstance, argtypes::Vector{Any},
+ spec_info::SpecInfo, ir::IRCode, mi::MethodInstance, argtypes::Vector{Any},
world::UInt, min_world::UInt, max_world::UInt)
curridx = 1
given_argtypes = Vector{Any}(undef, length(argtypes))
@@ -825,10 +828,11 @@ mutable struct IRInterpretationState
ssa_refined = BitSet()
lazyreachability = LazyCFGReachability(ir)
valid_worlds = WorldRange(min_world, max_world == typemax(UInt) ? get_world_counter() : max_world)
+ tasks = WorkThunk[]
edges = Any[]
callstack = AbsIntState[]
- return new(method_info, ir, mi, world, curridx, argtypes_refined, ir.sptypes, tpdum,
- ssa_refined, lazyreachability, valid_worlds, edges, callstack, 0, 0)
+ return new(spec_info, ir, mi, world, curridx, argtypes_refined, ir.sptypes, tpdum,
+ ssa_refined, lazyreachability, valid_worlds, tasks, edges, callstack, 0, 0)
end
end
@@ -841,14 +845,13 @@ function IRInterpretationState(interp::AbstractInterpreter,
else
isa(src, CodeInfo) || return nothing
end
- method_info = MethodInfo(src)
+ spec_info = SpecInfo(src)
ir = inflate_ir(src, mi)
argtypes = va_process_argtypes(optimizer_lattice(interp), argtypes, src.nargs, src.isva)
- return IRInterpretationState(interp, method_info, ir, mi, argtypes, world,
+ return IRInterpretationState(interp, spec_info, ir, mi, argtypes, world,
codeinst.min_world, codeinst.max_world)
end
-
# AbsIntState
# ===========
@@ -870,6 +873,7 @@ function print_callstack(frame::AbsIntState)
print(frame_instance(sv))
is_cached(sv) || print(" [uncached]")
sv.parentid == idx - 1 || print(" [parent=", sv.parentid, "]")
+ isempty(callers_in_cycle(sv)) || print(" [cycle=", sv.cycleid, "]")
println()
@assert sv.frameid == idx
end
@@ -922,11 +926,11 @@ is_constproped(::IRInterpretationState) = true
is_cached(sv::InferenceState) = !iszero(sv.cache_mode & CACHE_MODE_GLOBAL)
is_cached(::IRInterpretationState) = false
-method_info(sv::InferenceState) = sv.method_info
-method_info(sv::IRInterpretationState) = sv.method_info
+spec_info(sv::InferenceState) = sv.spec_info
+spec_info(sv::IRInterpretationState) = sv.spec_info
-propagate_inbounds(sv::AbsIntState) = method_info(sv).propagate_inbounds
-method_for_inference_limit_heuristics(sv::AbsIntState) = method_info(sv).method_for_inference_limit_heuristics
+propagate_inbounds(sv::AbsIntState) = spec_info(sv).propagate_inbounds
+method_for_inference_limit_heuristics(sv::AbsIntState) = spec_info(sv).method_for_inference_limit_heuristics
frame_world(sv::InferenceState) = sv.world
frame_world(sv::IRInterpretationState) = sv.world
@@ -994,7 +998,10 @@ of the same cycle, only if it is part of a cycle with multiple frames.
function callers_in_cycle(sv::InferenceState)
callstack = sv.callstack::Vector{AbsIntState}
cycletop = cycleid = sv.cycleid
- while cycletop < length(callstack) && (callstack[cycletop + 1]::InferenceState).cycleid == cycleid
+ while cycletop < length(callstack)
+ frame = callstack[cycletop + 1]
+ frame isa InferenceState || break
+ frame.cycleid == cycleid || break
cycletop += 1
end
return AbsIntCycle(callstack, cycletop == cycleid ? 0 : cycleid, cycletop)
@@ -1054,6 +1061,7 @@ function merge_effects!(::AbstractInterpreter, caller::InferenceState, effects::
effects = Effects(effects; effect_free=ALWAYS_TRUE)
end
caller.ipo_effects = merge_effects(caller.ipo_effects, effects)
+ nothing
end
merge_effects!(::AbstractInterpreter, ::IRInterpretationState, ::Effects) = return
@@ -1116,3 +1124,90 @@ function get_max_methods_for_module(mod::Module)
max_methods < 0 && return nothing
return max_methods
end
+
+"""
+ Future{T}
+
+Delayed return value for a value of type `T`, similar to RefValue{T}, but
+explicitly represents completed as a `Bool` rather than as `isdefined`.
+Set once with `f[] = v` and accessed with `f[]` afterwards.
+
+Can also be constructed with the `completed` flag value and a closure to
+produce `x`, as well as the additional arguments to avoid always capturing the
+same couple of values.
+"""
+struct Future{T}
+ later::Union{Nothing,RefValue{T}}
+ now::Union{Nothing,T}
+ Future{T}() where {T} = new{T}(RefValue{T}(), nothing)
+ Future{T}(x) where {T} = new{T}(nothing, x)
+ Future(x::T) where {T} = new{T}(nothing, x)
+end
+isready(f::Future) = f.later === nothing
+getindex(f::Future{T}) where {T} = (later = f.later; later === nothing ? f.now::T : later[])
+setindex!(f::Future, v) = something(f.later)[] = v
+convert(::Type{Future{T}}, x) where {T} = Future{T}(x) # support return type conversion
+convert(::Type{Future{T}}, x::Future) where {T} = x::Future{T}
+function Future{T}(f, immediate::Bool, interp::AbstractInterpreter, sv::AbsIntState) where {T}
+ if immediate
+ return Future{T}(f(interp, sv))
+ else
+ @assert applicable(f, interp, sv)
+ result = Future{T}()
+ push!(sv.tasks, function (interp, sv)
+ result[] = f(interp, sv)
+ return true
+ end)
+ return result
+ end
+end
+function Future{T}(f, prev::Future{S}, interp::AbstractInterpreter, sv::AbsIntState) where {T, S}
+ later = prev.later
+ if later === nothing
+ return Future{T}(f(prev[], interp, sv))
+ else
+ @assert Core._hasmethod(Tuple{Core.Typeof(f), S, typeof(interp), typeof(sv)})
+ result = Future{T}()
+ push!(sv.tasks, function (interp, sv)
+ result[] = f(later[], interp, sv) # capture just later, instead of all of prev
+ return true
+ end)
+ return result
+ end
+end
+
+
+"""
+ doworkloop(args...)
+
+Run a tasks inside the abstract interpreter, returning false if there are none.
+Tasks will be run in DFS post-order tree order, such that all child tasks will
+be run in the order scheduled, prior to running any subsequent tasks. This
+allows tasks to generate more child tasks, which will be run before anything else.
+Each task will be run repeatedly when returning `false`, until it returns `true`.
+"""
+function doworkloop(interp::AbstractInterpreter, sv::AbsIntState)
+ tasks = sv.tasks
+ prev = length(tasks)
+ prev == 0 && return false
+ task = pop!(tasks)
+ completed = task(interp, sv)
+ tasks = sv.tasks # allow dropping gc root over the previous call
+ completed isa Bool || throw(TypeError(:return, "", Bool, task)) # print the task on failure as part of the error message, instead of just "@ workloop:line"
+ completed || push!(tasks, task)
+ # efficient post-order visitor: items pushed are executed in reverse post order such
+ # that later items are executed before earlier ones, but are fully executed
+ # (including any dependencies scheduled by them) before going on to the next item
+ reverse!(tasks, #=start=#prev)
+ return true
+end
+
+
+#macro workthunk(name::Symbol, body)
+# name = esc(name)
+# body = esc(body)
+# return replace_linenums!(
+# :(function $name($(esc(interp)), $(esc(sv)))
+# $body
+# end), __source__)
+#end
diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
index 6b0cf981930ad..02f6b46e2e73f 100644
--- a/base/compiler/optimize.jl
+++ b/base/compiler/optimize.jl
@@ -644,10 +644,10 @@ function ((; code_cache)::GetNativeEscapeCache)(mi::MethodInstance)
return false
end
-function refine_effects!(interp::AbstractInterpreter, sv::PostOptAnalysisState)
+function refine_effects!(interp::AbstractInterpreter, opt::OptimizationState, sv::PostOptAnalysisState)
if !is_effect_free(sv.result.ipo_effects) && sv.all_effect_free && !isempty(sv.ea_analysis_pending)
ir = sv.ir
- nargs = length(ir.argtypes)
+ nargs = Int(opt.src.nargs)
estate = EscapeAnalysis.analyze_escapes(ir, nargs, optimizer_lattice(interp), GetNativeEscapeCache(interp))
argescapes = EscapeAnalysis.ArgEscapeCache(estate)
stack_analysis_result!(sv.result, argescapes)
@@ -939,7 +939,8 @@ function check_inconsistentcy!(sv::PostOptAnalysisState, scanner::BBScanner)
end
end
-function ipo_dataflow_analysis!(interp::AbstractInterpreter, ir::IRCode, result::InferenceResult)
+function ipo_dataflow_analysis!(interp::AbstractInterpreter, opt::OptimizationState,
+ ir::IRCode, result::InferenceResult)
if !is_ipo_dataflow_analysis_profitable(result.ipo_effects)
return false
end
@@ -967,13 +968,13 @@ function ipo_dataflow_analysis!(interp::AbstractInterpreter, ir::IRCode, result:
end
end
- return refine_effects!(interp, sv)
+ return refine_effects!(interp, opt, sv)
end
# run the optimization work
function optimize(interp::AbstractInterpreter, opt::OptimizationState, caller::InferenceResult)
- @timeit "optimizer" ir = run_passes_ipo_safe(opt.src, opt, caller)
- ipo_dataflow_analysis!(interp, ir, caller)
+ @timeit "optimizer" ir = run_passes_ipo_safe(opt.src, opt)
+ ipo_dataflow_analysis!(interp, opt, ir, caller)
return finish(interp, opt, ir, caller)
end
@@ -995,7 +996,6 @@ matchpass(::Nothing, _, _) = false
function run_passes_ipo_safe(
ci::CodeInfo,
sv::OptimizationState,
- caller::InferenceResult,
optimize_until = nothing, # run all passes by default
)
__stage__ = 0 # used by @pass
diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl
index 727e015b67062..5017b619469ff 100644
--- a/base/compiler/ssair/inlining.jl
+++ b/base/compiler/ssair/inlining.jl
@@ -12,6 +12,8 @@ struct InliningTodo
mi::MethodInstance
# The IR of the inlinee
ir::IRCode
+ # The SpecInfo for the inlinee
+ spec_info::SpecInfo
# The DebugInfo table for the inlinee
di::DebugInfo
# If the function being inlined is a single basic block we can use a
@@ -20,8 +22,8 @@ struct InliningTodo
# Effects of the call statement
effects::Effects
end
-function InliningTodo(mi::MethodInstance, (ir, di)::Tuple{IRCode, DebugInfo}, effects::Effects)
- return InliningTodo(mi, ir, di, linear_inline_eligible(ir), effects)
+function InliningTodo(mi::MethodInstance, ir::IRCode, spec_info::SpecInfo, di::DebugInfo, effects::Effects)
+ return InliningTodo(mi, ir, spec_info, di, linear_inline_eligible(ir), effects)
end
struct ConstantCase
@@ -321,7 +323,8 @@ function ir_inline_linetable!(debuginfo::DebugInfoStream, inlinee_debuginfo::Deb
end
function ir_prepare_inlining!(insert_node!::Inserter, inline_target::Union{IRCode, IncrementalCompact},
- ir::IRCode, di::DebugInfo, mi::MethodInstance, inlined_at::NTuple{3,Int32}, argexprs::Vector{Any})
+ ir::IRCode, spec_info::SpecInfo, di::DebugInfo, mi::MethodInstance,
+ inlined_at::NTuple{3,Int32}, argexprs::Vector{Any})
def = mi.def::Method
debuginfo = inline_target isa IRCode ? inline_target.debuginfo : inline_target.ir.debuginfo
topline = new_inlined_at = ir_inline_linetable!(debuginfo, di, inlined_at)
@@ -334,8 +337,8 @@ function ir_prepare_inlining!(insert_node!::Inserter, inline_target::Union{IRCod
spvals_ssa = insert_node!(
removable_if_unused(NewInstruction(Expr(:call, Core._compute_sparams, def, argexprs...), SimpleVector, topline)))
end
- if def.isva
- nargs_def = Int(def.nargs::Int32)
+ if spec_info.isva
+ nargs_def = spec_info.nargs
if nargs_def > 0
argexprs = fix_va_argexprs!(insert_node!, inline_target, argexprs, nargs_def, topline)
end
@@ -362,7 +365,7 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
item::InliningTodo, boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}})
# Ok, do the inlining here
inlined_at = compact.result[idx][:line]
- ssa_substitute = ir_prepare_inlining!(InsertHere(compact), compact, item.ir, item.di, item.mi, inlined_at, argexprs)
+ ssa_substitute = ir_prepare_inlining!(InsertHere(compact), compact, item.ir, item.spec_info, item.di, item.mi, inlined_at, argexprs)
boundscheck = has_flag(compact.result[idx], IR_FLAG_INBOUNDS) ? :off : boundscheck
# If the iterator already moved on to the next basic block,
@@ -860,15 +863,14 @@ function resolve_todo(mi::MethodInstance, result::Union{Nothing,InferenceResult,
if inferred_result isa ConstantCase
add_inlining_backedge!(et, mi)
return inferred_result
- end
- if inferred_result isa InferredResult
+ elseif inferred_result isa InferredResult
(; src, effects) = inferred_result
elseif inferred_result isa CodeInstance
src = @atomic :monotonic inferred_result.inferred
effects = decode_effects(inferred_result.ipo_purity_bits)
- else
- src = nothing
- effects = Effects()
+ else # there is no cached source available, bail out
+ return compileable_specialization(mi, Effects(), et, info;
+ compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes)
end
# the duplicated check might have been done already within `analyze_method!`, but still
@@ -883,9 +885,12 @@ function resolve_todo(mi::MethodInstance, result::Union{Nothing,InferenceResult,
compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes)
add_inlining_backedge!(et, mi)
- ir = inferred_result isa CodeInstance ? retrieve_ir_for_inlining(inferred_result, src) :
- retrieve_ir_for_inlining(mi, src, preserve_local_sources)
- return InliningTodo(mi, ir, effects)
+ if inferred_result isa CodeInstance
+ ir, spec_info, debuginfo = retrieve_ir_for_inlining(inferred_result, src)
+ else
+ ir, spec_info, debuginfo = retrieve_ir_for_inlining(mi, src, preserve_local_sources)
+ end
+ return InliningTodo(mi, ir, spec_info, debuginfo, effects)
end
# the special resolver for :invoke-d call
@@ -901,23 +906,17 @@ function resolve_todo(mi::MethodInstance, @nospecialize(info::CallInfo), flag::U
if cached_result isa ConstantCase
add_inlining_backedge!(et, mi)
return cached_result
- end
- if cached_result isa InferredResult
- (; src, effects) = cached_result
elseif cached_result isa CodeInstance
src = @atomic :monotonic cached_result.inferred
effects = decode_effects(cached_result.ipo_purity_bits)
- else
- src = nothing
- effects = Effects()
+ else # there is no cached source available, bail out
+ return nothing
end
- preserve_local_sources = true
src_inlining_policy(state.interp, src, info, flag) || return nothing
- ir = cached_result isa CodeInstance ? retrieve_ir_for_inlining(cached_result, src) :
- retrieve_ir_for_inlining(mi, src, preserve_local_sources)
+ ir, spec_info, debuginfo = retrieve_ir_for_inlining(cached_result, src)
add_inlining_backedge!(et, mi)
- return InliningTodo(mi, ir, effects)
+ return InliningTodo(mi, ir, spec_info, debuginfo, effects)
end
function validate_sparams(sparams::SimpleVector)
@@ -971,22 +970,29 @@ function analyze_method!(match::MethodMatch, argtypes::Vector{Any},
return resolve_todo(mi, volatile_inf_result, info, flag, state; invokesig)
end
-function retrieve_ir_for_inlining(cached_result::CodeInstance, src::MaybeCompressed)
- src = _uncompressed_ir(cached_result, src)::CodeInfo
- return inflate_ir!(src, cached_result.def), src.debuginfo
+function retrieve_ir_for_inlining(cached_result::CodeInstance, src::String)
+ src = _uncompressed_ir(cached_result, src)
+ return inflate_ir!(src, cached_result.def), SpecInfo(src), src.debuginfo
+end
+function retrieve_ir_for_inlining(cached_result::CodeInstance, src::CodeInfo)
+ return inflate_ir!(copy(src), cached_result.def), SpecInfo(src), src.debuginfo
end
function retrieve_ir_for_inlining(mi::MethodInstance, src::CodeInfo, preserve_local_sources::Bool)
if preserve_local_sources
src = copy(src)
end
- return inflate_ir!(src, mi), src.debuginfo
+ return inflate_ir!(src, mi), SpecInfo(src), src.debuginfo
end
function retrieve_ir_for_inlining(mi::MethodInstance, ir::IRCode, preserve_local_sources::Bool)
if preserve_local_sources
ir = copy(ir)
end
+ # COMBAK this is not correct, we should make `InferenceResult` propagate `SpecInfo`
+ spec_info = let m = mi.def::Method
+ SpecInfo(Int(m.nargs), m.isva, false, nothing)
+ end
ir.debuginfo.def = mi
- return ir, DebugInfo(ir.debuginfo, length(ir.stmts))
+ return ir, spec_info, DebugInfo(ir.debuginfo, length(ir.stmts))
end
function handle_single_case!(todo::Vector{Pair{Int,Any}},
@@ -1466,8 +1472,8 @@ function semiconcrete_result_item(result::SemiConcreteResult,
add_inlining_backedge!(et, mi)
preserve_local_sources = OptimizationParams(state.interp).preserve_local_sources
- ir = retrieve_ir_for_inlining(mi, result.ir, preserve_local_sources)
- return InliningTodo(mi, ir, result.effects)
+ ir, _, debuginfo = retrieve_ir_for_inlining(mi, result.ir, preserve_local_sources)
+ return InliningTodo(mi, ir, result.spec_info, debuginfo, result.effects)
end
function handle_semi_concrete_result!(cases::Vector{InliningCase}, result::SemiConcreteResult,
@@ -1597,7 +1603,6 @@ function handle_finalizer_call!(ir::IRCode, idx::Int, stmt::Expr, info::Finalize
push!(stmt.args, item1.invoke)
elseif isa(item1, ConstantCase)
push!(stmt.args, nothing)
- push!(stmt.args, item1.val)
end
end
return nothing
diff --git a/base/compiler/ssair/irinterp.jl b/base/compiler/ssair/irinterp.jl
index 1aeb87accbcd7..ca8ca770df413 100644
--- a/base/compiler/ssair/irinterp.jl
+++ b/base/compiler/ssair/irinterp.jl
@@ -51,8 +51,11 @@ end
function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, irsv::IRInterpretationState)
si = StmtInfo(true) # TODO better job here?
- call = abstract_call(interp, arginfo, si, irsv)
- irsv.ir.stmts[irsv.curridx][:info] = call.info
+ call = abstract_call(interp, arginfo, si, irsv)::Future
+ Future{Nothing}(call, interp, irsv) do call, interp, irsv
+ irsv.ir.stmts[irsv.curridx][:info] = call.info
+ nothing
+ end
return call
end
@@ -143,7 +146,19 @@ function reprocess_instruction!(interp::AbstractInterpreter, inst::Instruction,
head = stmt.head
if (head === :call || head === :foreigncall || head === :new || head === :splatnew ||
head === :static_parameter || head === :isdefined || head === :boundscheck)
- (; rt, effects) = abstract_eval_statement_expr(interp, stmt, nothing, irsv)
+ @assert isempty(irsv.tasks) # TODO: this whole function needs to be converted to a stackless design to be a valid AbsIntState, but this should work here for now
+ result = abstract_eval_statement_expr(interp, stmt, nothing, irsv)
+ reverse!(irsv.tasks)
+ while true
+ if length(irsv.callstack) > irsv.frameid
+ typeinf(interp, irsv.callstack[irsv.frameid + 1])
+ elseif !doworkloop(interp, irsv)
+ break
+ end
+ end
+ @assert length(irsv.callstack) == irsv.frameid && isempty(irsv.tasks)
+ result isa Future && (result = result[])
+ (; rt, effects) = result
add_flag!(inst, flags_for_effects(effects))
elseif head === :invoke
rt, (nothrow, noub) = abstract_eval_invoke_inst(interp, inst, irsv)
@@ -293,7 +308,7 @@ function is_all_const_call(@nospecialize(stmt), interp::AbstractInterpreter, irs
return true
end
-function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState;
+function ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState;
externally_refined::Union{Nothing,BitSet} = nothing)
(; ir, tpdum, ssa_refined) = irsv
@@ -449,18 +464,3 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR
return Pair{Any,Tuple{Bool,Bool}}(maybe_singleton_const(ultimate_rt), (nothrow, noub))
end
-
-function ir_abstract_constant_propagation(interp::NativeInterpreter, irsv::IRInterpretationState)
- if __measure_typeinf__[]
- inf_frame = Timings.InferenceFrameInfo(irsv.mi, irsv.world, VarState[], Any[], length(irsv.ir.argtypes))
- Timings.enter_new_timer(inf_frame)
- ret = _ir_abstract_constant_propagation(interp, irsv)
- append!(inf_frame.slottypes, irsv.ir.argtypes)
- Timings.exit_current_timer(inf_frame)
- return ret
- else
- return _ir_abstract_constant_propagation(interp, irsv)
- end
-end
-ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState) =
- _ir_abstract_constant_propagation(interp, irsv)
diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl
index 37d79e2bd7b0c..e227249b48598 100644
--- a/base/compiler/ssair/passes.jl
+++ b/base/compiler/ssair/passes.jl
@@ -1532,7 +1532,7 @@ function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int,
end
src_inlining_policy(inlining.interp, src, info, IR_FLAG_NULL) || return false
- src, di = retrieve_ir_for_inlining(code, src)
+ src, spec_info, di = retrieve_ir_for_inlining(code, src)
# For now: Require finalizer to only have one basic block
length(src.cfg.blocks) == 1 || return false
@@ -1542,7 +1542,7 @@ function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int,
# TODO: Should there be a special line number node for inlined finalizers?
inline_at = ir[SSAValue(idx)][:line]
- ssa_substitute = ir_prepare_inlining!(InsertBefore(ir, SSAValue(idx)), ir, src, di, mi, inline_at, argexprs)
+ ssa_substitute = ir_prepare_inlining!(InsertBefore(ir, SSAValue(idx)), ir, src, spec_info, di, mi, inline_at, argexprs)
# TODO: Use the actual inliner here rather than open coding this special purpose inliner.
ssa_rename = Vector{Any}(undef, length(src.stmts))
@@ -1564,10 +1564,12 @@ end
is_nothrow(ir::IRCode, ssa::SSAValue) = has_flag(ir[ssa], IR_FLAG_NOTHROW)
-function reachable_blocks(cfg::CFG, from_bb::Int, to_bb::Union{Nothing,Int} = nothing)
+function reachable_blocks(cfg::CFG, from_bb::Int, to_bb::Int)
worklist = Int[from_bb]
visited = BitSet(from_bb)
- if to_bb !== nothing
+ if to_bb == from_bb
+ return visited
+ else
push!(visited, to_bb)
end
function visit!(bb::Int)
@@ -1582,100 +1584,78 @@ function reachable_blocks(cfg::CFG, from_bb::Int, to_bb::Union{Nothing,Int} = no
return visited
end
-function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse::SSADefUse,
+function try_resolve_finalizer!(ir::IRCode, alloc_idx::Int, finalizer_idx::Int, defuse::SSADefUse,
inlining::InliningState, lazydomtree::LazyDomtree,
lazypostdomtree::LazyPostDomtree, @nospecialize(info::CallInfo))
# For now, require that:
# 1. The allocation dominates the finalizer registration
- # 2. The finalizer registration dominates all uses reachable from the
- # finalizer registration.
- # 3. The insertion block for the finalizer is the post-dominator of all
- # uses and the finalizer registration block. The insertion block must
- # be dominated by the finalizer registration block.
- # 4. The path from the finalizer registration to the finalizer inlining
+ # 2. The insertion block for the finalizer is the post-dominator of all
+ # uses (including the finalizer registration).
+ # 3. The path from the finalizer registration to the finalizer inlining
# location is nothrow
#
- # TODO: We could relax item 3, by inlining the finalizer multiple times.
+ # TODO: We could relax the check 2, by inlining the finalizer multiple times.
# Check #1: The allocation dominates the finalizer registration
domtree = get!(lazydomtree)
finalizer_bb = block_for_inst(ir, finalizer_idx)
- alloc_bb = block_for_inst(ir, idx)
+ alloc_bb = block_for_inst(ir, alloc_idx)
dominates(domtree, alloc_bb, finalizer_bb) || return nothing
- bb_insert_block::Int = finalizer_bb
- bb_insert_idx::Union{Int,Nothing} = finalizer_idx
- function note_block_use!(usebb::Int, useidx::Int)
- new_bb_insert_block = nearest_common_dominator(get!(lazypostdomtree),
- bb_insert_block, usebb)
- if new_bb_insert_block == bb_insert_block && bb_insert_idx !== nothing
- bb_insert_idx = max(bb_insert_idx::Int, useidx)
- elseif new_bb_insert_block == usebb
- bb_insert_idx = useidx
+ # Check #2: The insertion block for the finalizer is the post-dominator of all uses
+ insert_bb::Int = finalizer_bb
+ insert_idx::Union{Int,Nothing} = finalizer_idx
+ function note_defuse!(x::Union{Int,SSAUse})
+ defuse_idx = x isa SSAUse ? x.idx : x
+ defuse_idx == finalizer_idx && return nothing
+ defuse_bb = block_for_inst(ir, defuse_idx)
+ new_insert_bb = nearest_common_dominator(get!(lazypostdomtree),
+ insert_bb, defuse_bb)
+ if new_insert_bb == insert_bb && insert_idx !== nothing
+ insert_idx = max(insert_idx::Int, defuse_idx)
+ elseif new_insert_bb == defuse_bb
+ insert_idx = defuse_idx
else
- bb_insert_idx = nothing
+ insert_idx = nothing
end
- bb_insert_block = new_bb_insert_block
+ insert_bb = new_insert_bb
nothing
end
-
- # Collect all reachable blocks between the finalizer registration and the
- # insertion point
- blocks = reachable_blocks(ir.cfg, finalizer_bb, alloc_bb)
-
- # Check #2
- function check_defuse(x::Union{Int,SSAUse})
- duidx = x isa SSAUse ? x.idx : x
- duidx == finalizer_idx && return true
- bb = block_for_inst(ir, duidx)
- # Not reachable from finalizer registration - we're ok
- bb ∉ blocks && return true
- note_block_use!(bb, duidx)
- if dominates(domtree, finalizer_bb, bb)
- return true
- else
- return false
- end
- end
- all(check_defuse, defuse.uses) || return nothing
- all(check_defuse, defuse.defs) || return nothing
- bb_insert_block != 0 || return nothing # verify post-dominator of all uses exists
-
- # Check #3
- dominates(domtree, finalizer_bb, bb_insert_block) || return nothing
+ foreach(note_defuse!, defuse.uses)
+ foreach(note_defuse!, defuse.defs)
+ insert_bb != 0 || return nothing # verify post-dominator of all uses exists
if !OptimizationParams(inlining.interp).assume_fatal_throw
# Collect all reachable blocks between the finalizer registration and the
# insertion point
- blocks = finalizer_bb == bb_insert_block ? Int[finalizer_bb] :
- reachable_blocks(ir.cfg, finalizer_bb, bb_insert_block)
+ blocks = reachable_blocks(ir.cfg, finalizer_bb, insert_bb)
- # Check #4
- function check_range_nothrow(ir::IRCode, s::Int, e::Int)
+ # Check #3
+ function check_range_nothrow(s::Int, e::Int)
return all(s:e) do sidx::Int
sidx == finalizer_idx && return true
- sidx == idx && return true
+ sidx == alloc_idx && return true
return is_nothrow(ir, SSAValue(sidx))
end
end
for bb in blocks
range = ir.cfg.blocks[bb].stmts
s, e = first(range), last(range)
- if bb == bb_insert_block
- bb_insert_idx === nothing && continue
- e = bb_insert_idx
+ if bb == insert_bb
+ insert_idx === nothing && continue
+ e = insert_idx
end
if bb == finalizer_bb
s = finalizer_idx
end
- check_range_nothrow(ir, s, e) || return nothing
+ check_range_nothrow(s, e) || return nothing
end
end
# Ok, legality check complete. Figure out the exact statement where we're
# going to inline the finalizer.
- loc = bb_insert_idx === nothing ? first(ir.cfg.blocks[bb_insert_block].stmts) : bb_insert_idx::Int
- attach_after = bb_insert_idx !== nothing
+ loc = insert_idx === nothing ? first(ir.cfg.blocks[insert_bb].stmts) : insert_idx::Int
+ attach_after = insert_idx !== nothing
finalizer_stmt = ir[SSAValue(finalizer_idx)][:stmt]
argexprs = Any[finalizer_stmt.args[2], finalizer_stmt.args[3]]
@@ -1702,11 +1682,10 @@ function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse
return nothing
end
-function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree, inlining::Union{Nothing, InliningState})
+function sroa_mutables!(ir::IRCode, defuses::IdDict{Int,Tuple{SPCSet,SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree, inlining::Union{Nothing,InliningState})
𝕃ₒ = inlining === nothing ? SimpleInferenceLattice.instance : optimizer_lattice(inlining.interp)
lazypostdomtree = LazyPostDomtree(ir)
for (defidx, (intermediaries, defuse)) in defuses
- intermediaries = collect(intermediaries)
# Check if there are any uses we did not account for. If so, the variable
# escapes and we cannot eliminate the allocation. This works, because we're guaranteed
# not to include any intermediaries that have dead uses. As a result, missing uses will only ever
@@ -1906,7 +1885,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
end
end
-function form_new_preserves(origex::Expr, intermediates::Vector{Int}, new_preserves::Vector{Any})
+function form_new_preserves(origex::Expr, intermediaries::Union{Vector{Int},SPCSet}, new_preserves::Vector{Any})
newex = Expr(:foreigncall)
nccallargs = length(origex.args[3]::SimpleVector)
for i in 1:(6+nccallargs-1)
@@ -1915,7 +1894,7 @@ function form_new_preserves(origex::Expr, intermediates::Vector{Int}, new_preser
for i in (6+nccallargs):length(origex.args)
x = origex.args[i]
# don't need to preserve intermediaries
- if isa(x, SSAValue) && x.id in intermediates
+ if isa(x, SSAValue) && x.id in intermediaries
continue
end
push!(newex.args, x)
diff --git a/base/compiler/ssair/verify.jl b/base/compiler/ssair/verify.jl
index a4286177e93a4..268991282c483 100644
--- a/base/compiler/ssair/verify.jl
+++ b/base/compiler/ssair/verify.jl
@@ -1,9 +1,11 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license
function maybe_show_ir(ir::IRCode)
- if isdefined(Core, :Main)
+ if isdefined(Core, :Main) && isdefined(Core.Main, :Base)
# ensure we use I/O that does not yield, as this gets called during compilation
invokelatest(Core.Main.Base.show, Core.stdout, "text/plain", ir)
+ else
+ Core.show(ir)
end
end
@@ -25,6 +27,7 @@ is_toplevel_expr_head(head::Symbol) = head === :global || head === :method || he
is_value_pos_expr_head(head::Symbol) = head === :static_parameter
function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, use_idx::Int, printed_use_idx::Int, print::Bool, isforeigncall::Bool, arg_idx::Int, allow_frontend_forms::Bool)
if isa(op, SSAValue)
+ op.id > 0 || @verify_error "Def ($(op.id)) is invalid in final IR"
if op.id > length(ir.stmts)
def_bb = block_for_inst(ir.cfg, ir.new_nodes.info[op.id - length(ir.stmts)].pos)
else
diff --git a/base/compiler/stmtinfo.jl b/base/compiler/stmtinfo.jl
index ac5ffbdd5d76d..9dba7a4459f9e 100644
--- a/base/compiler/stmtinfo.jl
+++ b/base/compiler/stmtinfo.jl
@@ -94,6 +94,7 @@ struct SemiConcreteResult <: ConstResult
mi::MethodInstance
ir::IRCode
effects::Effects
+ spec_info::SpecInfo
end
# XXX Technically this does not represent a result of constant inference, but rather that of
diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl
index ab3b50763deec..a6b7e53c6f320 100644
--- a/base/compiler/tfuncs.jl
+++ b/base/compiler/tfuncs.jl
@@ -135,8 +135,8 @@ function instanceof_tfunc(@nospecialize(t), astag::Bool=false, @nospecialize(tro
end
return tr, isexact, isconcrete, istype
elseif isa(t, Union)
- ta, isexact_a, isconcrete_a, istype_a = instanceof_tfunc(t.a, astag, troot)
- tb, isexact_b, isconcrete_b, istype_b = instanceof_tfunc(t.b, astag, troot)
+ ta, isexact_a, isconcrete_a, istype_a = instanceof_tfunc(unwraptv(t.a), astag, troot)
+ tb, isexact_b, isconcrete_b, istype_b = instanceof_tfunc(unwraptv(t.b), astag, troot)
isconcrete = isconcrete_a && isconcrete_b
istype = istype_a && istype_b
# most users already handle the Union case, so here we assume that
@@ -563,9 +563,9 @@ add_tfunc(Core.sizeof, 1, 1, sizeof_tfunc, 1)
end
end
if isa(x, Union)
- na = nfields_tfunc(𝕃, x.a)
+ na = nfields_tfunc(𝕃, unwraptv(x.a))
na === Int && return Int
- return tmerge(na, nfields_tfunc(𝕃, x.b))
+ return tmerge(𝕃, na, nfields_tfunc(𝕃, unwraptv(x.b)))
end
return Int
end
@@ -1383,10 +1383,10 @@ end
nargs = length(argtypes)
if !isempty(argtypes) && isvarargtype(argtypes[nargs])
- nargs - 1 <= maxargs || return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
- nargs + 1 >= op_argi || return CallMeta(Any, Any, Effects(), NoCallInfo())
+ nargs - 1 <= maxargs || return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
+ nargs + 1 >= op_argi || return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
else
- minargs <= nargs <= maxargs || return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
+ minargs <= nargs <= maxargs || return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
end
𝕃ᵢ = typeinf_lattice(interp)
if ff === modifyfield!
@@ -1417,15 +1417,22 @@ end
op = unwrapva(argtypes[op_argi])
v = unwrapva(argtypes[v_argi])
callinfo = abstract_call(interp, ArgInfo(nothing, Any[op, TF, v]), StmtInfo(true), sv, #=max_methods=#1)
- TF2 = tmeet(callinfo.rt, widenconst(TF))
- if TF2 === Bottom
- RT = Bottom
- elseif isconcretetype(RT) && has_nontrivial_extended_info(𝕃ᵢ, TF2) # isconcrete condition required to form a PartialStruct
- RT = PartialStruct(RT, Any[TF, TF2])
+ TF = Core.Box(TF)
+ RT = Core.Box(RT)
+ return Future{CallMeta}(callinfo, interp, sv) do callinfo, interp, sv
+ TF = TF.contents
+ RT = RT.contents
+ TF2 = tmeet(callinfo.rt, widenconst(TF))
+ if TF2 === Bottom
+ RT = Bottom
+ elseif isconcretetype(RT) && has_nontrivial_extended_info(𝕃ᵢ, TF2) # isconcrete condition required to form a PartialStruct
+ RT = PartialStruct(RT, Any[TF, TF2])
+ end
+ info = ModifyOpInfo(callinfo.info)
+ return CallMeta(RT, Any, Effects(), info)
end
- info = ModifyOpInfo(callinfo.info)
end
- return CallMeta(RT, Any, Effects(), info)
+ return Future(CallMeta(RT, Any, Effects(), info))
end
# we could use tuple_tfunc instead of widenconst, but `o` is mutable, so that is unlikely to be beneficial
@@ -2895,17 +2902,17 @@ end
function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo, sv::AbsIntState)
UNKNOWN = CallMeta(Type, Any, Effects(EFFECTS_THROWS; nortcall=false), NoCallInfo())
if !(2 <= length(argtypes) <= 3)
- return UNKNOWN
+ return Future(UNKNOWN)
end
tt = widenslotwrapper(argtypes[end])
if !isa(tt, Const) && !(isType(tt) && !has_free_typevars(tt))
- return UNKNOWN
+ return Future(UNKNOWN)
end
af_argtype = isa(tt, Const) ? tt.val : (tt::DataType).parameters[1]
if !isa(af_argtype, DataType) || !(af_argtype <: Tuple)
- return UNKNOWN
+ return Future(UNKNOWN)
end
if length(argtypes) == 3
@@ -2918,7 +2925,7 @@ function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, s
end
if !(isa(aft, Const) || (isType(aft) && !has_free_typevars(aft)) ||
(isconcretetype(aft) && !(aft <: Builtin) && !iskindtype(aft)))
- return UNKNOWN
+ return Future(UNKNOWN)
end
# effects are not an issue if we know this statement will get removed, but if it does not get removed,
@@ -2926,7 +2933,7 @@ function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, s
RT_CALL_EFFECTS = Effects(EFFECTS_TOTAL; nortcall=false)
if contains_is(argtypes_vec, Union{})
- return CallMeta(Const(Union{}), Union{}, RT_CALL_EFFECTS, NoCallInfo())
+ return Future(CallMeta(Const(Union{}), Union{}, RT_CALL_EFFECTS, NoCallInfo()))
end
# Run the abstract_call without restricting abstract call
@@ -2935,42 +2942,45 @@ function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, s
if isa(sv, InferenceState)
old_restrict = sv.restrict_abstract_call_sites
sv.restrict_abstract_call_sites = false
- call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, #=max_methods=#-1)
- sv.restrict_abstract_call_sites = old_restrict
- else
- call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, #=max_methods=#-1)
- end
- info = verbose_stmt_info(interp) ? MethodResultPure(ReturnTypeCallInfo(call.info)) : MethodResultPure()
- rt = widenslotwrapper(call.rt)
- if isa(rt, Const)
- # output was computed to be constant
- return CallMeta(Const(typeof(rt.val)), Union{}, RT_CALL_EFFECTS, info)
- end
- rt = widenconst(rt)
- if rt === Bottom || (isconcretetype(rt) && !iskindtype(rt))
- # output cannot be improved so it is known for certain
- return CallMeta(Const(rt), Union{}, RT_CALL_EFFECTS, info)
- elseif isa(sv, InferenceState) && !isempty(sv.pclimitations)
- # conservatively express uncertainty of this result
- # in two ways: both as being a subtype of this, and
- # because of LimitedAccuracy causes
- return CallMeta(Type{<:rt}, Union{}, RT_CALL_EFFECTS, info)
- elseif isa(tt, Const) || isconstType(tt)
- # input arguments were known for certain
- # XXX: this doesn't imply we know anything about rt
- return CallMeta(Const(rt), Union{}, RT_CALL_EFFECTS, info)
- elseif isType(rt)
- return CallMeta(Type{rt}, Union{}, RT_CALL_EFFECTS, info)
- else
- return CallMeta(Type{<:rt}, Union{}, RT_CALL_EFFECTS, info)
+ end
+ call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, #=max_methods=#-1)
+ tt = Core.Box(tt)
+ return Future{CallMeta}(call, interp, sv) do call, interp, sv
+ if isa(sv, InferenceState)
+ sv.restrict_abstract_call_sites = old_restrict
+ end
+ info = verbose_stmt_info(interp) ? MethodResultPure(ReturnTypeCallInfo(call.info)) : MethodResultPure()
+ rt = widenslotwrapper(call.rt)
+ if isa(rt, Const)
+ # output was computed to be constant
+ return CallMeta(Const(typeof(rt.val)), Union{}, RT_CALL_EFFECTS, info)
+ end
+ rt = widenconst(rt)
+ if rt === Bottom || (isconcretetype(rt) && !iskindtype(rt))
+ # output cannot be improved so it is known for certain
+ return CallMeta(Const(rt), Union{}, RT_CALL_EFFECTS, info)
+ elseif isa(sv, InferenceState) && !isempty(sv.pclimitations)
+ # conservatively express uncertainty of this result
+ # in two ways: both as being a subtype of this, and
+ # because of LimitedAccuracy causes
+ return CallMeta(Type{<:rt}, Union{}, RT_CALL_EFFECTS, info)
+ elseif isa(tt.contents, Const) || isconstType(tt.contents)
+ # input arguments were known for certain
+ # XXX: this doesn't imply we know anything about rt
+ return CallMeta(Const(rt), Union{}, RT_CALL_EFFECTS, info)
+ elseif isType(rt)
+ return CallMeta(Type{rt}, Union{}, RT_CALL_EFFECTS, info)
+ else
+ return CallMeta(Type{<:rt}, Union{}, RT_CALL_EFFECTS, info)
+ end
end
end
# a simplified model of abstract_call_gf_by_type for applicable
function abstract_applicable(interp::AbstractInterpreter, argtypes::Vector{Any},
sv::AbsIntState, max_methods::Int)
- length(argtypes) < 2 && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
- isvarargtype(argtypes[2]) && return CallMeta(Bool, Any, EFFECTS_THROWS, NoCallInfo())
+ length(argtypes) < 2 && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
+ isvarargtype(argtypes[2]) && return Future(CallMeta(Bool, Any, EFFECTS_THROWS, NoCallInfo()))
argtypes = argtypes[2:end]
atype = argtypes_to_type(argtypes)
matches = find_method_matches(interp, argtypes, atype; max_methods)
@@ -2997,7 +3007,7 @@ function abstract_applicable(interp::AbstractInterpreter, argtypes::Vector{Any},
# added that did not intersect with any existing method
add_uncovered_edges!(sv, matches, atype)
end
- return CallMeta(rt, Union{}, EFFECTS_TOTAL, NoCallInfo())
+ return Future(CallMeta(rt, Union{}, EFFECTS_TOTAL, NoCallInfo()))
end
add_tfunc(applicable, 1, INT_INF, @nospecs((𝕃::AbstractLattice, f, args...)->Bool), 40)
diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl
index 315a068e611fe..8b85f7c6f35f1 100644
--- a/base/compiler/typeinfer.jl
+++ b/base/compiler/typeinfer.jl
@@ -56,7 +56,7 @@ end
Timing(mi_info, start_time, cur_start_time, time, children) = Timing(mi_info, start_time, cur_start_time, time, children, nothing)
Timing(mi_info, start_time) = Timing(mi_info, start_time, start_time, UInt64(0), Timing[])
-_time_ns() = ccall(:jl_hrtime, UInt64, ()) # Re-implemented here because Base not yet available.
+_time_ns() = ccall(:jl_hrtime, UInt64, ())
# We keep a stack of the Timings for each of the MethodInstances currently being timed.
# Since type inference currently operates via a depth-first search (during abstract
@@ -77,114 +77,14 @@ const ROOTmi = Core.Compiler.specialize_method(
Empty out the previously recorded type inference timings (`Core.Compiler._timings`), and
start the ROOT() timer again. `ROOT()` measures all time spent _outside_ inference.
"""
-function reset_timings()
- empty!(_timings)
- push!(_timings, Timing(
- # The MethodInstance for ROOT(), and default empty values for other fields.
- InferenceFrameInfo(ROOTmi, 0x0, Core.Compiler.VarState[], Any[Core.Const(ROOT)], 1),
- _time_ns()))
- return nothing
-end
-reset_timings()
-
-# (This is split into a function so that it can be called both in this module, at the top
-# of `enter_new_timer()`, and once at the Very End of the operation, by whoever started
-# the operation and called `reset_timings()`.)
-# NOTE: the @inline annotations here are not to make it faster, but to reduce the gap between
-# timer manipulations and the tasks we're timing.
-@inline function close_current_timer()
- stop_time = _time_ns()
- parent_timer = _timings[end]
- accum_time = stop_time - parent_timer.cur_start_time
-
- # Add in accum_time ("modify" the immutable struct)
- @inbounds begin
- _timings[end] = Timing(
- parent_timer.mi_info,
- parent_timer.start_time,
- parent_timer.cur_start_time,
- parent_timer.time + accum_time,
- parent_timer.children,
- parent_timer.bt,
- )
- end
- return nothing
-end
-
-@inline function enter_new_timer(frame)
- # Very first thing, stop the active timer: get the current time and add in the
- # time since it was last started to its aggregate exclusive time.
- close_current_timer()
-
- mi_info = _typeinf_identifier(frame)
-
- # Start the new timer right before returning
- push!(_timings, Timing(mi_info, UInt64(0)))
- len = length(_timings)
- new_timer = @inbounds _timings[len]
- # Set the current time _after_ appending the node, to try to exclude the
- # overhead from measurement.
- start = _time_ns()
-
- @inbounds begin
- _timings[len] = Timing(
- new_timer.mi_info,
- start,
- start,
- new_timer.time,
- new_timer.children,
- )
- end
-
- return nothing
-end
-
-# _expected_frame_ is not needed within this function; it is used in the `@assert`, to
-# assert that indeed we are always returning to a parent after finishing all of its
-# children (that is, asserting that inference proceeds via depth-first-search).
-@inline function exit_current_timer(_expected_frame_)
- # Finish the new timer
- stop_time = _time_ns()
-
- expected_mi_info = _typeinf_identifier(_expected_frame_)
-
- # Grab the new timer again because it might have been modified in _timings
- # (since it's an immutable struct)
- # And remove it from the current timings stack
- new_timer = pop!(_timings)
- Core.Compiler.@assert new_timer.mi_info.mi === expected_mi_info.mi
-
- # Prepare to unwind one level of the stack and record in the parent
- parent_timer = _timings[end]
-
- accum_time = stop_time - new_timer.cur_start_time
- # Add in accum_time ("modify" the immutable struct)
- new_timer = Timing(
- new_timer.mi_info,
- new_timer.start_time,
- new_timer.cur_start_time,
- new_timer.time + accum_time,
- new_timer.children,
- parent_timer.mi_info.mi === ROOTmi ? backtrace() : nothing,
- )
- # Record the final timing with the original parent timer
- push!(parent_timer.children, new_timer)
-
- # And finally restart the parent timer:
- len = length(_timings)
- @inbounds begin
- _timings[len] = Timing(
- parent_timer.mi_info,
- parent_timer.start_time,
- _time_ns(),
- parent_timer.time,
- parent_timer.children,
- parent_timer.bt,
- )
- end
-
- return nothing
-end
+function reset_timings() end
+push!(_timings, Timing(
+ # The MethodInstance for ROOT(), and default empty values for other fields.
+ InferenceFrameInfo(ROOTmi, 0x0, Core.Compiler.VarState[], Any[Core.Const(ROOT)], 1),
+ _time_ns()))
+function close_current_timer() end
+function enter_new_timer(frame) end
+function exit_current_timer(_expected_frame_) end
end # module Timings
@@ -194,19 +94,7 @@ end # module Timings
If set to `true`, record per-method-instance timings within type inference in the Compiler.
"""
__set_measure_typeinf(onoff::Bool) = __measure_typeinf__[] = onoff
-const __measure_typeinf__ = fill(false)
-
-# Wrapper around `_typeinf` that optionally records the exclusive time for each invocation.
-function typeinf(interp::AbstractInterpreter, frame::InferenceState)
- if __measure_typeinf__[]
- Timings.enter_new_timer(frame)
- v = _typeinf(interp, frame)
- Timings.exit_current_timer(frame)
- return v
- else
- return _typeinf(interp, frame)
- end
-end
+const __measure_typeinf__ = RefValue{Bool}(false)
function finish!(interp::AbstractInterpreter, caller::InferenceState;
can_discard_trees::Bool=may_discard_trees(interp))
@@ -258,19 +146,6 @@ function finish!(interp::AbstractInterpreter, caller::InferenceState;
return nothing
end
-function _typeinf(interp::AbstractInterpreter, frame::InferenceState)
- typeinf_nocycle(interp, frame) || return false # frame is now part of a higher cycle
- # with no active ip's, frame is done
- frames = frame.callstack::Vector{AbsIntState}
- if length(frames) == frame.cycleid
- finish_nocycle(interp, frame)
- else
- @assert frame.cycleid != 0
- finish_cycle(interp, frames, frame.cycleid)
- end
- return true
-end
-
function finish_nocycle(::AbstractInterpreter, frame::InferenceState)
finishinfer!(frame, frame.interp)
opt = frame.result.src
@@ -762,16 +637,11 @@ function merge_call_chain!(interp::AbstractInterpreter, parent::InferenceState,
add_cycle_backedge!(parent, child)
parent.cycleid === ancestorid && break
child = parent
- parent = frame_parent(child)
- while !isa(parent, InferenceState)
- # XXX we may miss some edges here?
- parent = frame_parent(parent::IRInterpretationState)
- end
+ parent = frame_parent(child)::InferenceState
end
# ensure that walking the callstack has the same cycleid (DAG)
for frame = reverse(ancestorid:length(frames))
- frame = frames[frame]
- frame isa InferenceState || continue
+ frame = frames[frame]::InferenceState
frame.cycleid == ancestorid && break
@assert frame.cycleid > ancestorid
frame.cycleid = ancestorid
@@ -796,9 +666,9 @@ end
# returned instead.
function resolve_call_cycle!(interp::AbstractInterpreter, mi::MethodInstance, parent::AbsIntState)
# TODO (#48913) implement a proper recursion handling for irinterp:
- # This works just because currently the `:terminate` condition guarantees that
- # irinterp doesn't fail into unresolved cycles, but it's not a good solution.
- # We should revisit this once we have a better story for handling cycles in irinterp.
+ # This works currently just because the irinterp code doesn't get used much with
+ # `@assume_effects`, so it never sees a cycle normally, but that may not be a sustainable solution.
+ parent isa InferenceState || return false
frames = parent.callstack::Vector{AbsIntState}
uncached = false
for frame = reverse(1:length(frames))
@@ -837,15 +707,43 @@ struct EdgeCallResult
end
# return cached result of regular inference
-function return_cached_result(::AbstractInterpreter, codeinst::CodeInstance, caller::AbsIntState)
+function return_cached_result(interp::AbstractInterpreter, method::Method, codeinst::CodeInstance, caller::AbsIntState, edgecycle::Bool, edgelimited::Bool)
rt = cached_return_type(codeinst)
effects = ipo_effects(codeinst)
update_valid_age!(caller, WorldRange(min_world(codeinst), max_world(codeinst)))
- return EdgeCallResult(rt, codeinst.exctype, codeinst.def, effects)
+ return Future(EdgeCall_to_MethodCall_Result(interp, caller, method, EdgeCallResult(rt, codeinst.exctype, codeinst.def, effects), edgecycle, edgelimited))
+end
+
+function EdgeCall_to_MethodCall_Result(interp::AbstractInterpreter, sv::AbsIntState, method::Method, result::EdgeCallResult, edgecycle::Bool, edgelimited::Bool)
+ (; rt, exct, edge, effects, volatile_inf_result) = result
+
+ if edge === nothing
+ edgecycle = edgelimited = true
+ end
+
+ # we look for the termination effect override here as well, since the :terminates effect
+ # may have been tainted due to recursion at this point even if it's overridden
+ if is_effect_overridden(sv, :terminates_globally)
+ # this frame is known to terminate
+ effects = Effects(effects, terminates=true)
+ elseif is_effect_overridden(method, :terminates_globally)
+ # this edge is known to terminate
+ effects = Effects(effects; terminates=true)
+ elseif edgecycle
+ # Some sort of recursion was detected.
+ if edge !== nothing && !edgelimited && !is_edge_recursed(edge, sv)
+ # no `MethodInstance` cycles -- don't taint :terminate
+ else
+ # we cannot guarantee that the call will terminate
+ effects = Effects(effects; terminates=false)
+ end
+ end
+
+ return MethodCallResult(rt, exct, edgecycle, edgelimited, edge, effects, volatile_inf_result)
end
# compute (and cache) an inferred AST and return the current best estimate of the result type
-function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, caller::AbsIntState)
+function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, caller::AbsIntState, edgecycle::Bool, edgelimited::Bool)
mi = specialize_method(method, atype, sparams)::MethodInstance
cache_mode = CACHE_MODE_GLOBAL # cache edge targets globally by default
force_inline = is_stmt_inline(get_curr_ssaflag(caller))
@@ -859,13 +757,13 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
cache_mode = CACHE_MODE_VOLATILE
else
@assert codeinst.def === mi "MethodInstance for cached edge does not match"
- return return_cached_result(interp, codeinst, caller)
+ return return_cached_result(interp, method, codeinst, caller, edgecycle, edgelimited)
end
end
end
if ccall(:jl_get_module_infer, Cint, (Any,), method.module) == 0 && !generating_output(#=incremental=#false)
add_remark!(interp, caller, "[typeinf_edge] Inference is disabled for the target module")
- return EdgeCallResult(Any, Any, nothing, Effects())
+ return Future(EdgeCall_to_MethodCall_Result(interp, caller, method, EdgeCallResult(Any, Any, nothing, Effects()), edgecycle, edgelimited))
end
if !is_cached(caller) && frame_parent(caller) === nothing
# this caller exists to return to the user
@@ -886,7 +784,7 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
cache_mode = CACHE_MODE_VOLATILE
else
@assert codeinst.def === mi "MethodInstance for cached edge does not match"
- return return_cached_result(interp, codeinst, caller)
+ return return_cached_result(interp, method, codeinst, caller, edgecycle, edgelimited)
end
end
end
@@ -902,31 +800,40 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
if cache_mode == CACHE_MODE_GLOBAL
engine_reject(interp, ci)
end
- return EdgeCallResult(Any, Any, nothing, Effects())
+ return Future(EdgeCall_to_MethodCall_Result(interp, caller, method, EdgeCallResult(Any, Any, nothing, Effects()), edgecycle, edgelimited))
end
assign_parentchild!(frame, caller)
- typeinf(interp, frame)
- update_valid_age!(caller, frame.valid_worlds)
- isinferred = is_inferred(frame)
- edge = isinferred ? mi : nothing
- effects = isinferred ? frame.result.ipo_effects : # effects are adjusted already within `finish` for ipo_effects
- adjust_effects(effects_for_cycle(frame.ipo_effects), method)
- exc_bestguess = refine_exception_type(frame.exc_bestguess, effects)
- # propagate newly inferred source to the inliner, allowing efficient inlining w/o deserialization:
- # note that this result is cached globally exclusively, so we can use this local result destructively
- volatile_inf_result = isinferred ? VolatileInferenceResult(result) : nothing
- return EdgeCallResult(frame.bestguess, exc_bestguess, edge, effects, volatile_inf_result)
+ # the actual inference task for this edge is going to be scheduled within `typeinf_local` via the callstack queue
+ # while splitting off the rest of the work for this caller into a separate workq thunk
+ let mresult = Future{MethodCallResult}()
+ push!(caller.tasks, function get_infer_result(interp, caller)
+ update_valid_age!(caller, frame.valid_worlds)
+ local isinferred = is_inferred(frame)
+ local edge = isinferred ? mi : nothing
+ local effects = isinferred ? frame.result.ipo_effects : # effects are adjusted already within `finish` for ipo_effects
+ adjust_effects(effects_for_cycle(frame.ipo_effects), method)
+ local exc_bestguess = refine_exception_type(frame.exc_bestguess, effects)
+ # propagate newly inferred source to the inliner, allowing efficient inlining w/o deserialization:
+ # note that this result is cached globally exclusively, so we can use this local result destructively
+ local volatile_inf_result = isinferred ? VolatileInferenceResult(result) : nothing
+ local edgeresult = EdgeCallResult(frame.bestguess, exc_bestguess, edge, effects, volatile_inf_result)
+ mresult[] = EdgeCall_to_MethodCall_Result(interp, caller, method, edgeresult, edgecycle, edgelimited)
+ return true
+ end)
+ return mresult
+ end
elseif frame === true
# unresolvable cycle
add_remark!(interp, caller, "[typeinf_edge] Unresolvable cycle")
- return EdgeCallResult(Any, Any, nothing, Effects())
+ return Future(EdgeCall_to_MethodCall_Result(interp, caller, method, EdgeCallResult(Any, Any, nothing, Effects()), edgecycle, edgelimited))
end
# return the current knowledge about this cycle
frame = frame::InferenceState
update_valid_age!(caller, frame.valid_worlds)
effects = adjust_effects(effects_for_cycle(frame.ipo_effects), method)
exc_bestguess = refine_exception_type(frame.exc_bestguess, effects)
- return EdgeCallResult(frame.bestguess, exc_bestguess, nothing, effects)
+ edgeresult = EdgeCallResult(frame.bestguess, exc_bestguess, nothing, effects)
+ return Future(EdgeCall_to_MethodCall_Result(interp, caller, method, edgeresult, edgecycle, edgelimited))
end
# The `:terminates` effect bit must be conservatively tainted unless recursion cycle has
@@ -1034,7 +941,7 @@ function typeinf_ircode(interp::AbstractInterpreter, mi::MethodInstance,
end
(; result) = frame
opt = OptimizationState(frame, interp)
- ir = run_passes_ipo_safe(opt.src, opt, result, optimize_until)
+ ir = run_passes_ipo_safe(opt.src, opt, optimize_until)
rt = widenconst(ignorelimited(result.result))
return ir, rt
end
diff --git a/base/compiler/types.jl b/base/compiler/types.jl
index b475e360dac02..ecf2417fd6199 100644
--- a/base/compiler/types.jl
+++ b/base/compiler/types.jl
@@ -1,4 +1,12 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license
+#
+
+const WorkThunk = Any
+# #@eval struct WorkThunk
+# thunk::Core.OpaqueClosure{Tuple{Vector{Tasks}}, Bool}
+# WorkThunk(work) = new($(Expr(:opaque_closure, :(Tuple{Vector{Tasks}}), :Bool, :Bool, :((tasks) -> work(tasks))))) # @opaque Vector{Tasks}->Bool (tasks)->work(tasks)
+# end
+# (p::WorkThunk)() = p.thunk()
"""
AbstractInterpreter
@@ -33,11 +41,14 @@ struct StmtInfo
used::Bool
end
-struct MethodInfo
+struct SpecInfo
+ nargs::Int
+ isva::Bool
propagate_inbounds::Bool
method_for_inference_limit_heuristics::Union{Nothing,Method}
end
-MethodInfo(src::CodeInfo) = MethodInfo(
+SpecInfo(src::CodeInfo) = SpecInfo(
+ Int(src.nargs), src.isva,
src.propagate_inbounds,
src.method_for_inference_limit_heuristics::Union{Nothing,Method})
diff --git a/base/docs/basedocs.jl b/base/docs/basedocs.jl
index e28b3a21659a8..a142ecffdb732 100644
--- a/base/docs/basedocs.jl
+++ b/base/docs/basedocs.jl
@@ -937,11 +937,14 @@ expression, rather than the side effects that evaluating `b` or `c` may have.
See the manual section on [control flow](@ref man-conditional-evaluation) for more details.
# Examples
-```
+```jldoctest
julia> x = 1; y = 2;
-julia> x > y ? println("x is larger") : println("y is larger")
-y is larger
+julia> x > y ? println("x is larger") : println("x is not larger")
+x is not larger
+
+julia> x > y ? "x is larger" : x == y ? "x and y are equal" : "y is larger"
+"y is larger"
```
"""
kw"?", kw"?:"
diff --git a/base/errorshow.jl b/base/errorshow.jl
index 9c8aad8b6ee2c..20bdee1de6ec0 100644
--- a/base/errorshow.jl
+++ b/base/errorshow.jl
@@ -1052,7 +1052,7 @@ function nonsetable_type_hint_handler(io, ex, arg_types, kwargs)
print(io, "\nAre you trying to index into an array? For multi-dimensional arrays, separate the indices with commas: ")
printstyled(io, "a[1, 2]", color=:cyan)
print(io, " rather than a[1][2]")
- else isType(T)
+ elseif isType(T)
Tx = T.parameters[1]
print(io, "\nYou attempted to index the type $Tx, rather than an instance of the type. Make sure you create the type using its constructor: ")
printstyled(io, "d = $Tx([...])", color=:cyan)
diff --git a/base/experimental.jl b/base/experimental.jl
index 58c7258120f3f..648b5da0ed9a1 100644
--- a/base/experimental.jl
+++ b/base/experimental.jl
@@ -319,9 +319,9 @@ function show_error_hints(io, ex, args...)
for handler in hinters
try
@invokelatest handler(io, ex, args...)
- catch err
+ catch
tn = typeof(handler).name
- @error "Hint-handler $handler for $(typeof(ex)) in $(tn.module) caused an error"
+ @error "Hint-handler $handler for $(typeof(ex)) in $(tn.module) caused an error" exception=current_exceptions()
end
end
end
@@ -457,4 +457,18 @@ without adding them to the global method table.
"""
:@MethodTable
+"""
+ Base.Experimental.entrypoint(f, argtypes::Tuple)
+
+Mark a method for inclusion when the `--trim` option is specified.
+"""
+function entrypoint(@nospecialize(f), @nospecialize(argtypes::Tuple))
+ entrypoint(Tuple{Core.Typeof(f), argtypes...})
+end
+
+function entrypoint(@nospecialize(argt::Type))
+ ccall(:jl_add_entrypoint, Int32, (Any,), argt)
+ nothing
+end
+
end
diff --git a/base/expr.jl b/base/expr.jl
index c4f64b89de8b6..478ccd7d7cc20 100644
--- a/base/expr.jl
+++ b/base/expr.jl
@@ -902,8 +902,8 @@ end
unwrap_macrocalls(@nospecialize(x)) = x
function unwrap_macrocalls(ex::Expr)
inner = ex
- while inner.head === :macrocall
- inner = inner.args[end]::Expr
+ while isexpr(inner, :macrocall)
+ inner = inner.args[end]
end
return inner
end
diff --git a/base/floatfuncs.jl b/base/floatfuncs.jl
index 67e7899b4107c..2c26f7cff1133 100644
--- a/base/floatfuncs.jl
+++ b/base/floatfuncs.jl
@@ -232,7 +232,9 @@ function isapprox(x::Integer, y::Integer;
if norm === abs && atol < 1 && rtol == 0
return x == y
else
- return norm(x - y) <= max(atol, rtol*max(norm(x), norm(y)))
+ # We need to take the difference `max` - `min` when comparing unsigned integers.
+ _x, _y = x < y ? (x, y) : (y, x)
+ return norm(_y - _x) <= max(atol, rtol*max(norm(_x), norm(_y)))
end
end
diff --git a/base/genericmemory.jl b/base/genericmemory.jl
index 6537839320206..c4ebbc6ca14e1 100644
--- a/base/genericmemory.jl
+++ b/base/genericmemory.jl
@@ -320,11 +320,13 @@ end
# get, set(once), modify, swap and replace at index, atomically
function getindex_atomic(mem::GenericMemory, order::Symbol, i::Int)
+ @_propagate_inbounds_meta
memref = memoryref(mem, i)
return memoryrefget(memref, order, @_boundscheck)
end
function setindex_atomic!(mem::GenericMemory, order::Symbol, val, i::Int)
+ @_propagate_inbounds_meta
T = eltype(mem)
memref = memoryref(mem, i)
return memoryrefset!(
@@ -342,6 +344,7 @@ function setindexonce_atomic!(
val,
i::Int,
)
+ @_propagate_inbounds_meta
T = eltype(mem)
memref = memoryref(mem, i)
return Core.memoryrefsetonce!(
@@ -354,11 +357,13 @@ function setindexonce_atomic!(
end
function modifyindex_atomic!(mem::GenericMemory, order::Symbol, op, val, i::Int)
+ @_propagate_inbounds_meta
memref = memoryref(mem, i)
return Core.memoryrefmodify!(memref, op, val, order, @_boundscheck)
end
function swapindex_atomic!(mem::GenericMemory, order::Symbol, val, i::Int)
+ @_propagate_inbounds_meta
T = eltype(mem)
memref = memoryref(mem, i)
return Core.memoryrefswap!(
@@ -377,6 +382,7 @@ function replaceindex_atomic!(
desired,
i::Int,
)
+ @_propagate_inbounds_meta
T = eltype(mem)
memref = memoryref(mem, i)
return Core.memoryrefreplace!(
diff --git a/base/irrationals.jl b/base/irrationals.jl
index eafe388162353..b3073c503238a 100644
--- a/base/irrationals.jl
+++ b/base/irrationals.jl
@@ -216,7 +216,7 @@ function irrational(sym, val, def)
esym = esc(sym)
qsym = esc(Expr(:quote, sym))
bigconvert = isa(def,Symbol) ? quote
- function Base.BigFloat(::Irrational{$qsym}, r::MPFR.MPFRRoundingMode=MPFR.ROUNDING_MODE[]; precision=precision(BigFloat))
+ function Base.BigFloat(::Irrational{$qsym}, r::MPFR.MPFRRoundingMode=Rounding.rounding_raw(BigFloat); precision=precision(BigFloat))
c = BigFloat(;precision=precision)
ccall(($(string("mpfr_const_", def)), :libmpfr),
Cint, (Ref{BigFloat}, MPFR.MPFRRoundingMode), c, r)
diff --git a/base/libuv.jl b/base/libuv.jl
index 143201598fde0..306854e9f4436 100644
--- a/base/libuv.jl
+++ b/base/libuv.jl
@@ -26,10 +26,10 @@ for r in uv_req_types
@eval const $(Symbol("_sizeof_", lowercase(string(r)))) = uv_sizeof_req($r)
end
-uv_handle_data(handle) = ccall(:jl_uv_handle_data, Ptr{Cvoid}, (Ptr{Cvoid},), handle)
-uv_req_data(handle) = ccall(:jl_uv_req_data, Ptr{Cvoid}, (Ptr{Cvoid},), handle)
-uv_req_set_data(req, data) = ccall(:jl_uv_req_set_data, Cvoid, (Ptr{Cvoid}, Any), req, data)
-uv_req_set_data(req, data::Ptr{Cvoid}) = ccall(:jl_uv_req_set_data, Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}), req, data)
+uv_handle_data(handle) = ccall(:uv_handle_get_data, Ptr{Cvoid}, (Ptr{Cvoid},), handle)
+uv_req_data(handle) = ccall(:uv_req_get_data, Ptr{Cvoid}, (Ptr{Cvoid},), handle)
+uv_req_set_data(req, data) = ccall(:uv_req_set_data, Cvoid, (Ptr{Cvoid}, Any), req, data)
+uv_req_set_data(req, data::Ptr{Cvoid}) = ccall(:uv_req_set_data, Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}), req, data)
macro handle_as(hand, typ)
return quote
@@ -133,7 +133,10 @@ function uv_return_spawn end
function uv_asynccb end
function uv_timercb end
-function reinit_stdio()
+reinit_stdio() = _reinit_stdio()
+# we need this so it can be called by codegen to print errors, even after
+# reinit_stdio has been redefined by the juliac build script.
+function _reinit_stdio()
global stdin = init_stdio(ccall(:jl_stdin_stream, Ptr{Cvoid}, ()))::IO
global stdout = init_stdio(ccall(:jl_stdout_stream, Ptr{Cvoid}, ()))::IO
global stderr = init_stdio(ccall(:jl_stderr_stream, Ptr{Cvoid}, ()))::IO
diff --git a/base/loading.jl b/base/loading.jl
index cf7e41a0b5b2b..9080a2271fb27 100644
--- a/base/loading.jl
+++ b/base/loading.jl
@@ -1387,7 +1387,9 @@ function run_module_init(mod::Module, i::Int=1)
end
function run_package_callbacks(modkey::PkgId)
- run_extension_callbacks(modkey)
+ if !precompiling_extension
+ run_extension_callbacks(modkey)
+ end
assert_havelock(require_lock)
unlock(require_lock)
try
@@ -2843,7 +2845,7 @@ end
const PRECOMPILE_TRACE_COMPILE = Ref{String}()
function create_expr_cache(pkg::PkgId, input::String, output::String, output_o::Union{Nothing, String},
- concrete_deps::typeof(_concrete_dependencies), flags::Cmd=``, internal_stderr::IO = stderr, internal_stdout::IO = stdout)
+ concrete_deps::typeof(_concrete_dependencies), flags::Cmd=``, internal_stderr::IO = stderr, internal_stdout::IO = stdout, isext::Bool=false)
@nospecialize internal_stderr internal_stdout
rm(output, force=true) # Remove file if it exists
output_o === nothing || rm(output_o, force=true)
@@ -2912,7 +2914,7 @@ function create_expr_cache(pkg::PkgId, input::String, output::String, output_o::
write(io.in, """
empty!(Base.EXT_DORMITORY) # If we have a custom sysimage with `EXT_DORMITORY` prepopulated
Base.track_nested_precomp($precomp_stack)
- Base.precompiling_extension = $(loading_extension)
+ Base.precompiling_extension = $(loading_extension | isext)
Base.precompiling_package = true
Base.include_package_for_output($(pkg_str(pkg)), $(repr(abspath(input))), $(repr(depot_path)), $(repr(dl_load_path)),
$(repr(load_path)), $deps, $(repr(source_path(nothing))))
@@ -2970,18 +2972,18 @@ This can be used to reduce package load times. Cache files are stored in
`DEPOT_PATH[1]/compiled`. See [Module initialization and precompilation](@ref)
for important notes.
"""
-function compilecache(pkg::PkgId, internal_stderr::IO = stderr, internal_stdout::IO = stdout; flags::Cmd=``, reasons::Union{Dict{String,Int},Nothing}=Dict{String,Int}())
+function compilecache(pkg::PkgId, internal_stderr::IO = stderr, internal_stdout::IO = stdout; flags::Cmd=``, reasons::Union{Dict{String,Int},Nothing}=Dict{String,Int}(), isext::Bool=false)
@nospecialize internal_stderr internal_stdout
path = locate_package(pkg)
path === nothing && throw(ArgumentError("$(repr("text/plain", pkg)) not found during precompilation"))
- return compilecache(pkg, path, internal_stderr, internal_stdout; flags, reasons)
+ return compilecache(pkg, path, internal_stderr, internal_stdout; flags, reasons, isext)
end
const MAX_NUM_PRECOMPILE_FILES = Ref(10)
function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, internal_stdout::IO = stdout,
keep_loaded_modules::Bool = true; flags::Cmd=``, cacheflags::CacheFlags=CacheFlags(),
- reasons::Union{Dict{String,Int},Nothing}=Dict{String,Int}())
+ reasons::Union{Dict{String,Int},Nothing}=Dict{String,Int}(), isext::Bool=false)
@nospecialize internal_stderr internal_stdout
# decide where to put the resulting cache file
@@ -3021,7 +3023,7 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in
close(tmpio_o)
close(tmpio_so)
end
- p = create_expr_cache(pkg, path, tmppath, tmppath_o, concrete_deps, flags, internal_stderr, internal_stdout)
+ p = create_expr_cache(pkg, path, tmppath, tmppath_o, concrete_deps, flags, internal_stderr, internal_stdout, isext)
if success(p)
if cache_objects
@@ -3163,16 +3165,9 @@ mutable struct CacheHeaderIncludes
const modpath::Vector{String} # seemingly not needed in Base, but used by Revise
end
-function replace_depot_path(path::AbstractString)
- for depot in DEPOT_PATH
- !isdir(depot) && continue
-
- # Strip extraneous pathseps through normalization.
- if isdirpath(depot)
- depot = dirname(depot)
- end
-
- if startswith(path, depot)
+function replace_depot_path(path::AbstractString, depots::Vector{String}=normalize_depots_for_relocation())
+ for depot in depots
+ if startswith(path, string(depot, Filesystem.pathsep())) || path == depot
path = replace(path, depot => "@depot"; count=1)
break
end
@@ -3180,6 +3175,19 @@ function replace_depot_path(path::AbstractString)
return path
end
+function normalize_depots_for_relocation()
+ depots = String[]
+ sizehint!(depots, length(DEPOT_PATH))
+ for d in DEPOT_PATH
+ isdir(d) || continue
+ if isdirpath(d)
+ d = dirname(d)
+ end
+ push!(depots, abspath(d))
+ end
+ return depots
+end
+
function restore_depot_path(path::AbstractString, depot::AbstractString)
replace(path, r"^@depot" => depot; count=1)
end
diff --git a/base/mpfr.jl b/base/mpfr.jl
index d393469aa26a1..9d1a0843ebe06 100644
--- a/base/mpfr.jl
+++ b/base/mpfr.jl
@@ -18,12 +18,10 @@ import
setrounding, maxintfloat, widen, significand, frexp, tryparse, iszero,
isone, big, _string_n, decompose, minmax, _precision_with_base_2,
sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand,
- uinttype, exponent_max, exponent_min, ieee754_representation, significand_mask,
- RawBigIntRoundingIncrementHelper, truncated, RawBigInt
-
+ uinttype, exponent_max, exponent_min, ieee754_representation, significand_mask
using .Base.Libc
-import ..Rounding:
+import ..Rounding: Rounding,
rounding_raw, setrounding_raw, rounds_to_nearest, rounds_away_from_zero,
tie_breaker_is_to_even, correct_rounding_requires_increment
@@ -39,7 +37,6 @@ else
const libmpfr = "libmpfr.so.6"
end
-
version() = VersionNumber(unsafe_string(ccall((:mpfr_get_version,libmpfr), Ptr{Cchar}, ())))
patches() = split(unsafe_string(ccall((:mpfr_get_patches,libmpfr), Ptr{Cchar}, ())),' ')
@@ -120,44 +117,116 @@ const mpfr_special_exponent_zero = typemin(Clong) + true
const mpfr_special_exponent_nan = mpfr_special_exponent_zero + true
const mpfr_special_exponent_inf = mpfr_special_exponent_nan + true
+struct BigFloatLayout
+ prec::Clong
+ sign::Cint
+ exp::Clong
+ d::Ptr{Limb}
+ # possible padding
+ p::Limb # Tuple{Vararg{Limb}}
+end
+const offset_prec = fieldoffset(BigFloatLayout, 1) % Int
+const offset_sign = fieldoffset(BigFloatLayout, 2) % Int
+const offset_exp = fieldoffset(BigFloatLayout, 3) % Int
+const offset_d = fieldoffset(BigFloatLayout, 4) % Int
+const offset_p_limbs = ((fieldoffset(BigFloatLayout, 5) % Int + sizeof(Limb) - 1) ÷ sizeof(Limb))
+const offset_p = offset_p_limbs * sizeof(Limb)
+
"""
BigFloat <: AbstractFloat
Arbitrary precision floating point number type.
"""
-mutable struct BigFloat <: AbstractFloat
- prec::Clong
- sign::Cint
- exp::Clong
- d::Ptr{Limb}
- # _d::Buffer{Limb} # Julia gc handle for memory @ d
- _d::String # Julia gc handle for memory @ d (optimized)
+struct BigFloat <: AbstractFloat
+ d::Memory{Limb}
# Not recommended for general use:
# used internally by, e.g. deepcopy
- global function _BigFloat(prec::Clong, sign::Cint, exp::Clong, d::String)
- # ccall-based version, inlined below
- #z = new(zero(Clong), zero(Cint), zero(Clong), C_NULL, d)
- #ccall((:mpfr_custom_init,libmpfr), Cvoid, (Ptr{Limb}, Clong), d, prec) # currently seems to be a no-op in mpfr
- #NAN_KIND = Cint(0)
- #ccall((:mpfr_custom_init_set,libmpfr), Cvoid, (Ref{BigFloat}, Cint, Clong, Ptr{Limb}), z, NAN_KIND, prec, d)
- #return z
- return new(prec, sign, exp, pointer(d), d)
- end
+ global _BigFloat(d::Memory{Limb}) = new(d)
function BigFloat(; precision::Integer=_precision_with_base_2(BigFloat))
precision < 1 && throw(DomainError(precision, "`precision` cannot be less than 1."))
nb = ccall((:mpfr_custom_get_size,libmpfr), Csize_t, (Clong,), precision)
- nb = (nb + Core.sizeof(Limb) - 1) ÷ Core.sizeof(Limb) # align to number of Limb allocations required for this
- #d = Vector{Limb}(undef, nb)
- d = _string_n(nb * Core.sizeof(Limb))
- EXP_NAN = mpfr_special_exponent_nan
- return _BigFloat(Clong(precision), one(Cint), EXP_NAN, d) # +NAN
+ nl = (nb + offset_p + sizeof(Limb) - 1) ÷ Core.sizeof(Limb) # align to number of Limb allocations required for this
+ d = Memory{Limb}(undef, nl % Int)
+ # ccall-based version, inlined below
+ z = _BigFloat(d) # initialize to +NAN
+ #ccall((:mpfr_custom_init,libmpfr), Cvoid, (Ptr{Limb}, Clong), BigFloatData(d), prec) # currently seems to be a no-op in mpfr
+ #NAN_KIND = Cint(0)
+ #ccall((:mpfr_custom_init_set,libmpfr), Cvoid, (Ref{BigFloat}, Cint, Clong, Ptr{Limb}), z, NAN_KIND, prec, BigFloatData(d))
+ z.prec = Clong(precision)
+ z.sign = one(Cint)
+ z.exp = mpfr_special_exponent_nan
+ return z
end
end
-# The rounding mode here shouldn't matter.
-significand_limb_count(x::BigFloat) = div(sizeof(x._d), sizeof(Limb), RoundToZero)
+"""
+Segment of raw words of bits interpreted as a big integer. Less
+significant words come first. Each word is in machine-native bit-order.
+"""
+struct BigFloatData{Limb}
+ d::Memory{Limb}
+end
+
+# BigFloat interface
+@inline function Base.getproperty(x::BigFloat, s::Symbol)
+ d = getfield(x, :d)
+ p = Base.unsafe_convert(Ptr{Limb}, d)
+ if s === :prec
+ return GC.@preserve d unsafe_load(Ptr{Clong}(p) + offset_prec)
+ elseif s === :sign
+ return GC.@preserve d unsafe_load(Ptr{Cint}(p) + offset_sign)
+ elseif s === :exp
+ return GC.@preserve d unsafe_load(Ptr{Clong}(p) + offset_exp)
+ elseif s === :d
+ return BigFloatData(d)
+ else
+ return throw(FieldError(typeof(x), s))
+ end
+end
+
+@inline function Base.setproperty!(x::BigFloat, s::Symbol, v)
+ d = getfield(x, :d)
+ p = Base.unsafe_convert(Ptr{Limb}, d)
+ if s === :prec
+ return GC.@preserve d unsafe_store!(Ptr{Clong}(p) + offset_prec, v)
+ elseif s === :sign
+ return GC.@preserve d unsafe_store!(Ptr{Cint}(p) + offset_sign, v)
+ elseif s === :exp
+ return GC.@preserve d unsafe_store!(Ptr{Clong}(p) + offset_exp, v)
+ #elseif s === :d # not mutable
+ else
+ return throw(FieldError(x, s))
+ end
+end
+
+# Ref interface: make sure the conversion to C is done properly
+Base.unsafe_convert(::Type{Ref{BigFloat}}, x::Ptr{BigFloat}) = error("not compatible with mpfr")
+Base.unsafe_convert(::Type{Ref{BigFloat}}, x::Ref{BigFloat}) = error("not compatible with mpfr")
+Base.cconvert(::Type{Ref{BigFloat}}, x::BigFloat) = x.d # BigFloatData is the Ref type for BigFloat
+function Base.unsafe_convert(::Type{Ref{BigFloat}}, x::BigFloatData)
+ d = getfield(x, :d)
+ p = Base.unsafe_convert(Ptr{Limb}, d)
+    GC.@preserve d unsafe_store!(Ptr{Ptr{Limb}}(p) + offset_d, p + offset_p, :monotonic) # :monotonic ensures that TSAN knows that this isn't a data race
+ return Ptr{BigFloat}(p)
+end
+Base.unsafe_convert(::Type{Ptr{Limb}}, fd::BigFloatData) = Base.unsafe_convert(Ptr{Limb}, getfield(fd, :d)) + offset_p
+function Base.setindex!(fd::BigFloatData, v, i)
+ d = getfield(fd, :d)
+ @boundscheck 1 <= i <= length(d) - offset_p_limbs || throw(BoundsError(fd, i))
+ @inbounds d[i + offset_p_limbs] = v
+ return fd
+end
+function Base.getindex(fd::BigFloatData, i)
+ d = getfield(fd, :d)
+ @boundscheck 1 <= i <= length(d) - offset_p_limbs || throw(BoundsError(fd, i))
+ @inbounds d[i + offset_p_limbs]
+end
+Base.length(fd::BigFloatData) = length(getfield(fd, :d)) - offset_p_limbs
+Base.copyto!(fd::BigFloatData, limbs) = copyto!(getfield(fd, :d), offset_p_limbs + 1, limbs) # for Random
+
+include("rawbigfloats.jl")
rounding_raw(::Type{BigFloat}) = something(Base.ScopedValues.get(CURRENT_ROUNDING_MODE), ROUNDING_MODE[])
setrounding_raw(::Type{BigFloat}, r::MPFRRoundingMode) = ROUNDING_MODE[]=r
@@ -165,24 +234,12 @@ function setrounding_raw(f::Function, ::Type{BigFloat}, r::MPFRRoundingMode)
Base.ScopedValues.@with(CURRENT_ROUNDING_MODE => r, f())
end
-
rounding(::Type{BigFloat}) = convert(RoundingMode, rounding_raw(BigFloat))
setrounding(::Type{BigFloat}, r::RoundingMode) = setrounding_raw(BigFloat, convert(MPFRRoundingMode, r))
setrounding(f::Function, ::Type{BigFloat}, r::RoundingMode) =
setrounding_raw(f, BigFloat, convert(MPFRRoundingMode, r))
-# overload the definition of unsafe_convert to ensure that `x.d` is assigned
-# it may have been dropped in the event that the BigFloat was serialized
-Base.unsafe_convert(::Type{Ref{BigFloat}}, x::Ptr{BigFloat}) = x
-@inline function Base.unsafe_convert(::Type{Ref{BigFloat}}, x::Ref{BigFloat})
- x = x[]
- if x.d == C_NULL
- x.d = pointer(x._d)
- end
- return convert(Ptr{BigFloat}, Base.pointer_from_objref(x))
-end
-
"""
BigFloat(x::Union{Real, AbstractString} [, rounding::RoundingMode=rounding(BigFloat)]; [precision::Integer=precision(BigFloat)])
@@ -283,17 +340,18 @@ function BigFloat(x::Float64, r::MPFRRoundingMode=rounding_raw(BigFloat); precis
nlimbs = (precision + 8*Core.sizeof(Limb) - 1) ÷ (8*Core.sizeof(Limb))
# Limb is a CLong which is a UInt32 on windows (thank M$) which makes this more complicated and slower.
+ zd = z.d
if Limb === UInt64
for i in 1:nlimbs-1
- unsafe_store!(z.d, 0x0, i)
+ @inbounds setindex!(zd, 0x0, i)
end
- unsafe_store!(z.d, val, nlimbs)
+ @inbounds setindex!(zd, val, nlimbs)
else
for i in 1:nlimbs-2
- unsafe_store!(z.d, 0x0, i)
+ @inbounds setindex!(zd, 0x0, i)
end
- unsafe_store!(z.d, val % UInt32, nlimbs-1)
- unsafe_store!(z.d, (val >> 32) % UInt32, nlimbs)
+ @inbounds setindex!(zd, val % UInt32, nlimbs-1)
+ @inbounds setindex!(zd, (val >> 32) % UInt32, nlimbs)
end
z
end
@@ -440,12 +498,12 @@ function to_ieee754(::Type{T}, x::BigFloat, rm) where {T<:AbstractFloat}
ret_u = if is_regular & !rounds_to_inf & !rounds_to_zero
if !exp_is_huge_p
# significand
- v = RawBigInt{Limb}(x._d, significand_limb_count(x))
+ v = x.d::BigFloatData
len = max(ieee_precision + min(exp_diff, 0), 0)::Int
signif = truncated(U, v, len) & significand_mask(T)
# round up if necessary
- rh = RawBigIntRoundingIncrementHelper(v, len)
+ rh = BigFloatDataRoundingIncrementHelper(v, len)
incr = correct_rounding_requires_increment(rh, rm, sb)
# exponent
@@ -1193,10 +1251,8 @@ set_emin!(x) = check_exponent_err(ccall((:mpfr_set_emin, libmpfr), Cint, (Clong,
function Base.deepcopy_internal(x::BigFloat, stackdict::IdDict)
get!(stackdict, x) do
- # d = copy(x._d)
- d = x._d
- d′ = GC.@preserve d unsafe_string(pointer(d), sizeof(d)) # creates a definitely-new String
- y = _BigFloat(x.prec, x.sign, x.exp, d′)
+ d′ = copy(getfield(x, :d))
+ y = _BigFloat(d′)
#ccall((:mpfr_custom_move,libmpfr), Cvoid, (Ref{BigFloat}, Ptr{Limb}), y, d) # unnecessary
return y
end::BigFloat
@@ -1210,7 +1266,8 @@ function decompose(x::BigFloat)::Tuple{BigInt, Int, Int}
s.size = cld(x.prec, 8*sizeof(Limb)) # limbs
b = s.size * sizeof(Limb) # bytes
ccall((:__gmpz_realloc2, libgmp), Cvoid, (Ref{BigInt}, Culong), s, 8b) # bits
- memcpy(s.d, x.d, b)
+ xd = x.d
+ GC.@preserve xd memcpy(s.d, Base.unsafe_convert(Ptr{Limb}, xd), b)
s, x.exp - 8b, x.sign
end
diff --git a/base/options.jl b/base/options.jl
index 41ce3c9e20909..f535c27d99122 100644
--- a/base/options.jl
+++ b/base/options.jl
@@ -34,6 +34,7 @@ struct JLOptions
can_inline::Int8
polly::Int8
trace_compile::Ptr{UInt8}
+ trace_dispatch::Ptr{UInt8}
fast_math::Int8
worker::Int8
cookie::Ptr{UInt8}
@@ -58,6 +59,7 @@ struct JLOptions
permalloc_pkgimg::Int8
heap_size_hint::UInt64
trace_compile_timing::Int8
+ trim::Int8
end
# This runs early in the sysimage != is not defined yet
diff --git a/base/path.jl b/base/path.jl
index f6d3266d9738c..69c8d22c63c54 100644
--- a/base/path.jl
+++ b/base/path.jl
@@ -614,6 +614,11 @@ for f in (:isdirpath, :splitdir, :splitdrive, :splitext, :normpath, :abspath)
@eval $f(path::AbstractString) = $f(String(path))
end
+# RFC3986 Section 2.1
+percent_escape(s) = '%' * join(map(b -> uppercase(string(b, base=16)), codeunits(s)), '%')
+# RFC3986 Section 2.3
+encode_uri_component(s) = replace(s, r"[^A-Za-z0-9\-_.~/]+" => percent_escape)
+
"""
uripath(path::AbstractString)
@@ -636,10 +641,6 @@ function uripath end
@static if Sys.iswindows()
function uripath(path::String)
- percent_escape(s) = # RFC3986 Section 2.1
- '%' * join(map(b -> uppercase(string(b, base=16)), codeunits(s)), '%')
- encode_uri_component(s) = # RFC3986 Section 2.3
- replace(s, r"[^A-Za-z0-9\-_.~/]+" => percent_escape)
path = abspath(path)
if startswith(path, "\\\\") # UNC path, RFC8089 Appendix E.3
unixpath = join(eachsplit(path, path_separator_re, keepempty=false), '/')
@@ -653,10 +654,6 @@ function uripath end
end
else
function uripath(path::String)
- percent_escape(s) = # RFC3986 Section 2.1
- '%' * join(map(b -> uppercase(string(b, base=16)), codeunits(s)), '%')
- encode_uri_component(s) = # RFC3986 Section 2.3
- replace(s, r"[^A-Za-z0-9\-_.~/]+" => percent_escape)
localpath = join(eachsplit(abspath(path), path_separator_re, keepempty=false), '/')
host = if ispath("/proc/sys/fs/binfmt_misc/WSLInterop") # WSL sigil
distro = get(ENV, "WSL_DISTRO_NAME", "") # See
diff --git a/base/precompilation.jl b/base/precompilation.jl
index d3f076633f386..b351ce67cfbad 100644
--- a/base/precompilation.jl
+++ b/base/precompilation.jl
@@ -435,51 +435,6 @@ function precompilepkgs(pkgs::Vector{String}=String[];
# consider exts of direct deps to be direct deps so that errors are reported
append!(direct_deps, keys(filter(d->last(d) in keys(env.project_deps), exts)))
- # An extension effectively depends on another extension if it has all the the
- # dependencies of that other extension
- function expand_dependencies(depsmap)
- function visit!(visited, node, all_deps)
- if node in visited
- return
- end
- push!(visited, node)
- for dep in get(Set{Base.PkgId}, depsmap, node)
- if !(dep in all_deps)
- push!(all_deps, dep)
- visit!(visited, dep, all_deps)
- end
- end
- end
-
- depsmap_transitive = Dict{Base.PkgId, Set{Base.PkgId}}()
- for package in keys(depsmap)
- # Initialize a set to keep track of all dependencies for 'package'
- all_deps = Set{Base.PkgId}()
- visited = Set{Base.PkgId}()
- visit!(visited, package, all_deps)
- # Update depsmap with the complete set of dependencies for 'package'
- depsmap_transitive[package] = all_deps
- end
- return depsmap_transitive
- end
-
- depsmap_transitive = expand_dependencies(depsmap)
-
- for (_, extensions_1) in pkg_exts_map
- for extension_1 in extensions_1
- deps_ext_1 = depsmap_transitive[extension_1]
- for (_, extensions_2) in pkg_exts_map
- for extension_2 in extensions_2
- extension_1 == extension_2 && continue
- deps_ext_2 = depsmap_transitive[extension_2]
- if issubset(deps_ext_2, deps_ext_1)
- push!(depsmap[extension_1], extension_2)
- end
- end
- end
- end
- end
-
@debug "precompile: deps collected"
# this loop must be run after the full depsmap has been populated
for (pkg, pkg_exts) in pkg_exts_map
@@ -852,7 +807,7 @@ function precompilepkgs(pkgs::Vector{String}=String[];
t = @elapsed ret = precompile_pkgs_maybe_cachefile_lock(io, print_lock, fancyprint, pkg_config, pkgspidlocked, hascolor) do
Base.with_logger(Base.NullLogger()) do
# The false here means we ignore loaded modules, so precompile for a fresh session
- Base.compilecache(pkg, sourcepath, std_pipe, std_pipe, false; flags, cacheflags)
+ Base.compilecache(pkg, sourcepath, std_pipe, std_pipe, false; flags, cacheflags, isext = haskey(exts, pkg))
end
end
if ret isa Base.PrecompilableError
diff --git a/base/rawbigints.jl b/base/rawbigfloats.jl
similarity index 54%
rename from base/rawbigints.jl
rename to base/rawbigfloats.jl
index 6508bea05be0f..4377edfc463d8 100644
--- a/base/rawbigints.jl
+++ b/base/rawbigfloats.jl
@@ -1,34 +1,21 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license
-"""
-Segment of raw words of bits interpreted as a big integer. Less
-significant words come first. Each word is in machine-native bit-order.
-"""
-struct RawBigInt{T<:Unsigned}
- d::String
- word_count::Int
-
- function RawBigInt{T}(d::String, word_count::Int) where {T<:Unsigned}
- new{T}(d, word_count)
- end
-end
+# Some operations on BigFloat can be done more directly by treating the data portion ("BigFloatData") as a BigInt
-elem_count(x::RawBigInt, ::Val{:words}) = x.word_count
+elem_count(x::BigFloatData, ::Val{:words}) = length(x)
elem_count(x::Unsigned, ::Val{:bits}) = sizeof(x) * 8
-word_length(::RawBigInt{T}) where {T} = elem_count(zero(T), Val(:bits))
-elem_count(x::RawBigInt{T}, ::Val{:bits}) where {T} = word_length(x) * elem_count(x, Val(:words))
+word_length(::BigFloatData{T}) where {T} = elem_count(zero(T), Val(:bits))
+elem_count(x::BigFloatData{T}, ::Val{:bits}) where {T} = word_length(x) * elem_count(x, Val(:words))
reversed_index(n::Int, i::Int) = n - i - 1
reversed_index(x, i::Int, v::Val) = reversed_index(elem_count(x, v), i)::Int
-split_bit_index(x::RawBigInt, i::Int) = divrem(i, word_length(x), RoundToZero)
+split_bit_index(x::BigFloatData, i::Int) = divrem(i, word_length(x), RoundToZero)
"""
`i` is the zero-based index of the wanted word in `x`, starting from
the less significant words.
"""
-function get_elem(x::RawBigInt{T}, i::Int, ::Val{:words}, ::Val{:ascending}) where {T}
- # `i` must be non-negative and less than `x.word_count`
- d = x.d
- (GC.@preserve d unsafe_load(Ptr{T}(pointer(d)), i + 1))::T
+function get_elem(x::BigFloatData{T}, i::Int, ::Val{:words}, ::Val{:ascending}) where {T}
+ @inbounds return x[i + 1]::T
end
function get_elem(x, i::Int, v::Val, ::Val{:descending})
@@ -36,9 +23,9 @@ function get_elem(x, i::Int, v::Val, ::Val{:descending})
get_elem(x, j, v, Val(:ascending))
end
-word_is_nonzero(x::RawBigInt, i::Int, v::Val) = !iszero(get_elem(x, i, Val(:words), v))
+word_is_nonzero(x::BigFloatData, i::Int, v::Val) = !iszero(get_elem(x, i, Val(:words), v))
-word_is_nonzero(x::RawBigInt, v::Val) = let x = x
+word_is_nonzero(x::BigFloatData, v::Val) = let x = x
i -> word_is_nonzero(x, i, v)
end
@@ -46,7 +33,7 @@ end
Returns a `Bool` indicating whether the `len` least significant words
of `x` are nonzero.
"""
-function tail_is_nonzero(x::RawBigInt, len::Int, ::Val{:words})
+function tail_is_nonzero(x::BigFloatData, len::Int, ::Val{:words})
any(word_is_nonzero(x, Val(:ascending)), 0:(len - 1))
end
@@ -54,7 +41,7 @@ end
Returns a `Bool` indicating whether the `len` least significant bits of
the `i`-th (zero-based index) word of `x` are nonzero.
"""
-function tail_is_nonzero(x::RawBigInt, len::Int, i::Int, ::Val{:word})
+function tail_is_nonzero(x::BigFloatData, len::Int, i::Int, ::Val{:word})
!iszero(len) &&
!iszero(get_elem(x, i, Val(:words), Val(:ascending)) << (word_length(x) - len))
end
@@ -63,7 +50,7 @@ end
Returns a `Bool` indicating whether the `len` least significant bits of
`x` are nonzero.
"""
-function tail_is_nonzero(x::RawBigInt, len::Int, ::Val{:bits})
+function tail_is_nonzero(x::BigFloatData, len::Int, ::Val{:bits})
if 0 < len
word_count, bit_count_in_word = split_bit_index(x, len)
tail_is_nonzero(x, bit_count_in_word, word_count, Val(:word)) ||
@@ -83,7 +70,7 @@ end
"""
Returns a `Bool` that is the `i`-th (zero-based index) bit of `x`.
"""
-function get_elem(x::RawBigInt, i::Int, ::Val{:bits}, v::Val{:ascending})
+function get_elem(x::BigFloatData, i::Int, ::Val{:bits}, v::Val{:ascending})
vb = Val(:bits)
if 0 ≤ i < elem_count(x, vb)
word_index, bit_index_in_word = split_bit_index(x, i)
@@ -96,38 +83,44 @@ end
"""
Returns an integer of type `R`, consisting of the `len` most
-significant bits of `x`.
+significant bits of `x`. If there are less than `len` bits in `x`,
+the least significant bits are zeroed.
"""
-function truncated(::Type{R}, x::RawBigInt, len::Int) where {R<:Integer}
+function truncated(::Type{R}, x::BigFloatData, len::Int) where {R<:Integer}
ret = zero(R)
if 0 < len
word_count, bit_count_in_word = split_bit_index(x, len)
k = word_length(x)
vals = (Val(:words), Val(:descending))
+ lenx = elem_count(x, first(vals))
for w ∈ 0:(word_count - 1)
ret <<= k
- word = get_elem(x, w, vals...)
- ret |= R(word)
+ if w < lenx # if the output type is larger, truncate turns into zero-extend
+ word = get_elem(x, w, vals...)
+ ret |= R(word)
+ end
end
if !iszero(bit_count_in_word)
ret <<= bit_count_in_word
- wrd = get_elem(x, word_count, vals...)
- ret |= R(wrd >>> (k - bit_count_in_word))
+ if word_count < lenx # if the output type is larger, truncate turns into zero-extend
+ wrd = get_elem(x, word_count, vals...)
+ ret |= R(wrd >>> (k - bit_count_in_word))
+ end
end
end
ret::R
end
-struct RawBigIntRoundingIncrementHelper{T<:Unsigned}
- n::RawBigInt{T}
+struct BigFloatDataRoundingIncrementHelper{T<:Unsigned}
+ n::BigFloatData{T}
trunc_len::Int
final_bit::Bool
round_bit::Bool
- function RawBigIntRoundingIncrementHelper{T}(n::RawBigInt{T}, len::Int) where {T<:Unsigned}
+ function BigFloatDataRoundingIncrementHelper{T}(n::BigFloatData{T}, len::Int) where {T<:Unsigned}
vals = (Val(:bits), Val(:descending))
f = get_elem(n, len - 1, vals...)
r = get_elem(n, len , vals...)
@@ -135,15 +128,15 @@ struct RawBigIntRoundingIncrementHelper{T<:Unsigned}
end
end
-function RawBigIntRoundingIncrementHelper(n::RawBigInt{T}, len::Int) where {T<:Unsigned}
- RawBigIntRoundingIncrementHelper{T}(n, len)
+function BigFloatDataRoundingIncrementHelper(n::BigFloatData{T}, len::Int) where {T<:Unsigned}
+ BigFloatDataRoundingIncrementHelper{T}(n, len)
end
-(h::RawBigIntRoundingIncrementHelper)(::Rounding.FinalBit) = h.final_bit
+(h::BigFloatDataRoundingIncrementHelper)(::Rounding.FinalBit) = h.final_bit
-(h::RawBigIntRoundingIncrementHelper)(::Rounding.RoundBit) = h.round_bit
+(h::BigFloatDataRoundingIncrementHelper)(::Rounding.RoundBit) = h.round_bit
-function (h::RawBigIntRoundingIncrementHelper)(::Rounding.StickyBit)
+function (h::BigFloatDataRoundingIncrementHelper)(::Rounding.StickyBit)
v = Val(:bits)
n = h.n
tail_is_nonzero(n, elem_count(n, v) - h.trunc_len - 1, v)
diff --git a/base/reflection.jl b/base/reflection.jl
index 5b395efc58190..80eeb4c4efb12 100644
--- a/base/reflection.jl
+++ b/base/reflection.jl
@@ -76,6 +76,17 @@ function fullname(m::Module)
return (fullname(mp)..., mn)
end
+"""
+ moduleloc(m::Module) -> LineNumberNode
+
+Get the location of the `module` definition.
+"""
+function moduleloc(m::Module)
+ line = Ref{Int32}(0)
+ file = ccall(:jl_module_getloc, Ref{Symbol}, (Any, Ref{Int32}), m, line)
+ return LineNumberNode(Int(line[]), file)
+end
+
"""
names(x::Module; all::Bool=false, imported::Bool=false, usings::Bool=false) -> Vector{Symbol}
@@ -964,7 +975,7 @@ use it in the following manner to summarize information about a struct:
julia> structinfo(T) = [(fieldoffset(T,i), fieldname(T,i), fieldtype(T,i)) for i = 1:fieldcount(T)];
julia> structinfo(Base.Filesystem.StatStruct)
-13-element Vector{Tuple{UInt64, Symbol, Type}}:
+14-element Vector{Tuple{UInt64, Symbol, Type}}:
(0x0000000000000000, :desc, Union{RawFD, String})
(0x0000000000000008, :device, UInt64)
(0x0000000000000010, :inode, UInt64)
@@ -978,6 +989,7 @@ julia> structinfo(Base.Filesystem.StatStruct)
(0x0000000000000050, :blocks, Int64)
(0x0000000000000058, :mtime, Float64)
(0x0000000000000060, :ctime, Float64)
+ (0x0000000000000068, :ioerrno, Int32)
```
"""
fieldoffset(x::DataType, idx::Integer) = (@_foldable_meta; ccall(:jl_get_field_offset, Csize_t, (Any, Cint), x, idx))
@@ -1504,6 +1516,13 @@ struct CodegenParams
"""
use_jlplt::Cint
+ """
+ If enabled, only provably reachable code (from functions marked with `entrypoint`) is included
+ in the output system image. Errors or warnings can be given for call sites too dynamic to handle.
+ The option is disabled by default. (0=>disabled, 1=>safe (static errors), 2=>unsafe, 3=>unsafe plus warnings)
+ """
+ trim::Cint
+
"""
A pointer of type
@@ -1519,14 +1538,14 @@ struct CodegenParams
prefer_specsig::Bool=false,
gnu_pubnames::Bool=true, debug_info_kind::Cint = default_debug_info_kind(),
debug_info_level::Cint = Cint(JLOptions().debug_level), safepoint_on_entry::Bool=true,
- gcstack_arg::Bool=true, use_jlplt::Bool=true,
+ gcstack_arg::Bool=true, use_jlplt::Bool=true, trim::Cint=Cint(0),
lookup::Ptr{Cvoid}=unsafe_load(cglobal(:jl_rettype_inferred_addr, Ptr{Cvoid})))
return new(
Cint(track_allocations), Cint(code_coverage),
Cint(prefer_specsig),
Cint(gnu_pubnames), debug_info_kind,
debug_info_level, Cint(safepoint_on_entry),
- Cint(gcstack_arg), Cint(use_jlplt),
+ Cint(gcstack_arg), Cint(use_jlplt), Cint(trim),
lookup)
end
end
@@ -2440,7 +2459,7 @@ true
```
"""
function hasmethod(@nospecialize(f), @nospecialize(t))
- return Core._hasmethod(f, t isa Type ? t : to_tuple_type(t))
+ return Core._hasmethod(signature_type(f, t))
end
function Core.kwcall(kwargs::NamedTuple, ::typeof(hasmethod), @nospecialize(f), @nospecialize(t))
diff --git a/base/stat.jl b/base/stat.jl
index 506b5644dccbc..c6fb239a96404 100644
--- a/base/stat.jl
+++ b/base/stat.jl
@@ -63,6 +63,7 @@ struct StatStruct
blocks :: Int64
mtime :: Float64
ctime :: Float64
+ ioerrno :: Int32
end
@eval function Base.:(==)(x::StatStruct, y::StatStruct) # do not include `desc` in equality or hash
@@ -80,22 +81,23 @@ end
end)
end
-StatStruct() = StatStruct("", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
-StatStruct(buf::Union{Vector{UInt8},Ptr{UInt8}}) = StatStruct("", buf)
-StatStruct(desc::Union{AbstractString, OS_HANDLE}, buf::Union{Vector{UInt8},Ptr{UInt8}}) = StatStruct(
+StatStruct() = StatStruct("", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Base.UV_ENOENT)
+StatStruct(buf::Union{Memory{UInt8},Vector{UInt8},Ptr{UInt8}}, ioerrno::Int32) = StatStruct("", buf, ioerrno)
+StatStruct(desc::Union{AbstractString, OS_HANDLE}, buf::Union{Memory{UInt8},Vector{UInt8},Ptr{UInt8}}, ioerrno::Int32) = StatStruct(
desc isa OS_HANDLE ? desc : String(desc),
- ccall(:jl_stat_dev, UInt32, (Ptr{UInt8},), buf),
- ccall(:jl_stat_ino, UInt32, (Ptr{UInt8},), buf),
- ccall(:jl_stat_mode, UInt32, (Ptr{UInt8},), buf),
- ccall(:jl_stat_nlink, UInt32, (Ptr{UInt8},), buf),
- ccall(:jl_stat_uid, UInt32, (Ptr{UInt8},), buf),
- ccall(:jl_stat_gid, UInt32, (Ptr{UInt8},), buf),
- ccall(:jl_stat_rdev, UInt32, (Ptr{UInt8},), buf),
- ccall(:jl_stat_size, UInt64, (Ptr{UInt8},), buf),
- ccall(:jl_stat_blksize, UInt64, (Ptr{UInt8},), buf),
- ccall(:jl_stat_blocks, UInt64, (Ptr{UInt8},), buf),
- ccall(:jl_stat_mtime, Float64, (Ptr{UInt8},), buf),
- ccall(:jl_stat_ctime, Float64, (Ptr{UInt8},), buf),
+ ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_dev, UInt32, (Ptr{UInt8},), buf),
+ ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_ino, UInt32, (Ptr{UInt8},), buf),
+ ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_mode, UInt32, (Ptr{UInt8},), buf),
+ ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_nlink, UInt32, (Ptr{UInt8},), buf),
+ ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_uid, UInt32, (Ptr{UInt8},), buf),
+ ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_gid, UInt32, (Ptr{UInt8},), buf),
+ ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_rdev, UInt32, (Ptr{UInt8},), buf),
+ ioerrno != 0 ? zero(UInt64) : ccall(:jl_stat_size, UInt64, (Ptr{UInt8},), buf),
+ ioerrno != 0 ? zero(UInt64) : ccall(:jl_stat_blksize, UInt64, (Ptr{UInt8},), buf),
+ ioerrno != 0 ? zero(UInt64) : ccall(:jl_stat_blocks, UInt64, (Ptr{UInt8},), buf),
+ ioerrno != 0 ? zero(Float64) : ccall(:jl_stat_mtime, Float64, (Ptr{UInt8},), buf),
+ ioerrno != 0 ? zero(Float64) : ccall(:jl_stat_ctime, Float64, (Ptr{UInt8},), buf),
+ ioerrno
)
function iso_datetime_with_relative(t, tnow)
@@ -130,35 +132,41 @@ end
function show_statstruct(io::IO, st::StatStruct, oneline::Bool)
print(io, oneline ? "StatStruct(" : "StatStruct for ")
show(io, st.desc)
- oneline || print(io, "\n ")
- print(io, " size: ", st.size, " bytes")
- oneline || print(io, "\n")
- print(io, " device: ", st.device)
- oneline || print(io, "\n ")
- print(io, " inode: ", st.inode)
- oneline || print(io, "\n ")
- print(io, " mode: 0o", string(filemode(st), base = 8, pad = 6), " (", filemode_string(st), ")")
- oneline || print(io, "\n ")
- print(io, " nlink: ", st.nlink)
- oneline || print(io, "\n ")
- print(io, " uid: $(st.uid)")
- username = getusername(st.uid)
- username === nothing || print(io, " (", username, ")")
- oneline || print(io, "\n ")
- print(io, " gid: ", st.gid)
- groupname = getgroupname(st.gid)
- groupname === nothing || print(io, " (", groupname, ")")
- oneline || print(io, "\n ")
- print(io, " rdev: ", st.rdev)
- oneline || print(io, "\n ")
- print(io, " blksz: ", st.blksize)
- oneline || print(io, "\n")
- print(io, " blocks: ", st.blocks)
- tnow = round(UInt, time())
- oneline || print(io, "\n ")
- print(io, " mtime: ", iso_datetime_with_relative(st.mtime, tnow))
- oneline || print(io, "\n ")
- print(io, " ctime: ", iso_datetime_with_relative(st.ctime, tnow))
+ code = st.ioerrno
+ if code != 0
+ print(io, oneline ? " " : "\n ")
+ print(io, Base.uverrorname(code), ": ", Base.struverror(code))
+ else
+ oneline || print(io, "\n ")
+ print(io, " size: ", st.size, " bytes")
+ oneline || print(io, "\n")
+ print(io, " device: ", st.device)
+ oneline || print(io, "\n ")
+ print(io, " inode: ", st.inode)
+ oneline || print(io, "\n ")
+ print(io, " mode: 0o", string(filemode(st), base = 8, pad = 6), " (", filemode_string(st), ")")
+ oneline || print(io, "\n ")
+ print(io, " nlink: ", st.nlink)
+ oneline || print(io, "\n ")
+ print(io, " uid: $(st.uid)")
+ username = getusername(st.uid)
+ username === nothing || print(io, " (", username, ")")
+ oneline || print(io, "\n ")
+ print(io, " gid: ", st.gid)
+ groupname = getgroupname(st.gid)
+ groupname === nothing || print(io, " (", groupname, ")")
+ oneline || print(io, "\n ")
+ print(io, " rdev: ", st.rdev)
+ oneline || print(io, "\n ")
+ print(io, " blksz: ", st.blksize)
+ oneline || print(io, "\n")
+ print(io, " blocks: ", st.blocks)
+ tnow = round(UInt, time())
+ oneline || print(io, "\n ")
+ print(io, " mtime: ", iso_datetime_with_relative(st.mtime, tnow))
+ oneline || print(io, "\n ")
+ print(io, " ctime: ", iso_datetime_with_relative(st.ctime, tnow))
+ end
oneline && print(io, ")")
return nothing
end
@@ -168,18 +176,13 @@ show(io::IO, ::MIME"text/plain", st::StatStruct) = show_statstruct(io, st, false
# stat & lstat functions
+checkstat(s::StatStruct) = Int(s.ioerrno) in (0, Base.UV_ENOENT, Base.UV_ENOTDIR, Base.UV_EINVAL) ? s : uv_error(string("stat(", repr(s.desc), ")"), s.ioerrno)
+
macro stat_call(sym, arg1type, arg)
return quote
- stat_buf = zeros(UInt8, Int(ccall(:jl_sizeof_stat, Int32, ())))
+ stat_buf = fill!(Memory{UInt8}(undef, Int(ccall(:jl_sizeof_stat, Int32, ()))), 0x00)
r = ccall($(Expr(:quote, sym)), Int32, ($(esc(arg1type)), Ptr{UInt8}), $(esc(arg)), stat_buf)
- if !(r in (0, Base.UV_ENOENT, Base.UV_ENOTDIR, Base.UV_EINVAL))
- uv_error(string("stat(", repr($(esc(arg))), ")"), r)
- end
- st = StatStruct($(esc(arg)), stat_buf)
- if ispath(st) != (r == 0)
- error("stat returned zero type for a valid path")
- end
- return st
+ return checkstat(StatStruct($(esc(arg)), stat_buf, r))
end
end
@@ -334,7 +337,7 @@ Return `true` if a valid filesystem entity exists at `path`,
otherwise returns `false`.
This is the generalization of [`isfile`](@ref), [`isdir`](@ref) etc.
"""
-ispath(st::StatStruct) = filemode(st) & 0xf000 != 0x0000
+ispath(st::StatStruct) = st.ioerrno == 0
function ispath(path::String)
# We use `access()` and `F_OK` to determine if a given path exists. `F_OK` comes from `unistd.h`.
F_OK = 0x00
diff --git a/base/strings/io.jl b/base/strings/io.jl
index c78e3e2e043b6..df34712b519d5 100644
--- a/base/strings/io.jl
+++ b/base/strings/io.jl
@@ -42,13 +42,17 @@ end
function print(io::IO, xs...)
lock(io)
try
- foreach(Fix1(print, io), xs)
+ for x in xs
+ print(io, x)
+ end
finally
unlock(io)
end
return nothing
end
+setfield!(typeof(print).name.mt, :max_args, 10, :monotonic)
+
"""
println([io::IO], xs...)
@@ -72,6 +76,7 @@ julia> String(take!(io))
"""
println(io::IO, xs...) = print(io, xs..., "\n")
+setfield!(typeof(println).name.mt, :max_args, 10, :monotonic)
## conversion of general objects to strings ##
"""
@@ -136,20 +141,33 @@ function print_to_string(xs...)
if isempty(xs)
return ""
end
- siz = sum(_str_sizehint, xs; init = 0)
+ siz::Int = 0
+ for x in xs
+ siz += _str_sizehint(x)
+ end
+ # specialized for performance reasons
s = IOBuffer(sizehint=siz)
- print(s, xs...)
+ for x in xs
+ print(s, x)
+ end
String(_unsafe_take!(s))
end
+setfield!(typeof(print_to_string).name.mt, :max_args, 10, :monotonic)
function string_with_env(env, xs...)
if isempty(xs)
return ""
end
- siz = sum(_str_sizehint, xs; init = 0)
+ siz::Int = 0
+ for x in xs
+ siz += _str_sizehint(x)
+ end
+ # specialized for performance reasons
s = IOBuffer(sizehint=siz)
env_io = IOContext(s, env)
- print(env_io, xs...)
+ for x in xs
+ print(env_io, x)
+ end
String(_unsafe_take!(s))
end
diff --git a/base/terminfo.jl b/base/terminfo.jl
index 79713f4a86aa3..8ea8387077d36 100644
--- a/base/terminfo.jl
+++ b/base/terminfo.jl
@@ -262,7 +262,7 @@ function find_terminfo_file(term::String)
append!(terminfo_dirs,
replace(split(ENV["TERMINFO_DIRS"], ':'),
"" => "/usr/share/terminfo"))
- push!(terminfo_dirs, normpath(Sys.BINDIR, DATAROOTDIR, "terminfo"))
+ push!(terminfo_dirs, normpath(Sys.BINDIR, DATAROOTDIR, "julia", "terminfo"))
Sys.isunix() &&
push!(terminfo_dirs, "/etc/terminfo", "/lib/terminfo", "/usr/share/terminfo")
for dir in terminfo_dirs
diff --git a/base/timing.jl b/base/timing.jl
index 80ebb74abee26..b094aa230e1c2 100644
--- a/base/timing.jl
+++ b/base/timing.jl
@@ -104,6 +104,33 @@ function gc_page_utilization_data()
return Base.unsafe_wrap(Array, page_utilization_raw, JL_GC_N_MAX_POOLS, own=false)
end
+# must be kept in sync with `src/gc-stock.h``
+const FULL_SWEEP_REASONS = [:FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL, :FULL_SWEEP_REASON_FORCED_FULL_SWEEP,
+ :FULL_SWEEP_REASON_USER_MAX_EXCEEDED, :FULL_SWEEP_REASON_LARGE_PROMOTION_RATE]
+
+"""
+ Base.full_sweep_reasons()
+
+Return a dictionary of the number of times each full sweep reason has occurred.
+
+The reasons are:
+- `:FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL`: Full sweep was caused due to `always_full` being set in the GC debug environment
+- `:FULL_SWEEP_REASON_FORCED_FULL_SWEEP`: Full sweep was forced by `GC.gc(true)`
+- `:FULL_SWEEP_REASON_USER_MAX_EXCEEDED`: Full sweep was forced due to the system reaching the heap soft size limit
+- `:FULL_SWEEP_REASON_LARGE_PROMOTION_RATE`: Full sweep was forced by a large promotion rate across GC generations
+
+Note that the set of reasons is not guaranteed to be stable across minor versions of Julia.
+"""
+function full_sweep_reasons()
+ reason = cglobal(:jl_full_sweep_reasons, UInt64)
+ reasons_as_array = Base.unsafe_wrap(Vector{UInt64}, reason, length(FULL_SWEEP_REASONS), own=false)
+ d = Dict{Symbol, Int64}()
+ for (i, r) in enumerate(FULL_SWEEP_REASONS)
+ d[r] = reasons_as_array[i]
+ end
+ return d
+end
+
"""
Base.jit_total_bytes()
@@ -179,7 +206,7 @@ function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, lock_confl
print(io, length(timestr) < 10 ? (" "^(10 - length(timestr))) : "")
end
print(io, timestr, " seconds")
- parens = bytes != 0 || allocs != 0 || gctime > 0 || compile_time > 0
+ parens = bytes != 0 || allocs != 0 || gctime > 0 || lock_conflicts > 0 || compile_time > 0
parens && print(io, " (")
if bytes != 0 || allocs != 0
allocs, ma = prettyprint_getunits(allocs, length(_cnt_units), Int64(1000))
@@ -197,11 +224,14 @@ function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, lock_confl
print(io, Ryu.writefixed(Float64(100*gctime/elapsedtime), 2), "% gc time")
end
if lock_conflicts > 0
+ if bytes != 0 || allocs != 0 || gctime > 0
+ print(io, ", ")
+ end
plural = lock_conflicts == 1 ? "" : "s"
- print(io, ", ", lock_conflicts, " lock conflict$plural")
+ print(io, lock_conflicts, " lock conflict$plural")
end
if compile_time > 0
- if bytes != 0 || allocs != 0 || gctime > 0
+ if bytes != 0 || allocs != 0 || gctime > 0 || lock_conflicts > 0
print(io, ", ")
end
print(io, Ryu.writefixed(Float64(100*compile_time/elapsedtime), 2), "% compilation time")
diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl
index d3e73a1b1865a..60f7290c7a0ac 100644
--- a/contrib/generate_precompile.jl
+++ b/contrib/generate_precompile.jl
@@ -347,8 +347,7 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe
print_state("step1" => "F$n_step1")
return :ok
end
- Base.errormonitor(step1)
- !PARALLEL_PRECOMPILATION && wait(step1)
+ PARALLEL_PRECOMPILATION ? bind(statements_step1, step1) : wait(step1)
# Create a staging area where all the loaded packages are available
PrecompileStagingArea = Module()
@@ -362,7 +361,7 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe
# Make statements unique
statements = Set{String}()
# Execute the precompile statements
- for sts in [statements_step1,], statement in sts
+ for statement in statements_step1
# Main should be completely clean
occursin("Main.", statement) && continue
Base.in!(statement, statements) && continue
@@ -398,6 +397,7 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe
println()
# Seems like a reasonable number right now, adjust as needed
# comment out if debugging script
+ have_repl = false
n_succeeded > (have_repl ? 650 : 90) || @warn "Only $n_succeeded precompile statements"
fetch(step1) == :ok || throw("Step 1 of collecting precompiles failed.")
@@ -408,7 +408,6 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe
finally
fancyprint && print(ansi_enablecursor)
GC.gc(true); GC.gc(false); # reduce memory footprint
- return
end
generate_precompile_statements()
diff --git a/contrib/julia-config.jl b/contrib/julia-config.jl
index c692b3f522fb2..8b1eb55cbe4f4 100755
--- a/contrib/julia-config.jl
+++ b/contrib/julia-config.jl
@@ -67,7 +67,7 @@ function ldlibs(doframework)
"julia"
end
if Sys.isunix()
- return "-Wl,-rpath,$(shell_escape(libDir())) -Wl,-rpath,$(shell_escape(private_libDir())) -l$libname"
+ return "-L$(shell_escape(private_libDir())) -Wl,-rpath,$(shell_escape(libDir())) -Wl,-rpath,$(shell_escape(private_libDir())) -l$libname"
else
return "-l$libname -lopenlibm"
end
diff --git a/contrib/juliac-buildscript.jl b/contrib/juliac-buildscript.jl
new file mode 100644
index 0000000000000..50f96198c416b
--- /dev/null
+++ b/contrib/juliac-buildscript.jl
@@ -0,0 +1,277 @@
+# Script to run in the process that generates juliac's object file output
+
+inputfile = ARGS[1]
+output_type = ARGS[2]
+add_ccallables = ARGS[3] == "true"
+
+# Initialize some things not usually initialized when output is requested
+Sys.__init__()
+Base.init_depot_path()
+Base.init_load_path()
+Base.init_active_project()
+task = current_task()
+task.rngState0 = 0x5156087469e170ab
+task.rngState1 = 0x7431eaead385992c
+task.rngState2 = 0x503e1d32781c2608
+task.rngState3 = 0x3a77f7189200c20b
+task.rngState4 = 0x5502376d099035ae
+uuid_tuple = (UInt64(0), UInt64(0))
+ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), Base.__toplevel__, uuid_tuple)
+ccall(:jl_set_newly_inferred, Cvoid, (Any,), Core.Compiler.newly_inferred)
+
+# Patch methods in Core and Base
+
+@eval Core begin
+ DomainError(@nospecialize(val), @nospecialize(msg::AbstractString)) = (@noinline; $(Expr(:new, :DomainError, :val, :msg)))
+end
+
+(f::Base.RedirectStdStream)(io::Core.CoreSTDOUT) = Base._redirect_io_global(io, f.unix_fd)
+
+@eval Base begin
+ _assert_tostring(msg) = ""
+ reinit_stdio() = nothing
+ JuliaSyntax.enable_in_core!() = nothing
+ init_active_project() = ACTIVE_PROJECT[] = nothing
+ set_active_project(projfile::Union{AbstractString,Nothing}) = ACTIVE_PROJECT[] = projfile
+ disable_library_threading() = nothing
+ start_profile_listener() = nothing
+ @inline function invokelatest(f::F, args...; kwargs...) where F
+ return f(args...; kwargs...)
+ end
+ function sprint(f::F, args::Vararg{Any,N}; context=nothing, sizehint::Integer=0) where {F<:Function,N}
+ s = IOBuffer(sizehint=sizehint)
+ if context isa Tuple
+ f(IOContext(s, context...), args...)
+ elseif context !== nothing
+ f(IOContext(s, context), args...)
+ else
+ f(s, args...)
+ end
+ String(_unsafe_take!(s))
+ end
+ function show_typeish(io::IO, @nospecialize(T))
+ if T isa Type
+ show(io, T)
+ elseif T isa TypeVar
+ print(io, (T::TypeVar).name)
+ else
+ print(io, "?")
+ end
+ end
+ function show(io::IO, T::Type)
+ if T isa DataType
+ print(io, T.name.name)
+ if T !== T.name.wrapper && length(T.parameters) > 0
+ print(io, "{")
+ first = true
+ for p in T.parameters
+ if !first
+ print(io, ", ")
+ end
+ first = false
+ if p isa Int
+ show(io, p)
+ elseif p isa Type
+ show(io, p)
+ elseif p isa Symbol
+ print(io, ":")
+ print(io, p)
+ elseif p isa TypeVar
+ print(io, p.name)
+ else
+ print(io, "?")
+ end
+ end
+ print(io, "}")
+ end
+ elseif T isa Union
+ print(io, "Union{")
+ show_typeish(io, T.a)
+ print(io, ", ")
+ show_typeish(io, T.b)
+ print(io, "}")
+ elseif T isa UnionAll
+ print(io, T.body::Type)
+ print(io, " where ")
+ print(io, T.var.name)
+ end
+ end
+ show_type_name(io::IO, tn::Core.TypeName) = print(io, tn.name)
+
+ mapreduce(f::F, op::F2, A::AbstractArrayOrBroadcasted; dims=:, init=_InitialValue()) where {F, F2} =
+ _mapreduce_dim(f, op, init, A, dims)
+ mapreduce(f::F, op::F2, A::AbstractArrayOrBroadcasted...; kw...) where {F, F2} =
+ reduce(op, map(f, A...); kw...)
+
+ _mapreduce_dim(f::F, op::F2, nt, A::AbstractArrayOrBroadcasted, ::Colon) where {F, F2} =
+ mapfoldl_impl(f, op, nt, A)
+
+ _mapreduce_dim(f::F, op::F2, ::_InitialValue, A::AbstractArrayOrBroadcasted, ::Colon) where {F, F2} =
+ _mapreduce(f, op, IndexStyle(A), A)
+
+ _mapreduce_dim(f::F, op::F2, nt, A::AbstractArrayOrBroadcasted, dims) where {F, F2} =
+ mapreducedim!(f, op, reducedim_initarray(A, dims, nt), A)
+
+ _mapreduce_dim(f::F, op::F2, ::_InitialValue, A::AbstractArrayOrBroadcasted, dims) where {F,F2} =
+ mapreducedim!(f, op, reducedim_init(f, op, A, dims), A)
+
+ mapreduce_empty_iter(f::F, op::F2, itr, ItrEltype) where {F, F2} =
+ reduce_empty_iter(MappingRF(f, op), itr, ItrEltype)
+ mapreduce_first(f::F, op::F2, x) where {F,F2} = reduce_first(op, f(x))
+
+ _mapreduce(f::F, op::F2, A::AbstractArrayOrBroadcasted) where {F,F2} = _mapreduce(f, op, IndexStyle(A), A)
+ mapreduce_empty(::typeof(identity), op::F, T) where {F} = reduce_empty(op, T)
+ mapreduce_empty(::typeof(abs), op::F, T) where {F} = abs(reduce_empty(op, T))
+ mapreduce_empty(::typeof(abs2), op::F, T) where {F} = abs2(reduce_empty(op, T))
+end
+@eval Base.Unicode begin
+ function utf8proc_map(str::Union{String,SubString{String}}, options::Integer, chartransform::F = identity) where F
+ nwords = utf8proc_decompose(str, options, C_NULL, 0, chartransform)
+ buffer = Base.StringVector(nwords*4)
+ nwords = utf8proc_decompose(str, options, buffer, nwords, chartransform)
+ nbytes = ccall(:utf8proc_reencode, Int, (Ptr{UInt8}, Int, Cint), buffer, nwords, options)
+ nbytes < 0 && utf8proc_error(nbytes)
+ return String(resize!(buffer, nbytes))
+ end
+end
+@eval Base.GMP begin
+ function __init__()
+ try
+ ccall((:__gmp_set_memory_functions, libgmp), Cvoid,
+ (Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}),
+ cglobal(:jl_gc_counted_malloc),
+ cglobal(:jl_gc_counted_realloc_with_old_size),
+ cglobal(:jl_gc_counted_free_with_size))
+ ZERO.alloc, ZERO.size, ZERO.d = 0, 0, C_NULL
+ ONE.alloc, ONE.size, ONE.d = 1, 1, pointer(_ONE)
+ catch ex
+ Base.showerror_nostdio(ex, "WARNING: Error during initialization of module GMP")
+ end
+ # This only works with a patched version of GMP, ignore otherwise
+ try
+ ccall((:__gmp_set_alloc_overflow_function, libgmp), Cvoid,
+ (Ptr{Cvoid},),
+ cglobal(:jl_throw_out_of_memory_error))
+ ALLOC_OVERFLOW_FUNCTION[] = true
+ catch ex
+ # ErrorException("ccall: could not find function...")
+ if typeof(ex) != ErrorException
+ rethrow()
+ end
+ end
+ end
+end
+@eval Base.Sort begin
+ issorted(itr;
+ lt::T=isless, by::F=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward) where {T,F} =
+ issorted(itr, ord(lt,by,rev,order))
+end
+@eval Base.TOML begin
+ function try_return_datetime(p, year, month, day, h, m, s, ms)
+ return DateTime(year, month, day, h, m, s, ms)
+ end
+ function try_return_date(p, year, month, day)
+ return Date(year, month, day)
+ end
+ function parse_local_time(l::Parser)
+ h = @try parse_int(l, false)
+ h in 0:23 || return ParserError(ErrParsingDateTime)
+ _, m, s, ms = @try _parse_local_time(l, true)
+ # TODO: Could potentially parse greater accuracy for the
+ # fractional seconds here.
+ return try_return_time(l, h, m, s, ms)
+ end
+ function try_return_time(p, h, m, s, ms)
+ return Time(h, m, s, ms)
+ end
+end
+
+# Load user code
+
+import Base.Experimental.entrypoint
+
+let mod = Base.include(Base.__toplevel__, inputfile)
+ if !isa(mod, Module)
+ mod = Main
+ end
+ if output_type == "--output-exe" && isdefined(mod, :main) && !add_ccallables
+ entrypoint(mod.main, ())
+ end
+ #entrypoint(join, (Base.GenericIOBuffer{Memory{UInt8}}, Array{Base.SubString{String}, 1}, String))
+ #entrypoint(join, (Base.GenericIOBuffer{Memory{UInt8}}, Array{String, 1}, Char))
+ entrypoint(Base.task_done_hook, (Task,))
+ entrypoint(Base.wait, ())
+ entrypoint(Base.trypoptask, (Base.StickyWorkqueue,))
+ entrypoint(Base.checktaskempty, ())
+ if add_ccallables
+ ccall(:jl_add_ccallable_entrypoints, Cvoid, ())
+ end
+end
+
+# Additional method patches depending on whether user code loads certain stdlibs
+
+let loaded = Symbol.(Base.loaded_modules_array()) # TODO better way to do this
+ if :SparseArrays in loaded
+ using SparseArrays
+ @eval SparseArrays.CHOLMOD begin
+ function __init__()
+ ccall((:SuiteSparse_config_malloc_func_set, :libsuitesparseconfig),
+ Cvoid, (Ptr{Cvoid},), cglobal(:jl_malloc, Ptr{Cvoid}))
+ ccall((:SuiteSparse_config_calloc_func_set, :libsuitesparseconfig),
+ Cvoid, (Ptr{Cvoid},), cglobal(:jl_calloc, Ptr{Cvoid}))
+ ccall((:SuiteSparse_config_realloc_func_set, :libsuitesparseconfig),
+ Cvoid, (Ptr{Cvoid},), cglobal(:jl_realloc, Ptr{Cvoid}))
+ ccall((:SuiteSparse_config_free_func_set, :libsuitesparseconfig),
+ Cvoid, (Ptr{Cvoid},), cglobal(:jl_free, Ptr{Cvoid}))
+ end
+ end
+ end
+ if :Artifacts in loaded
+ using Artifacts
+ @eval Artifacts begin
+ function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dict, hash, platform, _::Val{lazyartifacts}) where lazyartifacts
+ moduleroot = Base.moduleroot(__module__)
+ if haskey(Base.module_keys, moduleroot)
+ # Process overrides for this UUID, if we know what it is
+ process_overrides(artifact_dict, Base.module_keys[moduleroot].uuid)
+ end
+
+ # If the artifact exists, we're in the happy path and we can immediately
+ # return the path to the artifact:
+ dirs = artifact_paths(hash; honor_overrides=true)
+ for dir in dirs
+ if isdir(dir)
+ return jointail(dir, path_tail)
+ end
+ end
+ end
+ end
+ end
+ if :Pkg in loaded
+ using Pkg
+ @eval Pkg begin
+ __init__() = rand() #TODO, methods that do nothing don't get codegened
+ end
+ end
+ if :StyledStrings in loaded
+ using StyledStrings
+ @eval StyledStrings begin
+ __init__() = rand()
+ end
+ end
+end
+
+empty!(Core.ARGS)
+empty!(Base.ARGS)
+empty!(LOAD_PATH)
+empty!(DEPOT_PATH)
+empty!(Base.TOML_CACHE.d)
+Base.TOML.reinit!(Base.TOML_CACHE.p, "")
+Base.ACTIVE_PROJECT[] = nothing
+@eval Base begin
+ PROGRAM_FILE = ""
+end
+@eval Sys begin
+ BINDIR = ""
+ STDLIB = ""
+end
diff --git a/contrib/juliac.jl b/contrib/juliac.jl
new file mode 100644
index 0000000000000..61e0e91958667
--- /dev/null
+++ b/contrib/juliac.jl
@@ -0,0 +1,110 @@
+# Julia compiler wrapper script
+# NOTE: The interface and location of this script are considered unstable/experimental
+
+cmd = Base.julia_cmd()
+cmd = `$cmd --startup-file=no --history-file=no`
+output_type = nothing # exe, sharedlib, sysimage
+trim = nothing
+outname = nothing
+file = nothing
+add_ccallables = false
+
+help = findfirst(x->x == "--help", ARGS)
+if help !== nothing
+ println(
+ """
+    Usage: julia juliac.jl [--output-exe | --output-lib | --output-sysimage] <name> [options] <file.jl>
+    --trim=<no,safe,unsafe,unsafe-warn> Only output code statically determined to be reachable
+ --compile-ccallable Include all methods marked `@ccallable` in output
+ --verbose Request verbose output
+ """)
+ exit(0)
+end
+
+let i = 1
+ while i <= length(ARGS)
+ arg = ARGS[i]
+ if arg == "--output-exe" || arg == "--output-lib" || arg == "--output-sysimage"
+ isnothing(output_type) || error("Multiple output types specified")
+ global output_type = arg
+ i == length(ARGS) && error("Output specifier requires an argument")
+ global outname = ARGS[i+1]
+ i += 1
+ elseif startswith(arg, "--trim")
+ arg = split(arg, '=')
+ if length(arg) == 1
+ global trim = "safe"
+ else
+ global trim = arg[2]
+ end
+ elseif arg == "--compile-ccallable"
+ global add_ccallables = true
+ else
+ if arg[1] == '-' || !isnothing(file)
+ println("Unexpected argument `$arg`")
+ exit(1)
+ end
+ global file = arg
+ end
+ i += 1
+ end
+end
+
+isnothing(outname) && error("No output file specified")
+isnothing(file) && error("No input file specified")
+
+absfile = abspath(file)
+cflags = readchomp(`$(cmd) $(joinpath(Sys.BINDIR, Base.DATAROOTDIR,"julia", "julia-config.jl")) --cflags `)
+cflags = Base.shell_split(cflags)
+allflags = readchomp(`$(cmd) $(joinpath(Sys.BINDIR, Base.DATAROOTDIR,"julia", "julia-config.jl")) --allflags`)
+allflags = Base.shell_split(allflags)
+tmpdir = mktempdir(cleanup=false)
+initsrc_path = joinpath(tmpdir, "init.c")
+init_path = joinpath(tmpdir, "init.a")
+img_path = joinpath(tmpdir, "img.a")
+bc_path = joinpath(tmpdir, "img-bc.a")
+
+open(initsrc_path, "w") do io
+ print(io, """
+    #include <julia.h>
+ __attribute__((constructor)) void static_init(void) {
+ if (jl_is_initialized())
+ return;
+ julia_init(JL_IMAGE_IN_MEMORY);
+ jl_exception_clear();
+ }
+ """)
+end
+
+static_call_graph_arg() = isnothing(trim) ? `` : `--trim=$(trim)`
+is_verbose() = verbose ? `--verbose-compilation=yes` : ``
+cmd = addenv(`$cmd --project=$(Base.active_project()) --output-o $img_path --output-incremental=no --strip-ir --strip-metadata $(static_call_graph_arg()) $(joinpath(@__DIR__,"juliac-buildscript.jl")) $absfile $output_type $add_ccallables`, "OPENBLAS_NUM_THREADS" => 1, "JULIA_NUM_THREADS" => 1)
+
+if !success(pipeline(cmd; stdout, stderr))
+ println(stderr, "\nFailed to compile $file")
+ exit(1)
+end
+
+run(`cc $(cflags) -g -c -o $init_path $initsrc_path`)
+
+if output_type == "--output-lib" || output_type == "--output-sysimage"
+ of, ext = splitext(outname)
+ soext = "." * Base.BinaryPlatforms.platform_dlext()
+ if ext == ""
+ outname = of * soext
+ end
+end
+
+julia_libs = Base.shell_split(Base.isdebugbuild() ? "-ljulia-debug -ljulia-internal-debug" : "-ljulia -ljulia-internal")
+try
+ if output_type == "--output-lib"
+ run(`cc $(allflags) -o $outname -shared -Wl,$(Base.Linking.WHOLE_ARCHIVE) $img_path -Wl,$(Base.Linking.NO_WHOLE_ARCHIVE) $init_path $(julia_libs)`)
+ elseif output_type == "--output-sysimage"
+ run(`cc $(allflags) -o $outname -shared -Wl,$(Base.Linking.WHOLE_ARCHIVE) $img_path -Wl,$(Base.Linking.NO_WHOLE_ARCHIVE) $(julia_libs)`)
+ else
+ run(`cc $(allflags) -o $outname -Wl,$(Base.Linking.WHOLE_ARCHIVE) $img_path -Wl,$(Base.Linking.NO_WHOLE_ARCHIVE) $init_path $(julia_libs)`)
+ end
+catch
+ println("\nCompilation failed.")
+ exit(1)
+end
diff --git a/deps/checksums/Pkg-51d4910c114a863d888659cb8962c1e161b2a421.tar.gz/md5 b/deps/checksums/Pkg-51d4910c114a863d888659cb8962c1e161b2a421.tar.gz/md5
new file mode 100644
index 0000000000000..b5b82565470c0
--- /dev/null
+++ b/deps/checksums/Pkg-51d4910c114a863d888659cb8962c1e161b2a421.tar.gz/md5
@@ -0,0 +1 @@
+88b8a25a8d465ac8cc94d13bc5f51707
diff --git a/deps/checksums/Pkg-51d4910c114a863d888659cb8962c1e161b2a421.tar.gz/sha512 b/deps/checksums/Pkg-51d4910c114a863d888659cb8962c1e161b2a421.tar.gz/sha512
new file mode 100644
index 0000000000000..a746b269d91f0
--- /dev/null
+++ b/deps/checksums/Pkg-51d4910c114a863d888659cb8962c1e161b2a421.tar.gz/sha512
@@ -0,0 +1 @@
+22262687f3bf75292ab0170e19a9c4a494022a653b2811443b8c52bc099dee0fddd09f6632ae42b3193adf3b0693ddcb6679b5d91e50a500f65261df5b7ced7d
diff --git a/deps/checksums/Pkg-ef9f76c175872bab6803da4a5fa3fd99bce3d03a.tar.gz/md5 b/deps/checksums/Pkg-ef9f76c175872bab6803da4a5fa3fd99bce3d03a.tar.gz/md5
deleted file mode 100644
index 39dbb56dbaf53..0000000000000
--- a/deps/checksums/Pkg-ef9f76c175872bab6803da4a5fa3fd99bce3d03a.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-080b5cb82d208245cba014f1dfcb8033
diff --git a/deps/checksums/Pkg-ef9f76c175872bab6803da4a5fa3fd99bce3d03a.tar.gz/sha512 b/deps/checksums/Pkg-ef9f76c175872bab6803da4a5fa3fd99bce3d03a.tar.gz/sha512
deleted file mode 100644
index 2f95d4a0e28da..0000000000000
--- a/deps/checksums/Pkg-ef9f76c175872bab6803da4a5fa3fd99bce3d03a.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-1b91505c78d2608afa89ceea16f645bb41c0737815aec1853ad72c9751e7299b264135c9a40a6319f68b973073a151619b925d7a9655c46526bccf501b116113
diff --git a/deps/llvm.mk b/deps/llvm.mk
index 73697069a4fac..3f4bc3e6746f0 100644
--- a/deps/llvm.mk
+++ b/deps/llvm.mk
@@ -102,7 +102,7 @@ endif
LLVM_CMAKE += -DLLVM_TOOLS_INSTALL_DIR=$(call rel_path,$(build_prefix),$(build_depsbindir))
LLVM_CMAKE += -DLLVM_UTILS_INSTALL_DIR=$(call rel_path,$(build_prefix),$(build_depsbindir))
LLVM_CMAKE += -DLLVM_INCLUDE_UTILS=ON -DLLVM_INSTALL_UTILS=ON
-LLVM_CMAKE += -DLLVM_BINDINGS_LIST="" -DLLVM_ENABLE_BINDINGS=OFF -DLLVM_INCLUDE_DOCS=Off -DLLVM_ENABLE_TERMINFO=Off -DHAVE_LIBEDIT=Off
+LLVM_CMAKE += -DLLVM_BINDINGS_LIST="" -DLLVM_ENABLE_BINDINGS=OFF -DLLVM_INCLUDE_DOCS=Off -DLLVM_ENABLE_TERMINFO=Off -DHAVE_LIBEDIT=Off -DLLVM_ENABLE_LIBEDIT=OFF
ifeq ($(LLVM_ASSERTIONS), 1)
LLVM_CMAKE += -DLLVM_ENABLE_ASSERTIONS:BOOL=ON
endif # LLVM_ASSERTIONS
diff --git a/deps/terminfo.mk b/deps/terminfo.mk
index 63194f786f566..60865838a813e 100644
--- a/deps/terminfo.mk
+++ b/deps/terminfo.mk
@@ -22,8 +22,8 @@ $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-checked: $(BUILDDIR)/TermInfoDB-v$
echo 1 > $@
define TERMINFO_INSTALL
- mkdir -p $2/$$(build_datarootdir)
- cp -R $1/terminfo $2/$$(build_datarootdir)
+ mkdir -p $2/$$(build_datarootdir)/julia
+ cp -R $1/terminfo $2/$$(build_datarootdir)/julia/
endef
$(eval $(call staged-install, \
terminfo,TermInfoDB-v$(TERMINFO_VER), \
diff --git a/doc/man/julia.1 b/doc/man/julia.1
index 536a23bd37894..56cb690d66eeb 100644
--- a/doc/man/julia.1
+++ b/doc/man/julia.1
@@ -290,6 +290,10 @@ Methods that were recompiled are printed in yellow or with a trailing comment if
--trace-compile-timing=
If --trace-compile is enabled show how long each took to compile in ms
+.TP
+--trace-dispatch={stderr|name}
+Print precompile statements for methods dispatched during execution or save to stderr or a path.
+
.TP
-image-codegen
Force generate code in imaging mode
diff --git a/doc/src/devdocs/sysimg.md b/doc/src/devdocs/sysimg.md
index 7d4f7afdbb86a..64c309e1fb02a 100644
--- a/doc/src/devdocs/sysimg.md
+++ b/doc/src/devdocs/sysimg.md
@@ -117,3 +117,80 @@ See code comments for each components for more implementation details.
depending on the ISA. The target selection will prefer exact CPU name match,
larger vector register size, and larger number of features.
An overview of this process is in `src/processor.cpp`.
+
+## Trimming
+
+System images are typically quite large, since Base includes a lot of functionality, and by
+default system images also include several packages such as LinearAlgebra for convenience
+and backwards compatibility. Most programs will use only a fraction of the functions in
+these packages. Therefore it makes sense to build binaries that exclude unused functions
+to save space, referred to as "trimming".
+
+While the basic idea of trimming is sound, Julia has dynamic and reflective features that make it
+difficult (or impossible) to know in general which functions are unused. As an extreme example,
+consider code like
+
+```
+getglobal(Base, Symbol(readchomp(stdin)))(1)
+```
+
+This code reads a function name from `stdin` and calls the named function from Base on the value
+`1`. In this case it is impossible to predict which function will be called, so no functions
+can reliably be considered "unused". With some noteworthy exceptions (Julia's own REPL being
+one of them), most real-world programs do not do things like this.
+
+Less extreme cases occur, for example, when there are type instabilities that make it impossible
+for the compiler to predict which method will be called. However, if code is well-typed and does
+not use reflection, a complete and (hopefully) relatively small set of needed methods can be
+determined, and the rest can be removed. The `--trim` command-line option requests this kind of
+compilation.
+
+When `--trim` is specified in a command used to build a system image, the compiler begins
+tracing calls starting at methods marked using `Base.Experimental.entrypoint`. If a call is too
+dynamic to reasonably narrow down the possible call targets, an error is given at compile
+time showing the location of the call. For testing purposes, it is possible to skip these
+errors by specifying `--trim=unsafe` or `--trim=unsafe-warn`. Then you will get a system
+image built, but it may crash at run time if needed code is not present.
+
+It typically makes sense to specify `--strip-ir` along with `--trim`, since trimmed binaries
+are fully compiled and therefore don't need Julia IR. At some point we may make `--trim` imply
+`--strip-ir`, but for now we have kept them orthogonal.
+
+To get the smallest possible binary, it will also help to specify `--strip-metadata` and
+run the Unix `strip` utility. However, those steps remove Julia-specific and native (DWARF format)
+debug info, respectively, and so will make debugging more difficult.
+
+### Common problems
+
+- The Base global variables `stdin`, `stdout`, and `stderr` are non-constant and so their
+ types are not known. All printing should use a specific IO object with a known type.
+ The easiest substitution is to use `print(Core.stdout, x)` instead of `print(x)` or
+ `print(stdout, x)`.
+- Use tools like `JET`, `Cthulhu`, and/or `SnoopCompile` to identify failures of type-inference, and
+ follow our [Performance Tips](@ref) to fix them.
+
+### Compatibility concerns
+
+We have identified many small changes to Base that significantly increase the set of programs
+that can be reliably trimmed. Unfortunately some of those changes would be considered breaking,
+and so are only applied when trimming is requested (this is done by an external build script,
+currently maintained inside the test suite as `test/trimming/buildscript.jl`).
+Therefore in many cases trimming will require you to opt in to new variants of Base and some
+standard libraries.
+
+If you want to use trimming, it is important to set up continuous integration testing that
+performs a trimmed build and fully tests the resulting program.
+Fortunately, if your program successfully compiles with `--trim` then it is very likely to work
+the same as it did before. However, CI is needed to ensure that your program continues to build
+with trimming as you develop it.
+
+Package authors may wish to test that their package is "trimming safe", however this is impossible
+in general. Trimming is only expected to work given concrete entry points such as `main()` and
+library entry points meant to be called from outside Julia. For generic packages, existing tests
+for type stability like `@inferred` and `JET` are about as close as you can get to checking
+trim compatibility.
+
+Trimming also introduces new compatibility issues between minor versions of Julia. At this time,
+we are not able to guarantee that a program that can be trimmed in one version of Julia
+can also be trimmed in all future versions of Julia. However, breakage of that kind is expected
+to be rare. We also plan to try to *increase* the set of programs that can be trimmed over time.
diff --git a/doc/src/manual/command-line-interface.md b/doc/src/manual/command-line-interface.md
index 41c3eacd61d26..5255720e55cd7 100644
--- a/doc/src/manual/command-line-interface.md
+++ b/doc/src/manual/command-line-interface.md
@@ -216,9 +216,10 @@ The following is a complete list of command-line switches available when launchi
|`--output-incremental={yes\|no*}` |Generate an incremental output file (rather than complete)|
|`--trace-compile={stderr\|name}` |Print precompile statements for methods compiled during execution or save to stderr or a path. Methods that were recompiled are printed in yellow or with a trailing comment if color is not supported|
|`--trace-compile-timing` |If --trace-compile is enabled show how long each took to compile in ms|
+|`--trace-dispatch={stderr\|name}` |Print precompile statements for methods dispatched during execution or save to stderr or a path.|
|`--image-codegen` |Force generate code in imaging mode|
|`--permalloc-pkgimg={yes\|no*}` |Copy the data section of package images into memory|
-
+|`--trim={no*\|safe\|unsafe\|unsafe-warn}` |Build a sysimage including only code provably reachable from methods marked by calling `entrypoint`. The three non-default options differ in how they handle dynamic call sites. In safe mode, such sites result in compile-time errors. In unsafe mode, such sites are allowed but the resulting binary might be missing needed code and can throw runtime errors. With unsafe-warn, such sites will trigger warnings at compile-time and might error at runtime.|
!!! compat "Julia 1.1"
In Julia 1.0, the default `--project=@.` option did not search up from the root
diff --git a/src/Makefile b/src/Makefile
index 52e673aa6cc1a..a6b1f433b73ce 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -287,10 +287,10 @@ endif
$(INSTALL_NAME_CMD)libccalltest.$(SHLIB_EXT) $@
$(build_shlibdir)/libccalllazyfoo.$(SHLIB_EXT): $(SRCDIR)/ccalllazyfoo.c
- @$(call PRINT_CC, $(CC) $(JCFLAGS) $(JL_CFLAGS) $(JCPPFLAGS) $(FLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(call SONAME_FLAGS,ccalllazyfoo.$(SHLIB_EXT)))
+ @$(call PRINT_CC, $(CC) $(JCFLAGS) $(JL_CFLAGS) $(JCPPFLAGS) $(FLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(call SONAME_FLAGS,libccalllazyfoo.$(SHLIB_EXT)))
$(build_shlibdir)/libccalllazybar.$(SHLIB_EXT): $(SRCDIR)/ccalllazybar.c $(build_shlibdir)/libccalllazyfoo.$(SHLIB_EXT)
- @$(call PRINT_CC, $(CC) $(JCFLAGS) $(JL_CFLAGS) $(JCPPFLAGS) $(FLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(call SONAME_FLAGS,ccalllazybar.$(SHLIB_EXT)) -lccalllazyfoo)
+ @$(call PRINT_CC, $(CC) $(JCFLAGS) $(JL_CFLAGS) $(JCPPFLAGS) $(FLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(call SONAME_FLAGS,libccalllazybar.$(SHLIB_EXT)) -lccalllazyfoo)
$(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT): $(SRCDIR)/llvmcalltest.cpp $(LLVM_CONFIG_ABSOLUTE)
@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(FLAGS) $(CPPFLAGS) $(CXXFLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(NO_WHOLE_ARCHIVE) $(CG_LLVMLINK)) -lpthread
diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp
index b4c8ef6095a55..c2f112f9c9d5c 100644
--- a/src/aotcompile.cpp
+++ b/src/aotcompile.cpp
@@ -95,6 +95,17 @@ void jl_get_function_id_impl(void *native_code, jl_code_instance_t *codeinst,
}
}
+extern "C" JL_DLLEXPORT_CODEGEN
+void jl_get_llvm_mis_impl(void *native_code, arraylist_t* MIs)
+{
+ jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code;
+ auto map = data->jl_fvar_map;
+ for (auto &ci : map) {
+ jl_method_instance_t *mi = ci.first->def;
+ arraylist_push(MIs, mi);
+ }
+}
+
extern "C" JL_DLLEXPORT_CODEGEN
void jl_get_llvm_gvs_impl(void *native_code, arraylist_t *gvs)
{
@@ -284,6 +295,7 @@ jl_code_instance_t *jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_
jl_value_t *ci = cgparams.lookup(mi, world, world);
JL_GC_PROMISE_ROOTED(ci);
jl_code_instance_t *codeinst = NULL;
+ JL_GC_PUSH1(&codeinst);
if (ci != jl_nothing && jl_atomic_load_relaxed(&((jl_code_instance_t *)ci)->inferred) != jl_nothing) {
codeinst = (jl_code_instance_t*)ci;
}
@@ -301,9 +313,11 @@ jl_code_instance_t *jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_
jl_mi_cache_insert(mi, codeinst);
}
}
+ JL_GC_POP();
return codeinst;
}
+arraylist_t new_invokes;
// takes the running content that has collected in the shadow module and dump it to disk
// this builds the object file portion of the sysimage files for fast startup, and can
// also be used be extern consumers like GPUCompiler.jl to obtain a module containing
@@ -353,8 +367,12 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
params.imaging_mode = imaging;
params.debug_level = cgparams->debug_info_level;
params.external_linkage = _external_linkage;
+ arraylist_new(&new_invokes, 0);
size_t compile_for[] = { jl_typeinf_world, _world };
- for (int worlds = 0; worlds < 2; worlds++) {
+ int worlds = 0;
+ if (jl_options.trim != JL_TRIM_NO)
+ worlds = 1;
+ for (; worlds < 2; worlds++) {
JL_TIMING(NATIVE_AOT, NATIVE_Codegen);
size_t this_world = compile_for[worlds];
if (!this_world)
@@ -373,6 +391,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
continue;
}
mi = (jl_method_instance_t*)item;
+compile_mi:
src = NULL;
// if this method is generally visible to the current compilation world,
// and this is either the primary world, or not applicable in the primary world
@@ -380,16 +399,47 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
if (jl_atomic_load_relaxed(&mi->def.method->primary_world) <= this_world && this_world <= jl_atomic_load_relaxed(&mi->def.method->deleted_world)) {
// find and prepare the source code to compile
jl_code_instance_t *codeinst = jl_ci_cache_lookup(*cgparams, mi, this_world);
- if (codeinst && !params.compiled_functions.count(codeinst)) {
+ if (jl_options.trim != JL_TRIM_NO && !codeinst) {
+ // If we're building a small image, we need to compile everything
+ // to ensure that we have all the information we need.
+ jl_safe_printf("Codegen decided not to compile code root");
+ jl_(mi);
+ abort();
+ }
+ if (codeinst && !params.compiled_functions.count(codeinst) && !data->jl_fvar_map.count(codeinst)) {
// now add it to our compilation results
- JL_GC_PROMISE_ROOTED(codeinst->rettype);
- orc::ThreadSafeModule result_m = jl_create_ts_module(name_from_method_instance(codeinst->def),
- params.tsctx, clone.getModuleUnlocked()->getDataLayout(),
- Triple(clone.getModuleUnlocked()->getTargetTriple()));
- jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, NULL, params);
- if (result_m)
- params.compiled_functions[codeinst] = {std::move(result_m), std::move(decls)};
+ // Const returns do not do codegen, but juliac inspects codegen results so make a dummy fvar entry to represent it
+ if (jl_options.trim != JL_TRIM_NO && jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) {
+ data->jl_fvar_map[codeinst] = std::make_tuple((uint32_t)-3, (uint32_t)-3);
+ } else {
+ JL_GC_PROMISE_ROOTED(codeinst->rettype);
+ orc::ThreadSafeModule result_m = jl_create_ts_module(name_from_method_instance(codeinst->def),
+ params.tsctx, clone.getModuleUnlocked()->getDataLayout(),
+ Triple(clone.getModuleUnlocked()->getTargetTriple()));
+ jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, NULL, params);
+ if (result_m)
+ params.compiled_functions[codeinst] = {std::move(result_m), std::move(decls)};
+ else if (jl_options.trim != JL_TRIM_NO) {
+ // if we're building a small image, we need to compile everything
+ // to ensure that we have all the information we need.
+ jl_safe_printf("codegen failed to compile code root");
+ jl_(mi);
+ abort();
+ }
+ }
}
+ } else if (this_world != jl_typeinf_world) {
+ /*
+ jl_safe_printf("Codegen could not find requested codeinstance to be compiled\n");
+ jl_(mi);
+ abort();
+ */
+ }
+ // TODO: is goto the best way to do this?
+ jl_compile_workqueue(params, policy);
+ mi = (jl_method_instance_t*)arraylist_pop(&new_invokes);
+ if (mi != NULL) {
+ goto compile_mi;
}
}
@@ -397,6 +447,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
jl_compile_workqueue(params, policy);
}
JL_GC_POP();
+ arraylist_free(&new_invokes);
// process the globals array, before jl_merge_module destroys them
SmallVector gvars(params.global_targets.size());
diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index 993ccd9ccc05a..f37db5c93aa5c 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -2336,6 +2336,12 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
ret = emit_invoke(ctx, *modifyop, argv, 3, (jl_value_t*)jl_any_type);
}
else {
+ if (trim_may_error(ctx.params->trim)) {
+ // if we know the return type, we can assume the result is of that type
+ errs() << "ERROR: Dynamic call to setfield/modifyfield\n";
+ errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n";
+ print_stacktrace(ctx, ctx.params->trim);
+ }
Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, 3, julia_call);
ret = mark_julia_type(ctx, callval, true, jl_any_type);
}
@@ -3637,6 +3643,8 @@ static void union_alloca_type(jl_uniontype_t *ut,
},
(jl_value_t*)ut,
counter);
+ if (align > JL_HEAP_ALIGNMENT)
+ align = JL_HEAP_ALIGNMENT;
}
static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut, bool &allunbox, size_t &min_align, size_t &nbytes)
@@ -4075,6 +4083,12 @@ static jl_cgval_t union_store(jl_codectx_t &ctx,
rhs = emit_invoke(ctx, *modifyop, argv, 3, (jl_value_t*)jl_any_type);
}
else {
+ if (trim_may_error(ctx.params->trim)) {
+ // if we know the return type, we can assume the result is of that type
+ errs() << "ERROR: Dynamic call to setfield/modifyfield\n";
+ errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n";
+ print_stacktrace(ctx, ctx.params->trim);
+ }
Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, 3, julia_call);
rhs = mark_julia_type(ctx, callval, true, jl_any_type);
}
diff --git a/src/codegen-stubs.c b/src/codegen-stubs.c
index 41812d903816c..7ddb68fd6b036 100644
--- a/src/codegen-stubs.c
+++ b/src/codegen-stubs.c
@@ -15,6 +15,7 @@ JL_DLLEXPORT void jl_dump_native_fallback(void *native_code,
ios_t *z, ios_t *s) UNAVAILABLE
JL_DLLEXPORT void jl_get_llvm_gvs_fallback(void *native_code, arraylist_t *gvs) UNAVAILABLE
JL_DLLEXPORT void jl_get_llvm_external_fns_fallback(void *native_code, arraylist_t *gvs) UNAVAILABLE
+JL_DLLEXPORT void jl_get_llvm_mis_fallback(void *native_code, arraylist_t* MIs) UNAVAILABLE
JL_DLLEXPORT void jl_extern_c_fallback(jl_function_t *f, jl_value_t *rt, jl_value_t *argt, char *name) UNAVAILABLE
JL_DLLEXPORT jl_value_t *jl_dump_method_asm_fallback(jl_method_instance_t *linfo, size_t world,
diff --git a/src/codegen.cpp b/src/codegen.cpp
index abb21fcbca27e..a7a985284c87b 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -16,6 +16,7 @@
#include
#include
#include
+#include
#include
// target machine computation
@@ -1651,31 +1652,23 @@ static const auto &builtin_func_map() {
return builtins;
}
+static const auto &may_dispatch_builtins() {
+ static std::unordered_set builtins(
+ {jl_f__apply_iterate_addr,
+ jl_f__apply_pure_addr,
+ jl_f__call_in_world_addr,
+ jl_f__call_in_world_total_addr,
+ jl_f__call_latest_addr,
+ });
+ return builtins;
+}
+
static const auto jl_new_opaque_closure_jlcall_func = new JuliaFunction<>{XSTR(jl_new_opaque_closure_jlcall), get_func_sig, get_func_attrs};
static _Atomic(uint64_t) globalUniqueGeneratedNames{1};
// --- code generation ---
-extern "C" {
- jl_cgparams_t jl_default_cgparams = {
- /* track_allocations */ 1,
- /* code_coverage */ 1,
- /* prefer_specsig */ 0,
-#ifdef _OS_WINDOWS_
- /* gnu_pubnames */ 0,
-#else
- /* gnu_pubnames */ 1,
-#endif
- /* debug_info_kind */ (int) DICompileUnit::DebugEmissionKind::FullDebug,
- /* debug_line_info */ 1,
- /* safepoint_on_entry */ 1,
- /* gcstack_arg */ 1,
- /* use_jlplt*/ 1,
- /* lookup */ jl_rettype_inferred_addr };
-}
-
-
static MDNode *best_tbaa(jl_tbaacache_t &tbaa_cache, jl_value_t *jt) {
jt = jl_unwrap_unionall(jt);
if (jt == (jl_value_t*)jl_datatype_type ||
@@ -1987,7 +1980,7 @@ class jl_codectx_t {
size_t max_world = -1;
const char *name = NULL;
StringRef file{};
- ssize_t *line = NULL;
+ int32_t line = -1;
Value *spvals_ptr = NULL;
Value *argArray = NULL;
Value *argCount = NULL;
@@ -2146,6 +2139,179 @@ static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p);
static unsigned julia_alignment(jl_value_t *jt);
static void recombine_value(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dst, jl_aliasinfo_t const &dst_ai, Align alignment, bool isVolatile);
+static void print_stack_crumbs(jl_codectx_t &ctx)
+{
+ errs() << "\n";
+ errs() << "Stacktrace:\n";
+ jl_method_instance_t *caller = ctx.linfo;
+ jl_((jl_value_t*)caller);
+ errs() << "In " << ctx.file << ":" << ctx.line << "\n";
+ while (true) {
+ auto it = ctx.emission_context.enqueuers.find(caller);
+ if (it != ctx.emission_context.enqueuers.end()) {
+ caller = std::get(it->second);
+ } else {
+ break;
+ }
+ if (caller) {
+ if (jl_is_method_instance(caller)) {
+ for (auto it2 = std::get(it->second).begin(); it2 != (std::prev(std::get(it->second).end())); ++it2) {
+ auto frame = *it2;
+ errs() << std::get<0>(frame) << " \n";
+ errs() << "In " << std::get<1>(frame) << ":" << std::get(frame) << "\n";
+ }
+ auto &frame = std::get(it->second).front();
+ jl_((jl_value_t*)caller);
+ errs() << "In " << std::get<1>(frame) << ":" << std::get(frame) << "\n";
+ }
+ }
+ else
+ break;
+ }
+ abort();
+}
+
+static jl_value_t *StackFrame(
+ jl_value_t *linfo,
+ std::string fn_name,
+ std::string filepath,
+ int32_t lineno,
+ jl_value_t *inlined)
+{
+ jl_value_t *StackFrame = jl_get_global(jl_base_module, jl_symbol("StackFrame"));
+ assert(StackFrame != nullptr);
+
+ jl_value_t *args[7] = {
+ /* func */ (jl_value_t *)jl_symbol(fn_name.c_str()),
+ /* line */ (jl_value_t *)jl_symbol(filepath.c_str()),
+ /* line */ jl_box_int32(lineno),
+ /* linfo */ (jl_value_t *)linfo,
+ /* from_c */ jl_false,
+ /* inlined */ inlined,
+ /* pointer */ jl_box_uint64(0)
+ };
+
+ jl_value_t *frame = nullptr;
+ JL_TRY {
+ frame = jl_apply_generic(StackFrame, args, 7);
+ } JL_CATCH {
+ jl_safe_printf("Error creating stack frame\n");
+ }
+ return frame;
+}
+
+static void push_frames(jl_codectx_t &ctx, jl_method_instance_t *caller, jl_method_instance_t *callee, int no_debug=false)
+{
+ CallFrames frames;
+ auto it = ctx.emission_context.enqueuers.find(callee);
+ if (it != ctx.emission_context.enqueuers.end())
+ return;
+ if (no_debug) { // Used in tojlinvoke
+ frames.push_back({ctx.funcName, "", 0});
+ ctx.emission_context.enqueuers.insert({callee, {caller, std::move(frames)}});
+ return;
+ }
+ auto DL = ctx.builder.getCurrentDebugLocation();
+ auto filename = std::string(DL->getFilename());
+ auto line = DL->getLine();
+ auto fname = std::string(DL->getScope()->getSubprogram()->getName());
+ frames.push_back({fname, filename, line});
+ auto DI = DL.getInlinedAt();
+ while (DI) {
+ auto filename = std::string(DI->getFilename());
+ auto line = DI->getLine();
+ auto fname = std::string(DI->getScope()->getSubprogram()->getName());
+ frames.push_back({fname, filename, line});
+ DI = DI->getInlinedAt();
+ }
+ ctx.emission_context.enqueuers.insert({callee, {caller, std::move(frames)}});
+}
+
+static jl_array_t* build_stack_crumbs(jl_codectx_t &ctx) JL_NOTSAFEPOINT
+{
+ static intptr_t counter = 5;
+ jl_method_instance_t *caller = (jl_method_instance_t*)counter; //nothing serves as a sentinel for the bottom for the stack
+ push_frames(ctx, ctx.linfo, (jl_method_instance_t*)caller);
+ counter++;
+ jl_array_t *out = jl_alloc_array_1d(jl_array_any_type, 0);
+ JL_GC_PUSH1(&out);
+ while (true) {
+ auto it = ctx.emission_context.enqueuers.find(caller);
+ if (it != ctx.emission_context.enqueuers.end()) {
+ caller = std::get(it->second);
+ } else {
+ break;
+ }
+ if (caller) {
+ assert(ctx.emission_context.enqueuers.count(caller) == 1);
+ if (jl_is_method_instance(caller)) {
+ //TODO: Use a subrange when C++20 is a thing
+ for (auto it2 = std::get(it->second).begin(); it2 != (std::prev(std::get(it->second).end())); ++it2) {
+ auto frame = *it2;
+ jl_value_t *stackframe = StackFrame(jl_nothing, std::get<0>(frame), std::get<1>(frame), std::get(frame), jl_true);
+ if (stackframe == nullptr)
+ print_stack_crumbs(ctx);
+ jl_array_ptr_1d_push(out, stackframe);
+ }
+ auto &frame = std::get(it->second).back();
+ jl_value_t *stackframe = StackFrame((jl_value_t *)caller, std::get<0>(frame), std::get<1>(frame), std::get(frame), jl_false);
+ if (stackframe == nullptr)
+ print_stack_crumbs(ctx);
+ jl_array_ptr_1d_push(out, stackframe);
+ }
+ }
+ else
+ break;
+ }
+ JL_GC_POP();
+ return out;
+}
+
+static void print_stacktrace(jl_codectx_t &ctx, int trim)
+{
+ jl_task_t *ct = jl_get_current_task();
+ assert(ct);
+
+ // Temporarily operate in the current age
+ size_t last_age = ct->world_age;
+ ct->world_age = jl_get_world_counter();
+ jl_array_t* bt = build_stack_crumbs(ctx);
+ JL_GC_PUSH1(&bt);
+
+ // Call `reinit_stdio` to get TTY IO objects (w/ color)
+ jl_value_t *reinit_stdio = jl_get_global(jl_base_module, jl_symbol("_reinit_stdio"));
+ assert(reinit_stdio);
+ jl_apply_generic(reinit_stdio, nullptr, 0);
+
+ // Show the backtrace
+ jl_value_t *show_backtrace = jl_get_global(jl_base_module, jl_symbol("show_backtrace"));
+ jl_value_t *base_stderr = jl_get_global(jl_base_module, jl_symbol("stderr"));
+ assert(show_backtrace && base_stderr);
+
+ JL_TRY {
+ jl_value_t *args[2] = { base_stderr, (jl_value_t *)bt };
+ jl_apply_generic(show_backtrace, args, 2);
+ } JL_CATCH {
+ jl_printf(JL_STDERR,"Error showing backtrace\n");
+ print_stack_crumbs(ctx);
+ }
+
+ jl_printf(JL_STDERR, "\n\n");
+ JL_GC_POP();
+ ct->world_age = last_age;
+
+ if (trim == JL_TRIM_SAFE) {
+ jl_printf(JL_STDERR,"Aborting compilation due to finding a dynamic dispatch");
+ exit(1);
+ }
+ return;
+}
+
+static int trim_may_error(int trim)
+{
+ return (trim == JL_TRIM_SAFE) || (trim == JL_TRIM_UNSAFE_WARN);
+}
+
static GlobalVariable *prepare_global_in(Module *M, JuliaVariable *G)
{
return G->realize(M);
@@ -4281,6 +4447,12 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
Value *theArgs = emit_ptrgep(ctx, ctx.argArray, ctx.nReqArgs * sizeof(jl_value_t*));
Value *r = ctx.builder.CreateCall(prepare_call(jlapplygeneric_func), { theF, theArgs, nva });
*ret = mark_julia_type(ctx, r, true, jl_any_type);
+ if (trim_may_error(ctx.params->trim)) {
+ // if we know the return type, we can assume the result is of that type
+ errs() << "ERROR: Dynamic call to Core._apply_iterate detected\n";
+ errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n";
+ print_stacktrace(ctx, ctx.params->trim);
+ }
return true;
}
}
@@ -5151,7 +5323,7 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos
Value *val = arg.V;
SmallVector roots(arg.inline_roots);
if (roots.empty())
- std::tie(val, roots) = split_value(ctx, arg, Align(jl_datatype_align(jt)));
+ std::tie(val, roots) = split_value(ctx, arg, Align(julia_alignment(jt)));
AllocaInst *proots = emit_static_roots(ctx, roots.size());
for (size_t i = 0; i < roots.size(); i++)
ctx.builder.CreateAlignedStore(roots[i], emit_ptrgep(ctx, proots, i * sizeof(void*)), Align(sizeof(void*)));
@@ -5388,12 +5560,25 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, ArrayR
if (need_to_emit) {
Function *trampoline_decl = cast(jl_Module->getNamedValue(protoname));
ctx.call_targets[codeinst] = {cc, return_roots, trampoline_decl, specsig};
+ if (trim_may_error(ctx.params->trim))
+ push_frames(ctx, ctx.linfo, mi);
}
}
}
}
}
if (!handled) {
+ if (trim_may_error(ctx.params->trim)) {
+ if (lival.constant) {
+ arraylist_push(&new_invokes, lival.constant);
+ push_frames(ctx, ctx.linfo, (jl_method_instance_t*)lival.constant);
+ } else {
+ errs() << "Dynamic call to unknown function";
+ errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n";
+
+ print_stacktrace(ctx, ctx.params->trim);
+ }
+ }
Value *r = emit_jlcall(ctx, jlinvoke_func, boxed(ctx, lival), argv, nargs, julia_call2);
result = mark_julia_type(ctx, r, true, rt);
}
@@ -5453,7 +5638,12 @@ static jl_cgval_t emit_invoke_modify(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_
return mark_julia_type(ctx, oldnew, true, rt);
}
}
-
+ if (trim_may_error(ctx.params->trim)) {
+ errs() << "ERROR: dynamic invoke modify call to";
+ jl_(args[0]);
+ errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n";
+ print_stacktrace(ctx, ctx.params->trim);
+ }
// emit function and arguments
Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, nargs, julia_call);
return mark_julia_type(ctx, callval, true, rt);
@@ -5523,10 +5713,15 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo
bool handled = emit_builtin_call(ctx, &result, f.constant, argv, nargs - 1, rt, ex, is_promotable);
if (handled)
return result;
-
+ jl_fptr_args_t builtin_fptr = jl_get_builtin_fptr((jl_datatype_t*)jl_typeof(f.constant));
// special case for some known builtin not handled by emit_builtin_call
- auto it = builtin_func_map().find(jl_get_builtin_fptr((jl_datatype_t*)jl_typeof(f.constant)));
+ auto it = builtin_func_map().find(builtin_fptr);
if (it != builtin_func_map().end()) {
+ if (trim_may_error(ctx.params->trim) && may_dispatch_builtins().count(builtin_fptr)) {
+ errs() << "ERROR: Dynamic call to builtin" << jl_symbol_name(((jl_datatype_t*)jl_typeof(f.constant))->name->name);
+ errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n";
+ print_stacktrace(ctx, ctx.params->trim);
+ }
Value *ret = emit_jlcall(ctx, it->second, Constant::getNullValue(ctx.types().T_prjlvalue), ArrayRef(argv).drop_front(), nargs - 1, julia_call);
setName(ctx.emission_context, ret, it->second->name + "_ret");
return mark_julia_type(ctx, ret, true, rt);
@@ -5565,7 +5760,79 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo
}
}
}
+ int failed_dispatch = !argv[0].constant;
+ if (ctx.params->trim != JL_TRIM_NO) {
+ size_t min_valid = 1;
+ size_t max_valid = ~(size_t)0;
+ size_t latest_world = jl_get_world_counter(); // TODO: marshal the world age of the compilation here.
+
+ // Find all methods matching the call signature
+ jl_array_t *matches = NULL;
+ jl_value_t *tup = NULL;
+ JL_GC_PUSH2(&tup, &matches);
+ if (!failed_dispatch) {
+ SmallVector argtypes;
+ for (auto& arg: argv)
+ argtypes.push_back(arg.typ);
+ tup = jl_apply_tuple_type_v(argtypes.data(), argtypes.size());
+ matches = (jl_array_t*)jl_matching_methods((jl_tupletype_t*)tup, jl_nothing, 10 /*TODO: make global*/, 1,
+ latest_world, &min_valid, &max_valid, NULL);
+ if ((jl_value_t*)matches == jl_nothing)
+ failed_dispatch = 1;
+ }
+
+ // Expand each matching method to its unique specialization, if it has exactly one
+ if (!failed_dispatch) {
+ size_t k;
+ size_t len = new_invokes.len;
+ for (k = 0; k < jl_array_nrows(matches); k++) {
+ jl_method_match_t *match = (jl_method_match_t *)jl_array_ptr_ref(matches, k);
+ jl_method_instance_t *mi = jl_method_match_to_mi(match, latest_world, min_valid, max_valid, 0);
+ if (!mi) {
+ if (jl_array_nrows(matches) == 1) {
+ // if the method match is not compileable, but there is only one, fall back to
+ // unspecialized implementation
+ mi = jl_get_unspecialized(match->method);
+ }
+ else {
+ new_invokes.len = len;
+ failed_dispatch = 1;
+ break;
+ }
+ }
+ arraylist_push(&new_invokes, mi);
+ }
+ }
+ JL_GC_POP();
+ }
+ if (failed_dispatch && trim_may_error(ctx.params->trim)) {
+ errs() << "Dynamic call to ";
+ jl_jmp_buf *old_buf = jl_get_safe_restore();
+ jl_jmp_buf buf;
+ jl_set_safe_restore(&buf);
+ if (!jl_setjmp(buf, 0)) {
+ jl_static_show((JL_STREAM*)STDERR_FILENO, (jl_value_t*)args[0]);
+ jl_printf((JL_STREAM*)STDERR_FILENO,"(");
+ for (size_t i = 1; i < nargs; ++i) {
+ jl_value_t *typ = argv[i].typ;
+ if (!jl_is_concrete_type(typ)) // Print type in red
+ jl_printf((JL_STREAM*)STDERR_FILENO, "\x1b[31m");
+ jl_static_show((JL_STREAM*)STDERR_FILENO, (jl_value_t*)argv[i].typ);
+ if (!jl_is_concrete_type(typ))
+ jl_printf((JL_STREAM*)STDERR_FILENO, "\x1b[0m");
+ if (i != nargs-1)
+ jl_printf((JL_STREAM*)STDERR_FILENO,", ");
+ }
+ jl_printf((JL_STREAM*)STDERR_FILENO,")\n");
+ }
+ else {
+ jl_printf((JL_STREAM*)STDERR_FILENO, "\n!!! ERROR while printing error -- ABORTING !!!\n");
+ }
+ jl_set_safe_restore(old_buf);
+ errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n";
+ print_stacktrace(ctx, ctx.params->trim);
+ }
// emit function and arguments
Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, n_generic_args, julia_call);
return mark_julia_type(ctx, callval, true, rt);
@@ -6710,6 +6977,13 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
((jl_method_t*)source.constant)->nargs > 0 &&
jl_is_valid_oc_argtype((jl_tupletype_t*)argt.constant, (jl_method_t*)source.constant);
+ if (!can_optimize && trim_may_error(ctx.params->trim)) {
+ // if we know the return type, we can assume the result is of that type
+ errs() << "ERROR: Dynamic call to OpaqueClosure method\n";
+ errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n";
+ print_stacktrace(ctx, ctx.params->trim);
+ }
+
if (can_optimize) {
jl_value_t *closure_t = NULL;
jl_value_t *env_t = NULL;
@@ -6909,6 +7183,11 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptr
GlobalVariable::InternalLinkage,
name, M);
jl_init_function(f, params.TargetTriple);
+ if (trim_may_error(params.params->trim)) {
+ arraylist_push(&new_invokes, codeinst->def); // Try t compile this invoke
+ // TODO: Debuginfo!
+ push_frames(ctx, ctx.linfo, codeinst->def, 1);
+ }
jl_name_jlfunc_args(params, f);
//f->setAlwaysInline();
ctx.f = f; // for jl_Module
@@ -7859,7 +8138,7 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value
}
props.cc = jl_returninfo_t::SRet;
props.union_bytes = jl_datatype_size(jlrettype);
- props.union_align = props.union_minalign = jl_datatype_align(jlrettype);
+ props.union_align = props.union_minalign = julia_alignment(jlrettype);
// sret is always passed from alloca
assert(M);
fsig.push_back(rt->getPointerTo(M->getDataLayout().getAllocaAddrSpace()));
@@ -8126,6 +8405,7 @@ static jl_llvm_functions_t
if (lam && jl_is_method(lam->def.method)) {
toplineno = lam->def.method->line;
ctx.file = jl_symbol_name(lam->def.method->file);
+ ctx.line = lam->def.method->line;
}
else if ((jl_value_t*)src->debuginfo != jl_nothing) {
// look for the file and line info of the original start of this block, as reported by lowering
@@ -8134,6 +8414,7 @@ static jl_llvm_functions_t
debuginfo = debuginfo->linetable;
ctx.file = jl_debuginfo_file(debuginfo);
struct jl_codeloc_t lineidx = jl_uncompress1_codeloc(debuginfo->codelocs, 0);
+ ctx.line = lineidx.line;
toplineno = std::max((int32_t)0, lineidx.line);
}
if (ctx.file.empty())
@@ -9904,7 +10185,7 @@ void jl_compile_workqueue(
if (it == params.compiled_functions.end()) {
// Reinfer the function. The JIT came along and removed the inferred
// method body. See #34993
- if (policy != CompilationPolicy::Default &&
+ if ((policy != CompilationPolicy::Default || params.params->trim) &&
jl_atomic_load_relaxed(&codeinst->inferred) == jl_nothing) {
// XXX: SOURCE_MODE_FORCE_SOURCE is wrong here (neither sufficient nor necessary)
codeinst = jl_type_infer(codeinst->def, jl_atomic_load_relaxed(&codeinst->max_world), SOURCE_MODE_FORCE_SOURCE);
@@ -9935,6 +10216,16 @@ void jl_compile_workqueue(
if (proto.specsig) {
// expected specsig
if (!preal_specsig) {
+ if (params.params->trim) {
+ auto it = params.compiled_functions.find(codeinst); //TODO: What to do about this
+ errs() << "Bailed out to invoke when compiling:";
+ jl_(codeinst->def);
+ if (it != params.compiled_functions.end()) {
+ errs() << it->second.second.functionObject << "\n";
+ errs() << it->second.second.specFunctionObject << "\n";
+ } else
+ errs() << "codeinst not in compile_functions\n";
+ }
// emit specsig-to-(jl)invoke conversion
StringRef invokeName;
if (invoke != NULL)
@@ -10124,6 +10415,22 @@ int jl_opaque_ptrs_set = 0;
extern "C" void jl_init_llvm(void)
{
+ jl_default_cgparams = {
+ /* track_allocations */ 1,
+ /* code_coverage */ 1,
+ /* prefer_specsig */ 0,
+#ifdef _OS_WINDOWS_
+ /* gnu_pubnames */ 0,
+#else
+ /* gnu_pubnames */ 1,
+#endif
+ /* debug_info_kind */ (int) DICompileUnit::DebugEmissionKind::FullDebug,
+ /* debug_info_level */ (int) jl_options.debug_level,
+ /* safepoint_on_entry */ 1,
+ /* gcstack_arg */ 1,
+ /* use_jlplt*/ 1,
+ /* trim */ 0,
+ /* lookup */ jl_rettype_inferred_addr };
jl_page_size = jl_getpagesize();
jl_default_debug_info_kind = (int) DICompileUnit::DebugEmissionKind::FullDebug;
jl_default_cgparams.debug_info_level = (int) jl_options.debug_level;
diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp
index fcda11dad4f8a..72eb17115f4c7 100644
--- a/src/gc-heap-snapshot.cpp
+++ b/src/gc-heap-snapshot.cpp
@@ -618,38 +618,32 @@ void final_serialize_heap_snapshot(ios_t *json, ios_t *strings, HeapSnapshot &sn
{
// mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2567-L2567
// also https://github.com/microsoft/vscode-v8-heap-tools/blob/c5b34396392397925ecbb4ecb904a27a2754f2c1/v8-heap-parser/src/decoder.rs#L43-L51
- ios_printf(json, "{\"snapshot\":{");
+ ios_printf(json, "{\"snapshot\":{\n");
- ios_printf(json, "\"meta\":{");
- ios_printf(json, "\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],");
- ios_printf(json, "\"node_types\":[");
+ ios_printf(json, " \"meta\":{\n");
+ ios_printf(json, " \"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],\n");
+ ios_printf(json, " \"node_types\":[");
snapshot.node_types.print_json_array(json, false);
ios_printf(json, ",");
- ios_printf(json, "\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"],");
- ios_printf(json, "\"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],");
- ios_printf(json, "\"edge_types\":[");
+ ios_printf(json, "\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"],\n");
+ ios_printf(json, " \"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],\n");
+ ios_printf(json, " \"edge_types\":[");
snapshot.edge_types.print_json_array(json, false);
ios_printf(json, ",");
- ios_printf(json, "\"string_or_number\",\"from_node\"],");
+ ios_printf(json, "\"string_or_number\",\"from_node\"],\n");
// not used. Required by microsoft/vscode-v8-heap-tools
- ios_printf(json, "\"trace_function_info_fields\":[\"function_id\",\"name\",\"script_name\",\"script_id\",\"line\",\"column\"],");
- ios_printf(json, "\"trace_node_fields\":[\"id\",\"function_info_index\",\"count\",\"size\",\"children\"],");
- ios_printf(json, "\"sample_fields\":[\"timestamp_us\",\"last_assigned_id\"],");
- ios_printf(json, "\"location_fields\":[\"object_index\",\"script_id\",\"line\",\"column\"]");
+ ios_printf(json, " \"trace_function_info_fields\":[\"function_id\",\"name\",\"script_name\",\"script_id\",\"line\",\"column\"],\n");
+ ios_printf(json, " \"trace_node_fields\":[\"id\",\"function_info_index\",\"count\",\"size\",\"children\"],\n");
+ ios_printf(json, " \"sample_fields\":[\"timestamp_us\",\"last_assigned_id\"],\n");
+ ios_printf(json, " \"location_fields\":[\"object_index\",\"script_id\",\"line\",\"column\"]\n");
// end not used
- ios_printf(json, "},\n"); // end "meta"
+ ios_printf(json, " },\n"); // end "meta"
- ios_printf(json, "\"node_count\":%zu,", snapshot.num_nodes);
- ios_printf(json, "\"edge_count\":%zu,", snapshot.num_edges);
- ios_printf(json, "\"trace_function_count\":0"); // not used. Required by microsoft/vscode-v8-heap-tools
- ios_printf(json, "},\n"); // end "snapshot"
-
- // not used. Required by microsoft/vscode-v8-heap-tools
- ios_printf(json, "\"trace_function_infos\":[],");
- ios_printf(json, "\"trace_tree\":[],");
- ios_printf(json, "\"samples\":[],");
- ios_printf(json, "\"locations\":[]");
- // end not used
+ ios_printf(json, " \"node_count\":%zu,\n", snapshot.num_nodes);
+ ios_printf(json, " \"edge_count\":%zu,\n", snapshot.num_edges);
+ ios_printf(json, " \"trace_function_count\":0\n"); // not used. Required by microsoft/vscode-v8-heap-tools
+ ios_printf(json, "}\n"); // end "snapshot"
+ // this } is removed by the julia reassembler in Profile
ios_printf(json, "}");
}
diff --git a/src/gc-stock.c b/src/gc-stock.c
index d25f8917f302d..6b97881909bbd 100644
--- a/src/gc-stock.c
+++ b/src/gc-stock.c
@@ -40,6 +40,8 @@ uv_sem_t gc_sweep_assists_needed;
uv_mutex_t gc_queue_observer_lock;
// Tag for sentinel nodes in bigval list
uintptr_t gc_bigval_sentinel_tag;
+// Table recording number of full GCs due to each reason
+JL_DLLEXPORT uint64_t jl_full_sweep_reasons[FULL_SWEEP_NUM_REASONS];
// Flag that tells us whether we need to support conservative marking
// of objects.
@@ -3043,10 +3045,12 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
// we either free some space or get an OOM error.
if (gc_sweep_always_full) {
sweep_full = 1;
+ gc_count_full_sweep_reason(FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL);
}
if (collection == JL_GC_FULL && !prev_sweep_full) {
sweep_full = 1;
recollect = 1;
+ gc_count_full_sweep_reason(FULL_SWEEP_REASON_FORCED_FULL_SWEEP);
}
if (sweep_full) {
// these are the difference between the number of gc-perm bytes scanned
@@ -3182,10 +3186,17 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
}
double old_ratio = (double)promoted_bytes/(double)heap_size;
- if (heap_size > user_max || old_ratio > 0.15)
+ if (heap_size > user_max) {
next_sweep_full = 1;
- else
+ gc_count_full_sweep_reason(FULL_SWEEP_REASON_USER_MAX_EXCEEDED);
+ }
+ else if (old_ratio > 0.15) {
+ next_sweep_full = 1;
+ gc_count_full_sweep_reason(FULL_SWEEP_REASON_LARGE_PROMOTION_RATE);
+ }
+ else {
next_sweep_full = 0;
+ }
if (heap_size > user_max || thrashing)
under_pressure = 1;
// sweeping is over
diff --git a/src/gc-stock.h b/src/gc-stock.h
index 45c93bf4289ae..46f7d3e11e105 100644
--- a/src/gc-stock.h
+++ b/src/gc-stock.h
@@ -505,6 +505,20 @@ FORCE_INLINE void gc_big_object_link(bigval_t *sentinel_node, bigval_t *node) JL
sentinel_node->next = node;
}
+// Must be kept in sync with `base/timing.jl`
+#define FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL (0)
+#define FULL_SWEEP_REASON_FORCED_FULL_SWEEP (1)
+#define FULL_SWEEP_REASON_USER_MAX_EXCEEDED (2)
+#define FULL_SWEEP_REASON_LARGE_PROMOTION_RATE (3)
+#define FULL_SWEEP_NUM_REASONS (4)
+
+extern JL_DLLEXPORT uint64_t jl_full_sweep_reasons[FULL_SWEEP_NUM_REASONS];
+STATIC_INLINE void gc_count_full_sweep_reason(int reason) JL_NOTSAFEPOINT
+{
+ assert(reason >= 0 && reason < FULL_SWEEP_NUM_REASONS);
+ jl_full_sweep_reasons[reason]++;
+}
+
extern uv_mutex_t gc_perm_lock;
extern uv_mutex_t gc_threads_lock;
extern uv_cond_t gc_threads_cond;
diff --git a/src/gf.c b/src/gf.c
index e6f5b4ee007f7..56ebe6fe2fa84 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -1360,8 +1360,7 @@ static inline jl_typemap_entry_t *lookup_leafcache(jl_genericmemory_t *leafcache
}
return NULL;
}
-
-static jl_method_instance_t *cache_method(
+jl_method_instance_t *cache_method(
jl_methtable_t *mt, _Atomic(jl_typemap_t*) *cache, jl_value_t *parent JL_PROPAGATES_ROOT,
jl_tupletype_t *tt, // the original tupletype of the signature
jl_method_t *definition,
@@ -1707,7 +1706,7 @@ static void method_overwrite(jl_typemap_entry_t *newentry, jl_method_t *oldvalue
jl_printf(s, ".\n");
jl_uv_flush(s);
}
- if (jl_generating_output()) {
+ if (jl_generating_output() && jl_options.incremental) {
jl_printf(JL_STDERR, "ERROR: Method overwriting is not permitted during Module precompilation. Use `__precompile__(false)` to opt-out of precompilation.\n");
jl_throw(jl_precompilable_error);
}
@@ -2411,7 +2410,7 @@ JL_DLLEXPORT jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t na
// spvals is any matched static parameter values, m is the Method,
// full is a boolean indicating if that method fully covers the input
//
-// lim is the max # of methods to return. if there are more, returns jl_false.
+// lim is the max # of methods to return. if there are more, returns jl_nothing.
// Negative values stand for no limit.
// Unless lim == -1, remove matches that are unambiguously covered by earlier ones
JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, jl_value_t *mt, int lim, int include_ambiguous,
@@ -2431,7 +2430,7 @@ JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, jl_value_t *
return ml_matches((jl_methtable_t*)mt, types, lim, include_ambiguous, 1, world, 1, min_valid, max_valid, ambig);
}
-jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT)
+JL_DLLEXPORT jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT)
{
// one unspecialized version of a function can be shared among all cached specializations
if (!jl_is_method(def) || def->source == NULL) {
@@ -2561,6 +2560,38 @@ static void record_precompile_statement(jl_method_instance_t *mi, double compila
JL_UNLOCK(&precomp_statement_out_lock);
}
+jl_mutex_t dispatch_statement_out_lock;
+
+static void record_dispatch_statement(jl_method_instance_t *mi)
+{
+ static ios_t f_dispatch;
+ static JL_STREAM* s_dispatch = NULL;
+ jl_method_t *def = mi->def.method;
+ if (!jl_is_method(def))
+ return;
+
+ JL_LOCK(&dispatch_statement_out_lock);
+ if (s_dispatch == NULL) {
+ const char *t = jl_options.trace_dispatch;
+ if (!strncmp(t, "stderr", 6)) {
+ s_dispatch = JL_STDERR;
+ }
+ else {
+ if (ios_file(&f_dispatch, t, 1, 1, 1, 1) == NULL)
+ jl_errorf("cannot open dispatch statement file \"%s\" for writing", t);
+ s_dispatch = (JL_STREAM*) &f_dispatch;
+ }
+ }
+ if (!jl_has_free_typevars(mi->specTypes)) {
+ jl_printf(s_dispatch, "precompile(");
+ jl_static_show(s_dispatch, mi->specTypes);
+ jl_printf(s_dispatch, ")\n");
+ if (s_dispatch != JL_STDERR)
+ ios_flush(&f_dispatch);
+ }
+ JL_UNLOCK(&dispatch_statement_out_lock);
+}
+
// If waitcompile is 0, this will return NULL if compiling is on-going in the JIT. This is
// useful for the JIT itself, since it just doesn't cause redundant work or missed updates,
// but merely causes it to look into the current JIT worklist.
@@ -2910,7 +2941,7 @@ jl_method_instance_t *jl_normalize_to_compilable_mi(jl_method_instance_t *mi JL_
}
// return a MethodInstance for a compileable method_match
-jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t world, size_t min_valid, size_t max_valid, int mt_cache)
+JL_DLLEXPORT jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t world, size_t min_valid, size_t max_valid, int mt_cache)
{
jl_method_t *m = match->method;
jl_svec_t *env = match->sparams;
@@ -3068,7 +3099,8 @@ static void jl_compile_now(jl_method_instance_t *mi)
JL_DLLEXPORT void jl_compile_method_instance(jl_method_instance_t *mi, jl_tupletype_t *types, size_t world)
{
size_t tworld = jl_typeinf_world;
- jl_atomic_store_relaxed(&mi->precompiled, 1);
+ uint8_t miflags = jl_atomic_load_relaxed(&mi->flags) | JL_MI_FLAGS_MASK_PRECOMPILED;
+ jl_atomic_store_relaxed(&mi->flags, miflags);
if (jl_generating_output()) {
jl_compile_now(mi);
// In addition to full compilation of the compilation-signature, if `types` is more specific (e.g. due to nospecialize),
@@ -3083,7 +3115,8 @@ JL_DLLEXPORT void jl_compile_method_instance(jl_method_instance_t *mi, jl_tuplet
types2 = jl_type_intersection_env((jl_value_t*)types, (jl_value_t*)mi->def.method->sig, &tpenv2);
jl_method_instance_t *mi2 = jl_specializations_get_linfo(mi->def.method, (jl_value_t*)types2, tpenv2);
JL_GC_POP();
- jl_atomic_store_relaxed(&mi2->precompiled, 1);
+ miflags = jl_atomic_load_relaxed(&mi2->flags) | JL_MI_FLAGS_MASK_PRECOMPILED;
+ jl_atomic_store_relaxed(&mi2->flags, miflags);
if (jl_rettype_inferred_native(mi2, world, world) == jl_nothing)
(void)jl_type_infer(mi2, world, SOURCE_MODE_NOT_REQUIRED);
if (jl_typeinf_func && jl_atomic_load_relaxed(&mi->def.method->primary_world) <= tworld) {
@@ -3112,6 +3145,21 @@ JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types)
return 1;
}
+JL_DLLEXPORT int jl_add_entrypoint(jl_tupletype_t *types)
+{
+ size_t world = jl_atomic_load_acquire(&jl_world_counter);
+ size_t min_valid = 0;
+ size_t max_valid = ~(size_t)0;
+ jl_method_instance_t *mi = jl_get_compile_hint_specialization(types, world, &min_valid, &max_valid, 1);
+ if (mi == NULL)
+ return 0;
+ JL_GC_PROMISE_ROOTED(mi);
+ if (jl_generating_output() && jl_options.trim) {
+ arraylist_push(jl_entrypoint_mis, mi);
+ }
+ return 1;
+}
+
// add type of `f` to front of argument tuple type
jl_value_t *jl_argtype_with_function(jl_value_t *f, jl_value_t *types0)
{
@@ -3344,6 +3392,16 @@ STATIC_INLINE jl_method_instance_t *jl_lookup_generic_(jl_value_t *F, jl_value_t
jl_method_error(F, args, nargs, world);
// unreachable
}
+ // mfunc is about to be dispatched
+ if (jl_options.trace_dispatch != NULL) {
+ uint8_t miflags = jl_atomic_load_relaxed(&mfunc->flags);
+ uint8_t was_dispatched = miflags & JL_MI_FLAGS_MASK_DISPATCHED;
+ if (!was_dispatched) {
+ miflags |= JL_MI_FLAGS_MASK_DISPATCHED;
+ jl_atomic_store_relaxed(&mfunc->flags, miflags);
+ record_dispatch_statement(mfunc);
+ }
+ }
}
#ifdef JL_TRACE
@@ -3466,6 +3524,15 @@ jl_value_t *jl_gf_invoke_by_method(jl_method_t *method, jl_value_t *gf, jl_value
jl_gc_sync_total_bytes(last_alloc); // discard allocation count from compilation
}
JL_GC_PROMISE_ROOTED(mfunc);
+ if (jl_options.trace_dispatch != NULL) {
+ uint8_t miflags = jl_atomic_load_relaxed(&mfunc->flags);
+ uint8_t was_dispatched = miflags & JL_MI_FLAGS_MASK_DISPATCHED;
+ if (!was_dispatched) {
+ miflags |= JL_MI_FLAGS_MASK_DISPATCHED;
+ jl_atomic_store_relaxed(&mfunc->flags, miflags);
+ record_dispatch_statement(mfunc);
+ }
+ }
size_t world = jl_current_task->world_age;
return _jl_invoke(gf, args, nargs - 1, mfunc, world);
}
diff --git a/src/init.c b/src/init.c
index 86c0877b14289..413d4e8055e54 100644
--- a/src/init.c
+++ b/src/init.c
@@ -44,6 +44,7 @@ extern BOOL (WINAPI *hSymRefreshModuleList)(HANDLE);
// list of modules being deserialized with __init__ methods
jl_array_t *jl_module_init_order;
+arraylist_t *jl_entrypoint_mis;
JL_DLLEXPORT size_t jl_page_size;
@@ -721,6 +722,7 @@ static void restore_fp_env(void)
static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_task_t *ct);
JL_DLLEXPORT int jl_default_debug_info_kind;
+JL_DLLEXPORT jl_cgparams_t jl_default_cgparams;
static void init_global_mutexes(void) {
JL_MUTEX_INIT(&jl_modules_mutex, "jl_modules_mutex");
@@ -841,8 +843,10 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_
JL_TIMING(JULIA_INIT, JULIA_INIT);
jl_resolve_sysimg_location(rel);
// loads sysimg if available, and conditionally sets jl_options.cpu_target
- if (rel == JL_IMAGE_IN_MEMORY)
+ if (rel == JL_IMAGE_IN_MEMORY) {
jl_set_sysimg_so(jl_exe_handle);
+ jl_options.image_file = jl_options.julia_bin;
+ }
else if (jl_options.image_file)
jl_preload_sysimg_so(jl_options.image_file);
if (jl_options.cpu_target == NULL)
@@ -899,6 +903,11 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_
JL_GC_POP();
}
+ if (jl_options.trim) {
+ jl_entrypoint_mis = (arraylist_t *)malloc_s(sizeof(arraylist_t));
+ arraylist_new(jl_entrypoint_mis, 0);
+ }
+
if (jl_options.handle_signals == JL_OPTIONS_HANDLE_SIGNALS_ON)
jl_install_sigint_handler();
}
diff --git a/src/jitlayers.h b/src/jitlayers.h
index 107782e354d4a..93669c2351d88 100644
--- a/src/jitlayers.h
+++ b/src/jitlayers.h
@@ -29,7 +29,7 @@
#include "llvm-version.h"
#include
#include
-
+#include
// As of LLVM 13, there are two runtime JIT linker implementations, the older
// RuntimeDyld (used via orc::RTDyldObjectLinkingLayer) and the newer JITLink
@@ -65,6 +65,7 @@
using namespace llvm;
extern "C" jl_cgparams_t jl_default_cgparams;
+extern arraylist_t new_invokes;
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ThreadSafeContext, LLVMOrcThreadSafeContextRef)
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ThreadSafeModule, LLVMOrcThreadSafeModuleRef)
@@ -211,7 +212,7 @@ struct jl_codegen_call_target_t {
typedef SmallVector, 0> jl_workqueue_t;
// TODO DenseMap?
typedef std::map> jl_compiled_functions_t;
-
+typedef std::list> CallFrames;
struct jl_codegen_params_t {
orc::ThreadSafeContext tsctx;
orc::ThreadSafeContext::Lock tsctx_lock;
@@ -230,6 +231,7 @@ struct jl_codegen_params_t {
std::map ditypes;
std::map llvmtypes;
DenseMap mergedConstants;
+ llvm::MapVector> enqueuers;
// Map from symbol name (in a certain library) to its GV in sysimg and the
// DL handle address in the current session.
StringMap> libMapGV;
diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc
index 7abf2b055bb8c..a00a0171d23b7 100644
--- a/src/jl_exported_funcs.inc
+++ b/src/jl_exported_funcs.inc
@@ -316,6 +316,7 @@
XX(jl_module_name) \
XX(jl_module_names) \
XX(jl_module_parent) \
+ XX(jl_module_getloc) \
XX(jl_module_public) \
XX(jl_module_public_p) \
XX(jl_module_use) \
@@ -344,6 +345,8 @@
XX(jl_new_typevar) \
XX(jl_next_from_addrinfo) \
XX(jl_normalize_to_compilable_sig) \
+ XX(jl_method_match_to_mi) \
+ XX(jl_get_unspecialized) \
XX(jl_no_exc_handler) \
XX(jl_object_id) \
XX(jl_object_id_) \
@@ -522,6 +525,7 @@
YY(jl_dump_native) \
YY(jl_get_llvm_gvs) \
YY(jl_get_llvm_external_fns) \
+ YY(jl_get_llvm_mis) \
YY(jl_dump_function_asm) \
YY(jl_LLVMCreateDisasm) \
YY(jl_LLVMDisasmInstruction) \
diff --git a/src/jloptions.c b/src/jloptions.c
index f63f4de020e26..35f0a76e3f6e7 100644
--- a/src/jloptions.c
+++ b/src/jloptions.c
@@ -77,6 +77,7 @@ JL_DLLEXPORT void jl_init_options(void)
1, // can_inline
JL_OPTIONS_POLLY_ON, // polly
NULL, // trace_compile
+ NULL, // trace_dispatch
JL_OPTIONS_FAST_MATH_DEFAULT,
0, // worker
NULL, // cookie
@@ -101,6 +102,7 @@ JL_DLLEXPORT void jl_init_options(void)
0, // permalloc_pkgimg
0, // heap-size-hint
0, // trace_compile_timing
+ 0, // trim
};
jl_options_initialized = 1;
}
@@ -251,7 +253,7 @@ static const char opts_hidden[] =
" --strip-ir Remove IR (intermediate representation) of compiled\n"
" functions\n\n"
- // compiler debugging (see the devdocs for tips on using these options)
+ // compiler debugging and experimental (see the devdocs for tips on using these options)
" --output-unopt-bc Generate unoptimized LLVM bitcode (.bc)\n"
" --output-bc Generate LLVM bitcode (.bc)\n"
" --output-asm Generate an assembly file (.s)\n"
@@ -265,6 +267,13 @@ static const char opts_hidden[] =
" compile in ms\n"
" --image-codegen Force generate code in imaging mode\n"
" --permalloc-pkgimg={yes|no*} Copy the data section of package images into memory\n"
+ " --trim={no*|safe|unsafe|unsafe-warn}\n"
+ " Build a sysimage including only code provably reachable\n"
+ " from methods marked by calling `entrypoint`. In unsafe\n"
+ " mode, the resulting binary might be missing needed code\n"
+ " and can throw errors. With unsafe-warn warnings will be\n"
+ " printed for dynamic call sites that might lead to such\n"
+ " errors. In safe mode compile-time errors are given instead.\n"
;
JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
@@ -286,6 +295,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
opt_polly,
opt_trace_compile,
opt_trace_compile_timing,
+ opt_trace_dispatch,
opt_math_mode,
opt_worker,
opt_bind_to,
@@ -311,7 +321,8 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
opt_strip_ir,
opt_heap_size_hint,
opt_gc_threads,
- opt_permalloc_pkgimg
+ opt_permalloc_pkgimg,
+ opt_trim,
};
static const char* const shortopts = "+vhqH:e:E:L:J:C:it:p:O:g:m:";
static const struct option longopts[] = {
@@ -363,6 +374,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
{ "polly", required_argument, 0, opt_polly },
{ "trace-compile", required_argument, 0, opt_trace_compile },
{ "trace-compile-timing", no_argument, 0, opt_trace_compile_timing },
+ { "trace-dispatch", required_argument, 0, opt_trace_dispatch },
{ "math-mode", required_argument, 0, opt_math_mode },
{ "handle-signals", required_argument, 0, opt_handle_signals },
// hidden command line options
@@ -375,6 +387,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
{ "strip-ir", no_argument, 0, opt_strip_ir },
{ "permalloc-pkgimg",required_argument, 0, opt_permalloc_pkgimg },
{ "heap-size-hint", required_argument, 0, opt_heap_size_hint },
+ { "trim", optional_argument, 0, opt_trim },
{ 0, 0, 0, 0 }
};
@@ -818,6 +831,11 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
case opt_trace_compile_timing:
jl_options.trace_compile_timing = 1;
break;
+ case opt_trace_dispatch:
+ jl_options.trace_dispatch = strdup(optarg);
+ if (!jl_options.trace_dispatch)
+ jl_errorf("fatal error: failed to allocate memory: %s", strerror(errno));
+ break;
case opt_math_mode:
if (!strcmp(optarg,"ieee"))
jl_options.fast_math = JL_OPTIONS_FAST_MATH_OFF;
@@ -934,6 +952,18 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
else
jl_errorf("julia: invalid argument to --permalloc-pkgimg={yes|no} (%s)", optarg);
break;
+ case opt_trim:
+ if (optarg == NULL || !strcmp(optarg,"safe"))
+ jl_options.trim = JL_TRIM_SAFE;
+ else if (!strcmp(optarg,"no"))
+ jl_options.trim = JL_TRIM_NO;
+ else if (!strcmp(optarg,"unsafe"))
+ jl_options.trim = JL_TRIM_UNSAFE;
+ else if (!strcmp(optarg,"unsafe-warn"))
+ jl_options.trim = JL_TRIM_UNSAFE_WARN;
+ else
+ jl_errorf("julia: invalid argument to --trim={safe|no|unsafe|unsafe-warn} (%s)", optarg);
+ break;
default:
jl_errorf("julia: unhandled option -- %c\n"
"This is a bug, please report it.", c);
diff --git a/src/jloptions.h b/src/jloptions.h
index aac2a64a373a8..e58797caace3c 100644
--- a/src/jloptions.h
+++ b/src/jloptions.h
@@ -38,6 +38,7 @@ typedef struct {
int8_t can_inline;
int8_t polly;
const char *trace_compile;
+ const char *trace_dispatch;
int8_t fast_math;
int8_t worker;
const char *cookie;
@@ -62,6 +63,7 @@ typedef struct {
int8_t permalloc_pkgimg;
uint64_t heap_size_hint;
int8_t trace_compile_timing;
+ int8_t trim;
} jl_options_t;
#endif
diff --git a/src/jltypes.c b/src/jltypes.c
index fbc8e9f7f7f16..11f1d11a14edc 100644
--- a/src/jltypes.c
+++ b/src/jltypes.c
@@ -3617,7 +3617,7 @@ void jl_init_types(void) JL_GC_DISABLED
"backedges",
"cache",
"cache_with_orig",
- "precompiled"),
+ "flags"),
jl_svec(7,
jl_new_struct(jl_uniontype_type, jl_method_type, jl_module_type),
jl_any_type,
diff --git a/src/julia.expmap.in b/src/julia.expmap.in
index e5f9ee890205f..29366f6296a85 100644
--- a/src/julia.expmap.in
+++ b/src/julia.expmap.in
@@ -5,8 +5,8 @@
asprintf;
bitvector_*;
ios_*;
- arraylist_grow;
- small_arraylist_grow;
+ arraylist_*;
+ small_arraylist_*;
jl_*;
ijl_*;
_jl_mutex_*;
diff --git a/src/julia.h b/src/julia.h
index abb8a57ff13b0..ed3d9bf825658 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -410,8 +410,14 @@ struct _jl_method_instance_t {
jl_array_t *backedges; // list of method-instances which call this method-instance; `invoke` records (invokesig, caller) pairs
_Atomic(struct _jl_code_instance_t*) cache;
uint8_t cache_with_orig; // !cache_with_specTypes
- _Atomic(uint8_t) precompiled; // true if this instance was generated by an explicit `precompile(...)` call
+
+ // flags for this method instance
+ // bit 0: generated by an explicit `precompile(...)`
+ // bit 1: dispatched
+ _Atomic(uint8_t) flags;
};
+#define JL_MI_FLAGS_MASK_PRECOMPILED 0x01
+#define JL_MI_FLAGS_MASK_DISPATCHED 0x02
// OpaqueClosure
typedef struct _jl_opaque_closure_t {
@@ -704,6 +710,8 @@ typedef struct _jl_module_t {
struct _jl_module_t *parent;
_Atomic(jl_svec_t*) bindings;
_Atomic(jl_genericmemory_t*) bindingkeyset; // index lookup by name into bindings
+ jl_sym_t *file;
+ int32_t line;
// hidden fields:
arraylist_t usings; // modules with all bindings potentially imported
jl_uuid_t build_id;
@@ -2579,6 +2587,11 @@ JL_DLLEXPORT int jl_generating_output(void) JL_NOTSAFEPOINT;
#define JL_OPTIONS_USE_PKGIMAGES_YES 1
#define JL_OPTIONS_USE_PKGIMAGES_NO 0
+#define JL_TRIM_NO 0
+#define JL_TRIM_SAFE 1
+#define JL_TRIM_UNSAFE 2
+#define JL_TRIM_UNSAFE_WARN 3
+
// Version information
#include // Generated file
@@ -2626,10 +2639,12 @@ typedef struct {
int gcstack_arg; // Pass the ptls value as an argument with swiftself
int use_jlplt; // Whether to use the Julia PLT mechanism or emit symbols directly
+ int trim; // can we emit dynamic dispatches?
// Cache access. Default: jl_rettype_inferred_native.
jl_codeinstance_lookup_t lookup;
} jl_cgparams_t;
extern JL_DLLEXPORT int jl_default_debug_info_kind;
+extern JL_DLLEXPORT jl_cgparams_t jl_default_cgparams;
typedef struct {
int emit_metadata;
diff --git a/src/julia_internal.h b/src/julia_internal.h
index f00667d016796..9a61c3d18356f 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -664,8 +664,9 @@ JL_DLLEXPORT jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t
JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred(
jl_method_instance_t *mi JL_PROPAGATES_ROOT, jl_value_t *rettype,
size_t min_world, size_t max_world, jl_debuginfo_t *edges);
-jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT);
+JL_DLLEXPORT jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT);
JL_DLLEXPORT void jl_read_codeinst_invoke(jl_code_instance_t *ci, uint8_t *specsigflags, jl_callptr_t *invoke, void **specptr, int waitcompile) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t world, size_t min_valid, size_t max_valid, int mt_cache);
JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst_uninit(jl_method_instance_t *mi, jl_value_t *owner);
JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst(
@@ -683,6 +684,7 @@ JL_DLLEXPORT const char *jl_debuginfo_name(jl_value_t *func) JL_NOTSAFEPOINT;
JL_DLLEXPORT void jl_compile_method_instance(jl_method_instance_t *mi, jl_tupletype_t *types, size_t world);
JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types);
+JL_DLLEXPORT int jl_add_entrypoint(jl_tupletype_t *types);
jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *lam JL_PROPAGATES_ROOT, size_t world);
jl_value_t *jl_code_or_ci_for_interpreter(jl_method_instance_t *lam JL_PROPAGATES_ROOT, size_t world);
int jl_code_requires_compiler(jl_code_info_t *src, int include_force_compile);
@@ -853,6 +855,7 @@ extern htable_t jl_current_modules JL_GLOBALLY_ROOTED;
extern JL_DLLEXPORT jl_module_t *jl_precompile_toplevel_module JL_GLOBALLY_ROOTED;
extern jl_genericmemory_t *jl_global_roots_list JL_GLOBALLY_ROOTED;
extern jl_genericmemory_t *jl_global_roots_keyset JL_GLOBALLY_ROOTED;
+extern arraylist_t *jl_entrypoint_mis;
JL_DLLEXPORT int jl_is_globally_rooted(jl_value_t *val JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT;
JL_DLLEXPORT jl_value_t *jl_as_global_root(jl_value_t *val, int insert) JL_GLOBALLY_ROOTED;
@@ -1902,7 +1905,7 @@ JL_DLLIMPORT void jl_get_function_id(void *native_code, jl_code_instance_t *ncod
int32_t *func_idx, int32_t *specfunc_idx);
JL_DLLIMPORT void jl_register_fptrs(uint64_t image_base, const struct _jl_image_fptrs_t *fptrs,
jl_method_instance_t **linfos, size_t n);
-
+JL_DLLIMPORT void jl_get_llvm_mis(void *native_code, arraylist_t* MIs);
JL_DLLIMPORT void jl_init_codegen(void);
JL_DLLIMPORT void jl_teardown_codegen(void) JL_NOTSAFEPOINT;
JL_DLLIMPORT int jl_getFunctionInfo(jl_frame_t **frames, uintptr_t pointer, int skipC, int noInline) JL_NOTSAFEPOINT;
diff --git a/src/method.c b/src/method.c
index d4457b1549353..6aba60e7fe12c 100644
--- a/src/method.c
+++ b/src/method.c
@@ -629,7 +629,7 @@ JL_DLLEXPORT jl_method_instance_t *jl_new_method_instance_uninit(void)
mi->backedges = NULL;
jl_atomic_store_relaxed(&mi->cache, NULL);
mi->cache_with_orig = 0;
- jl_atomic_store_relaxed(&mi->precompiled, 0);
+ jl_atomic_store_relaxed(&mi->flags, 0);
return mi;
}
diff --git a/src/module.c b/src/module.c
index 96d94049cff13..f4da7e1e994de 100644
--- a/src/module.c
+++ b/src/module.c
@@ -52,6 +52,8 @@ JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, jl_module_t *parent, ui
m->compile = -1;
m->infer = -1;
m->max_methods = -1;
+ m->file = name; // Using the name as a placeholder is better than nothing
+ m->line = 0;
m->hash = parent == NULL ? bitmix(name->hash, jl_module_type->hash) :
bitmix(name->hash, parent->hash);
JL_MUTEX_INIT(&m->lock, "module->lock");
@@ -856,7 +858,7 @@ JL_DLLEXPORT int jl_binding_resolved_p(jl_module_t *m, jl_sym_t *var)
return kind == BINDING_KIND_DECLARED || !jl_bkind_is_some_guard(kind);
}
-static uint_t bindingkey_hash(size_t idx, jl_value_t *data)
+uint_t bindingkey_hash(size_t idx, jl_value_t *data)
{
jl_binding_t *b = (jl_binding_t*)jl_svecref(data, idx); // This must always happen inside the lock
jl_sym_t *var = b->globalref->name;
@@ -1179,6 +1181,14 @@ jl_module_t *jl_module_root(jl_module_t *m)
}
}
+JL_DLLEXPORT jl_sym_t *jl_module_getloc(jl_module_t *m, int32_t *line)
+{
+ if (line) {
+ *line = m->line;
+ }
+ return m->file;
+}
+
JL_DLLEXPORT jl_uuid_t jl_module_build_id(jl_module_t *m) { return m->build_id; }
JL_DLLEXPORT jl_uuid_t jl_module_uuid(jl_module_t* m) { return m->uuid; }
diff --git a/src/precompile.c b/src/precompile.c
index c40e867ea699e..c21cf5367fba6 100644
--- a/src/precompile.c
+++ b/src/precompile.c
@@ -39,9 +39,17 @@ void write_srctext(ios_t *f, jl_array_t *udeps, int64_t srctextpos) {
static jl_value_t *replace_depot_func = NULL;
if (!replace_depot_func)
replace_depot_func = jl_get_global(jl_base_module, jl_symbol("replace_depot_path"));
+ static jl_value_t *normalize_depots_func = NULL;
+ if (!normalize_depots_func)
+ normalize_depots_func = jl_get_global(jl_base_module, jl_symbol("normalize_depots_for_relocation"));
ios_t srctext;
- jl_value_t *deptuple = NULL;
- JL_GC_PUSH2(&deptuple, &udeps);
+ jl_value_t *deptuple = NULL, *depots = NULL;
+ JL_GC_PUSH3(&deptuple, &udeps, &depots);
+ jl_task_t *ct = jl_current_task;
+ size_t last_age = ct->world_age;
+ ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
+ depots = jl_apply(&normalize_depots_func, 1);
+ ct->world_age = last_age;
for (size_t i = 0; i < len; i++) {
deptuple = jl_array_ptr_ref(udeps, i);
jl_value_t *depmod = jl_fieldref(deptuple, 0); // module
@@ -60,13 +68,14 @@ void write_srctext(ios_t *f, jl_array_t *udeps, int64_t srctextpos) {
}
jl_value_t **replace_depot_args;
- JL_GC_PUSHARGS(replace_depot_args, 2);
+ JL_GC_PUSHARGS(replace_depot_args, 3);
replace_depot_args[0] = replace_depot_func;
replace_depot_args[1] = abspath;
+ replace_depot_args[2] = depots;
jl_task_t *ct = jl_current_task;
size_t last_age = ct->world_age;
ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
- jl_value_t *depalias = (jl_value_t*)jl_apply(replace_depot_args, 2);
+ jl_value_t *depalias = (jl_value_t*)jl_apply(replace_depot_args, 3);
ct->world_age = last_age;
JL_GC_POP();
@@ -116,14 +125,16 @@ JL_DLLEXPORT void jl_write_compiler_output(void)
if (f) {
jl_array_ptr_1d_push(jl_module_init_order, m);
int setting = jl_get_module_compile((jl_module_t*)m);
- if (setting != JL_OPTIONS_COMPILE_OFF &&
- setting != JL_OPTIONS_COMPILE_MIN) {
+ if ((setting != JL_OPTIONS_COMPILE_OFF && (jl_options.trim ||
+ (setting != JL_OPTIONS_COMPILE_MIN)))) {
// TODO: this would be better handled if moved entirely to jl_precompile
// since it's a slightly duplication of effort
jl_value_t *tt = jl_is_type(f) ? (jl_value_t*)jl_wrap_Type(f) : jl_typeof(f);
JL_GC_PUSH1(&tt);
tt = jl_apply_tuple_type_v(&tt, 1);
jl_compile_hint((jl_tupletype_t*)tt);
+ if (jl_options.trim)
+ jl_add_entrypoint((jl_tupletype_t*)tt);
JL_GC_POP();
}
}
@@ -188,6 +199,10 @@ JL_DLLEXPORT void jl_write_compiler_output(void)
jl_printf(JL_STDERR, "\n ** incremental compilation may be broken for this module **\n\n");
}
}
+ if (jl_options.trim) {
+ exit(0); // Some finalizers need to run and we've blown up the bindings table
+ // TODO: Is this still needed?
+ }
JL_GC_POP();
jl_gc_enable_finalizers(ct, 1);
}
diff --git a/src/precompile_utils.c b/src/precompile_utils.c
index 5a4f599d1f0eb..a78d1e66dbb51 100644
--- a/src/precompile_utils.c
+++ b/src/precompile_utils.c
@@ -321,3 +321,83 @@ static void *jl_precompile_worklist(jl_array_t *worklist, jl_array_t *extext_met
JL_GC_POP();
return native_code;
}
+
+static int enq_ccallable_entrypoints_(jl_typemap_entry_t *def, void *closure)
+{
+ jl_method_t *m = def->func.method;
+ if (m->external_mt)
+ return 1;
+ if (m->ccallable)
+ jl_add_entrypoint((jl_tupletype_t*)jl_svecref(m->ccallable, 1));
+ return 1;
+}
+
+static int enq_ccallable_entrypoints(jl_methtable_t *mt, void *env)
+{
+ return jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), enq_ccallable_entrypoints_, env);
+}
+
+JL_DLLEXPORT void jl_add_ccallable_entrypoints(void)
+{
+ jl_foreach_reachable_mtable(enq_ccallable_entrypoints, NULL);
+}
+
+static void *jl_precompile_trimmed(size_t world)
+{
+ // array of MethodInstances and ccallable aliases to include in the output
+ jl_array_t *m = jl_alloc_vec_any(0);
+ jl_value_t *ccallable = NULL;
+ JL_GC_PUSH2(&m, &ccallable);
+ jl_method_instance_t *mi;
+ while (1)
+ {
+ mi = (jl_method_instance_t*)arraylist_pop(jl_entrypoint_mis);
+ if (mi == NULL)
+ break;
+ assert(jl_is_method_instance(mi));
+
+ jl_array_ptr_1d_push(m, (jl_value_t*)mi);
+ ccallable = (jl_value_t *)mi->def.method->ccallable;
+ if (ccallable)
+ jl_array_ptr_1d_push(m, ccallable);
+ }
+
+ jl_cgparams_t params = jl_default_cgparams;
+ params.trim = jl_options.trim;
+ void *native_code = jl_create_native(m, NULL, ¶ms, 0, /* imaging */ 1, 0,
+ world);
+ JL_GC_POP();
+ return native_code;
+}
+
+static void jl_rebuild_methtables(arraylist_t* MIs, htable_t* mtables)
+{
+ size_t i;
+ for (i = 0; i < MIs->len; i++) {
+ jl_method_instance_t *mi = (jl_method_instance_t*)MIs->items[i];
+ jl_method_t *m = mi->def.method;
+ jl_methtable_t *old_mt = jl_method_get_table(m);
+ if ((jl_value_t *)old_mt == jl_nothing)
+ continue;
+ jl_sym_t *name = old_mt->name;
+ if (!ptrhash_has(mtables, old_mt))
+ ptrhash_put(mtables, old_mt, jl_new_method_table(name, m->module));
+ jl_methtable_t *mt = (jl_methtable_t*)ptrhash_get(mtables, old_mt);
+ size_t world = jl_atomic_load_acquire(&jl_world_counter);
+ jl_value_t * lookup = jl_methtable_lookup(mt, m->sig, world);
+ // Check if the method is already in the new table, if not then insert it there
+ if (lookup == jl_nothing || (jl_method_t*)lookup != m) {
+ //TODO: should this be a function like unsafe_insert_method?
+ size_t min_world = jl_atomic_load_relaxed(&m->primary_world);
+ size_t max_world = jl_atomic_load_relaxed(&m->deleted_world);
+ jl_atomic_store_relaxed(&m->primary_world, ~(size_t)0);
+ jl_atomic_store_relaxed(&m->deleted_world, 1);
+ jl_typemap_entry_t *newentry = jl_method_table_add(mt, m, NULL);
+ jl_atomic_store_relaxed(&m->primary_world, min_world);
+ jl_atomic_store_relaxed(&m->deleted_world, max_world);
+ jl_atomic_store_relaxed(&newentry->min_world, min_world);
+ jl_atomic_store_relaxed(&newentry->max_world, max_world);
+ }
+ }
+
+}
diff --git a/src/staticdata.c b/src/staticdata.c
index 363aa46b62221..0a8cbe6db7c67 100644
--- a/src/staticdata.c
+++ b/src/staticdata.c
@@ -363,6 +363,9 @@ static void *to_seroder_entry(size_t idx)
return (void*)((char*)HT_NOTFOUND + 1 + idx);
}
+static htable_t new_methtables;
+static size_t precompilation_world;
+
static int ptr_cmp(const void *l, const void *r)
{
uintptr_t left = *(const uintptr_t*)l;
@@ -770,22 +773,41 @@ static uintptr_t jl_fptr_id(void *fptr)
#define jl_queue_for_serialization(s, v) jl_queue_for_serialization_((s), (jl_value_t*)(v), 1, 0)
static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) JL_GC_DISABLED;
-
static void jl_queue_module_for_serialization(jl_serializer_state *s, jl_module_t *m) JL_GC_DISABLED
{
jl_queue_for_serialization(s, m->name);
jl_queue_for_serialization(s, m->parent);
- jl_queue_for_serialization(s, jl_atomic_load_relaxed(&m->bindings));
+ if (jl_options.trim) {
+ jl_queue_for_serialization_(s, (jl_value_t*)jl_atomic_load_relaxed(&m->bindings), 0, 1);
+ } else {
+ jl_queue_for_serialization(s, jl_atomic_load_relaxed(&m->bindings));
+ }
jl_queue_for_serialization(s, jl_atomic_load_relaxed(&m->bindingkeyset));
- if (jl_options.strip_metadata) {
+ if (jl_options.strip_metadata || jl_options.trim) {
jl_svec_t *table = jl_atomic_load_relaxed(&m->bindings);
for (size_t i = 0; i < jl_svec_len(table); i++) {
jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i);
if ((void*)b == jl_nothing)
break;
- jl_sym_t *name = b->globalref->name;
- if (name == jl_docmeta_sym && jl_get_binding_value(b))
- record_field_change((jl_value_t**)&b->value, jl_nothing);
+ if (jl_options.strip_metadata) {
+ jl_sym_t *name = b->globalref->name;
+ if (name == jl_docmeta_sym && jl_get_binding_value(b))
+ record_field_change((jl_value_t**)&b->value, jl_nothing);
+ }
+ if (jl_options.trim) {
+ jl_value_t *val = jl_get_binding_value(b);
+ // keep binding objects that are defined and ...
+ if (val &&
+ // ... point to modules ...
+ (jl_is_module(val) ||
+ // ... or point to __init__ methods ...
+ !strcmp(jl_symbol_name(b->globalref->name), "__init__") ||
+ // ... or point to Base functions accessed by the runtime
+ (m == jl_base_module && (!strcmp(jl_symbol_name(b->globalref->name), "wait") ||
+ !strcmp(jl_symbol_name(b->globalref->name), "task_done_hook"))))) {
+ jl_queue_for_serialization(s, b);
+ }
+ }
}
}
@@ -944,6 +966,23 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
jl_queue_for_serialization_(s, get_replaceable_field((jl_value_t**)&bpart->next, 0), 1, immediate);
}
else if (layout->nfields > 0) {
+ if (jl_options.trim) {
+ if (jl_is_method(v)) {
+ jl_method_t *m = (jl_method_t *)v;
+ if (jl_is_svec(jl_atomic_load_relaxed(&m->specializations)))
+ jl_queue_for_serialization_(s, (jl_value_t*)jl_atomic_load_relaxed(&m->specializations), 0, 1);
+ }
+ else if (jl_typetagis(v, jl_typename_type)) {
+ jl_typename_t *tn = (jl_typename_t*)v;
+ if (tn->mt != NULL && !tn->mt->frozen) {
+ jl_methtable_t * new_methtable = (jl_methtable_t *)ptrhash_get(&new_methtables, tn->mt);
+ if (new_methtable != HT_NOTFOUND)
+ record_field_change((jl_value_t **)&tn->mt, (jl_value_t*)new_methtable);
+ else
+ record_field_change((jl_value_t **)&tn->mt, NULL);
+ }
+ }
+ }
char *data = (char*)jl_data_ptr(v);
size_t i, np = layout->npointers;
for (i = 0; i < np; i++) {
@@ -989,6 +1028,7 @@ done_fields: ;
}
}
+
static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) JL_GC_DISABLED
{
if (!jl_needs_serialization(s, v))
@@ -1219,6 +1259,9 @@ static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t
jl_atomic_store_relaxed(&newm->bindingkeyset, NULL);
arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, bindingkeyset)));
arraylist_push(&s->relocs_list, (void*)backref_id(s, jl_atomic_load_relaxed(&m->bindingkeyset), s->link_ids_relocs));
+ newm->file = NULL;
+ arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, file)));
+ arraylist_push(&s->relocs_list, (void*)backref_id(s, m->file, s->link_ids_relocs));
// write out the usings list
memset(&newm->usings._space, 0, sizeof(newm->usings._space));
@@ -1678,7 +1721,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
else if (jl_is_method_instance(v)) {
assert(f == s->s);
jl_method_instance_t *newmi = (jl_method_instance_t*)&f->buf[reloc_offset];
- jl_atomic_store_relaxed(&newmi->precompiled, 0);
+ jl_atomic_store_relaxed(&newmi->flags, 0);
}
else if (jl_is_code_instance(v)) {
assert(f == s->s);
@@ -2407,6 +2450,53 @@ static void jl_prune_type_cache_linear(jl_svec_t *cache)
jl_svecset(cache, ins++, jl_nothing);
}
+uint_t bindingkey_hash(size_t idx, jl_value_t *data);
+
+static void jl_prune_module_bindings(jl_module_t * m) JL_GC_DISABLED
+{
+ jl_svec_t * bindings = jl_atomic_load_relaxed(&m->bindings);
+ size_t l = jl_svec_len(bindings), i;
+ arraylist_t bindings_list;
+ arraylist_new(&bindings_list, 0);
+ if (l == 0)
+ return;
+ for (i = 0; i < l; i++) {
+ jl_value_t *ti = jl_svecref(bindings, i);
+ if (ti == jl_nothing)
+ continue;
+ jl_binding_t *ref = ((jl_binding_t*)ti);
+ if (!((ptrhash_get(&serialization_order, ref) == HT_NOTFOUND) &&
+ (ptrhash_get(&serialization_order, ref->globalref) == HT_NOTFOUND))) {
+ jl_svecset(bindings, i, jl_nothing);
+ arraylist_push(&bindings_list, ref);
+ }
+ }
+ jl_genericmemory_t* bindingkeyset = jl_atomic_load_relaxed(&m->bindingkeyset);
+ _Atomic(jl_genericmemory_t*)bindingkeyset2;
+ jl_atomic_store_relaxed(&bindingkeyset2,(jl_genericmemory_t*)jl_an_empty_memory_any);
+ jl_svec_t *bindings2 = jl_alloc_svec_uninit(bindings_list.len);
+ for (i = 0; i < bindings_list.len; i++) {
+ jl_binding_t *ref = (jl_binding_t*)bindings_list.items[i];
+ jl_svecset(bindings2, i, ref);
+ jl_smallintset_insert(&bindingkeyset2, (jl_value_t*)m, bindingkey_hash, i, (jl_value_t*)bindings2);
+ }
+ void *idx = ptrhash_get(&serialization_order, bindings);
+ assert(idx != HT_NOTFOUND && idx != (void*)(uintptr_t)-1);
+ assert(serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] == bindings);
+ ptrhash_put(&serialization_order, bindings2, idx);
+ serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] = bindings2;
+
+ idx = ptrhash_get(&serialization_order, bindingkeyset);
+ assert(idx != HT_NOTFOUND && idx != (void*)(uintptr_t)-1);
+ assert(serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] == bindingkeyset);
+ ptrhash_put(&serialization_order, jl_atomic_load_relaxed(&bindingkeyset2), idx);
+ serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] = jl_atomic_load_relaxed(&bindingkeyset2);
+ jl_atomic_store_relaxed(&m->bindings, bindings2);
+ jl_atomic_store_relaxed(&m->bindingkeyset, jl_atomic_load_relaxed(&bindingkeyset2));
+ jl_gc_wb(m, bindings2);
+ jl_gc_wb(m, jl_atomic_load_relaxed(&bindingkeyset2));
+}
+
static void strip_slotnames(jl_array_t *slotnames)
{
// replace slot names with `?`, except unused_sym since the compiler looks at it
@@ -2473,7 +2563,7 @@ static int strip_all_codeinfos__(jl_typemap_entry_t *def, void *_env)
if (m->source) {
int stripped_ir = 0;
if (jl_options.strip_ir) {
- int should_strip_ir = 0;
+ int should_strip_ir = jl_options.trim;
if (!should_strip_ir) {
if (jl_atomic_load_relaxed(&m->unspecialized)) {
jl_code_instance_t *unspec = jl_atomic_load_relaxed(&jl_atomic_load_relaxed(&m->unspecialized)->cache);
@@ -2675,8 +2765,46 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
// strip metadata and IR when requested
if (jl_options.strip_metadata || jl_options.strip_ir)
jl_strip_all_codeinfos();
+ // collect needed methods and replace method tables that are in the tags array
+ htable_new(&new_methtables, 0);
+ arraylist_t MIs;
+ arraylist_new(&MIs, 0);
+ arraylist_t gvars;
+ arraylist_new(&gvars, 0);
+ arraylist_t external_fns;
+ arraylist_new(&external_fns, 0);
int en = jl_gc_enable(0);
+ if (native_functions) {
+ jl_get_llvm_gvs(native_functions, &gvars);
+ jl_get_llvm_external_fns(native_functions, &external_fns);
+ if (jl_options.trim)
+ jl_get_llvm_mis(native_functions, &MIs);
+ }
+ if (jl_options.trim) {
+ jl_rebuild_methtables(&MIs, &new_methtables);
+ jl_methtable_t *mt = (jl_methtable_t *)ptrhash_get(&new_methtables, jl_type_type_mt);
+ JL_GC_PROMISE_ROOTED(mt);
+ if (mt != HT_NOTFOUND)
+ jl_type_type_mt = mt;
+ else
+ jl_type_type_mt = jl_new_method_table(jl_type_type_mt->name, jl_type_type_mt->module);
+
+ mt = (jl_methtable_t *)ptrhash_get(&new_methtables, jl_kwcall_mt);
+ JL_GC_PROMISE_ROOTED(mt);
+ if (mt != HT_NOTFOUND)
+ jl_kwcall_mt = mt;
+ else
+ jl_kwcall_mt = jl_new_method_table(jl_kwcall_mt->name, jl_kwcall_mt->module);
+
+ mt = (jl_methtable_t *)ptrhash_get(&new_methtables, jl_nonfunction_mt);
+ JL_GC_PROMISE_ROOTED(mt);
+ if (mt != HT_NOTFOUND)
+ jl_nonfunction_mt = mt;
+ else
+ jl_nonfunction_mt = jl_new_method_table(jl_nonfunction_mt->name, jl_nonfunction_mt->module);
+ }
+
nsym_tag = 0;
htable_new(&symbol_table, 0);
htable_new(&fptr_to_id, sizeof(id_to_fptrs) / sizeof(*id_to_fptrs));
@@ -2722,14 +2850,6 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
htable_new(&s.callers_with_edges, 0);
jl_value_t **const*const tags = get_tags(); // worklist == NULL ? get_tags() : NULL;
- arraylist_t gvars;
- arraylist_t external_fns;
- arraylist_new(&gvars, 0);
- arraylist_new(&external_fns, 0);
- if (native_functions) {
- jl_get_llvm_gvs(native_functions, &gvars);
- jl_get_llvm_external_fns(native_functions, &external_fns);
- }
if (worklist == NULL) {
// empty!(Core.ARGS)
@@ -2788,6 +2908,8 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
// step 1.2: ensure all gvars are part of the sysimage too
record_gvars(&s, &gvars);
record_external_fns(&s, &external_fns);
+ if (jl_options.trim)
+ record_gvars(&s, &MIs);
jl_serialize_reachable(&s);
// step 1.3: prune (garbage collect) special weak references from the jl_global_roots_list
if (worklist == NULL) {
@@ -2808,8 +2930,30 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
// step 1.4: prune (garbage collect) some special weak references from
// built-in type caches too
for (i = 0; i < serialization_queue.len; i++) {
- jl_typename_t *tn = (jl_typename_t*)serialization_queue.items[i];
- if (jl_is_typename(tn)) {
+ jl_value_t *v = (jl_value_t*)serialization_queue.items[i];
+ if (jl_options.trim) {
+ if (jl_is_method(v)){
+ jl_method_t *m = (jl_method_t*)v;
+ jl_value_t *specializations_ = jl_atomic_load_relaxed(&m->specializations);
+ if (!jl_is_svec(specializations_))
+ continue;
+
+ jl_svec_t *specializations = (jl_svec_t *)specializations_;
+ size_t l = jl_svec_len(specializations), i;
+ for (i = 0; i < l; i++) {
+ jl_value_t *mi = jl_svecref(specializations, i);
+ if (mi == jl_nothing)
+ continue;
+ if (ptrhash_get(&serialization_order, mi) == HT_NOTFOUND)
+ jl_svecset(specializations, i, jl_nothing);
+ }
+ } else if (jl_is_module(v)) {
+ jl_prune_module_bindings((jl_module_t*)v);
+ }
+ }
+ // Not else
+ if (jl_is_typename(v)) {
+ jl_typename_t *tn = (jl_typename_t*)v;
jl_atomic_store_relaxed(&tn->cache,
jl_prune_type_cache_hash(jl_atomic_load_relaxed(&tn->cache)));
jl_gc_wb(tn, jl_atomic_load_relaxed(&tn->cache));
@@ -2918,7 +3062,9 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
jl_write_value(&s, global_roots_keyset);
jl_write_value(&s, s.ptls->root_task->tls);
write_uint32(f, jl_get_gs_ctr());
- write_uint(f, jl_atomic_load_acquire(&jl_world_counter));
+ size_t world = jl_atomic_load_acquire(&jl_world_counter);
+ // assert(world == precompilation_world); // This triggers on a normal build of julia
+ write_uint(f, world);
write_uint(f, jl_typeinf_world);
}
else {
@@ -2971,6 +3117,7 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
htable_free(&nullptrs);
htable_free(&symbol_table);
htable_free(&fptr_to_id);
+ htable_free(&new_methtables);
nsym_tag = 0;
jl_gc_enable(en);
@@ -3000,6 +3147,10 @@ static void jl_write_header_for_incremental(ios_t *f, jl_array_t *worklist, jl_a
JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *worklist, bool_t emit_split,
ios_t **s, ios_t **z, jl_array_t **udeps, int64_t *srctextpos)
{
+ if (jl_options.strip_ir || jl_options.trim) {
+ // make sure this is precompiled for jl_foreach_reachable_mtable
+ jl_get_loaded_modules();
+ }
jl_gc_collect(JL_GC_FULL);
jl_gc_collect(JL_GC_INCREMENTAL); // sweep finalizers
JL_TIMING(SYSIMG_DUMP, SYSIMG_DUMP);
@@ -3049,7 +3200,11 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli
}
}
else if (_native_data != NULL) {
- *_native_data = jl_precompile(jl_options.compile_enabled == JL_OPTIONS_COMPILE_ALL);
+ precompilation_world = jl_atomic_load_acquire(&jl_world_counter);
+ if (jl_options.trim)
+ *_native_data = jl_precompile_trimmed(precompilation_world);
+ else
+ *_native_data = jl_precompile(jl_options.compile_enabled == JL_OPTIONS_COMPILE_ALL);
}
// Make sure we don't run any Julia code concurrently after this point
diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c
index f39e5357c6782..8eb223d3cfbde 100644
--- a/src/staticdata_utils.c
+++ b/src/staticdata_utils.c
@@ -159,7 +159,8 @@ static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited,
if (jl_is_method(mod))
mod = ((jl_method_t*)mod)->module;
assert(jl_is_module(mod));
- if (jl_atomic_load_relaxed(&mi->precompiled) || !jl_object_in_image((jl_value_t*)mod) || type_in_worklist(mi->specTypes)) {
+ uint8_t is_precompiled = jl_atomic_load_relaxed(&mi->flags) & JL_MI_FLAGS_MASK_PRECOMPILED;
+ if (is_precompiled || !jl_object_in_image((jl_value_t*)mod) || type_in_worklist(mi->specTypes)) {
return 1;
}
if (!mi->backedges) {
@@ -752,6 +753,16 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t
static jl_value_t *replace_depot_func = NULL;
if (!replace_depot_func)
replace_depot_func = jl_get_global(jl_base_module, jl_symbol("replace_depot_path"));
+ static jl_value_t *normalize_depots_func = NULL;
+ if (!normalize_depots_func)
+ normalize_depots_func = jl_get_global(jl_base_module, jl_symbol("normalize_depots_for_relocation"));
+
+ jl_value_t *depots = NULL, *prefs_hash = NULL, *prefs_list = NULL;
+ JL_GC_PUSH2(&depots, &prefs_list);
+ last_age = ct->world_age;
+ ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
+ depots = jl_apply(&normalize_depots_func, 1);
+ ct->world_age = last_age;
// write a placeholder for total size so that we can quickly seek past all of the
// dependencies if we don't need them
@@ -764,13 +775,14 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t
if (replace_depot_func) {
jl_value_t **replace_depot_args;
- JL_GC_PUSHARGS(replace_depot_args, 2);
+ JL_GC_PUSHARGS(replace_depot_args, 3);
replace_depot_args[0] = replace_depot_func;
replace_depot_args[1] = deppath;
+ replace_depot_args[2] = depots;
ct = jl_current_task;
size_t last_age = ct->world_age;
ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
- deppath = (jl_value_t*)jl_apply(replace_depot_args, 2);
+ deppath = (jl_value_t*)jl_apply(replace_depot_args, 3);
ct->world_age = last_age;
JL_GC_POP();
}
@@ -803,9 +815,6 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t
write_int32(s, 0); // terminator, for ease of reading
// Calculate Preferences hash for current package.
- jl_value_t *prefs_hash = NULL;
- jl_value_t *prefs_list = NULL;
- JL_GC_PUSH1(&prefs_list);
if (jl_base_module) {
// Toplevel module is the module we're currently compiling, use it to get our preferences hash
jl_value_t * toplevel = (jl_value_t*)jl_get_global(jl_base_module, jl_symbol("__toplevel__"));
@@ -852,7 +861,7 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t
write_int32(s, 0);
write_uint64(s, 0);
}
- JL_GC_POP(); // for prefs_list
+ JL_GC_POP(); // for depots, prefs_list
// write a dummy file position to indicate the beginning of the source-text
pos = ios_pos(s);
diff --git a/src/support/arraylist.h b/src/support/arraylist.h
index 6ad2f0e2f28c9..a83bd2808756c 100644
--- a/src/support/arraylist.h
+++ b/src/support/arraylist.h
@@ -20,11 +20,11 @@ typedef struct {
void *_space[AL_N_INLINE];
} arraylist_t;
-arraylist_t *arraylist_new(arraylist_t *a, size_t size) JL_NOTSAFEPOINT;
-void arraylist_free(arraylist_t *a) JL_NOTSAFEPOINT;
+JL_DLLEXPORT arraylist_t *arraylist_new(arraylist_t *a, size_t size) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void arraylist_free(arraylist_t *a) JL_NOTSAFEPOINT;
-void arraylist_push(arraylist_t *a, void *elt) JL_NOTSAFEPOINT;
-void *arraylist_pop(arraylist_t *a) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void arraylist_push(arraylist_t *a, void *elt) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void *arraylist_pop(arraylist_t *a) JL_NOTSAFEPOINT;
JL_DLLEXPORT void arraylist_grow(arraylist_t *a, size_t n) JL_NOTSAFEPOINT;
typedef struct {
@@ -34,11 +34,12 @@ typedef struct {
void *_space[SMALL_AL_N_INLINE];
} small_arraylist_t;
-small_arraylist_t *small_arraylist_new(small_arraylist_t *a, uint32_t size) JL_NOTSAFEPOINT;
-void small_arraylist_free(small_arraylist_t *a) JL_NOTSAFEPOINT;
-void small_arraylist_push(small_arraylist_t *a, void *elt) JL_NOTSAFEPOINT;
-void *small_arraylist_pop(small_arraylist_t *a) JL_NOTSAFEPOINT;
+JL_DLLEXPORT small_arraylist_t *small_arraylist_new(small_arraylist_t *a, uint32_t size) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void small_arraylist_free(small_arraylist_t *a) JL_NOTSAFEPOINT;
+
+JL_DLLEXPORT void small_arraylist_push(small_arraylist_t *a, void *elt) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void *small_arraylist_pop(small_arraylist_t *a) JL_NOTSAFEPOINT;
JL_DLLEXPORT void small_arraylist_grow(small_arraylist_t *a, uint32_t n) JL_NOTSAFEPOINT;
#ifdef __cplusplus
diff --git a/src/sys.c b/src/sys.c
index b54edc32b32b6..fa9054bb93e9a 100644
--- a/src/sys.c
+++ b/src/sys.c
@@ -102,7 +102,6 @@ JL_DLLEXPORT int32_t jl_nb_available(ios_t *s)
// --- dir/file stuff ---
-JL_DLLEXPORT int jl_sizeof_uv_fs_t(void) { return sizeof(uv_fs_t); }
JL_DLLEXPORT char *jl_uv_fs_t_ptr(uv_fs_t *req) { return (char*)req->ptr; }
JL_DLLEXPORT char *jl_uv_fs_t_path(uv_fs_t *req) { return (char*)req->path; }
diff --git a/src/toplevel.c b/src/toplevel.c
index 5d17a3fcf89a7..8caa8b086ec00 100644
--- a/src/toplevel.c
+++ b/src/toplevel.c
@@ -213,6 +213,10 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex
form = NULL;
}
+ newm->file = jl_symbol(filename);
+ jl_gc_wb_knownold(newm, newm->file);
+ newm->line = lineno;
+
for (int i = 0; i < jl_array_nrows(exprs); i++) {
// process toplevel form
ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
diff --git a/stdlib/FileWatching/docs/src/index.md b/stdlib/FileWatching/docs/src/index.md
index 1b2212fcc5a28..15d4e39a45117 100644
--- a/stdlib/FileWatching/docs/src/index.md
+++ b/stdlib/FileWatching/docs/src/index.md
@@ -5,11 +5,17 @@ EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/FileWatching/do
# [File Events](@id lib-filewatching)
```@docs
-FileWatching.poll_fd
-FileWatching.poll_file
-FileWatching.watch_file
-FileWatching.watch_folder
-FileWatching.unwatch_folder
+poll_fd
+poll_file
+watch_file
+watch_folder
+unwatch_folder
+```
+```@docs
+FileMonitor
+FolderMonitor
+PollingFileWatcher
+FDWatcher
```
# Pidfile
diff --git a/stdlib/FileWatching/src/FileWatching.jl b/stdlib/FileWatching/src/FileWatching.jl
index 0c987ad01c828..7c743ce634193 100644
--- a/stdlib/FileWatching/src/FileWatching.jl
+++ b/stdlib/FileWatching/src/FileWatching.jl
@@ -6,7 +6,7 @@ Utilities for monitoring files and file descriptors for events.
module FileWatching
export
- # one-shot API (returns results):
+ # one-shot API (returns results, race-y):
watch_file, # efficient for small numbers of files
watch_folder, # efficient for large numbers of files
unwatch_folder,
@@ -22,11 +22,11 @@ export
trymkpidlock
import Base: @handle_as, wait, close, eventloop, notify_error, IOError,
- _sizeof_uv_poll, _sizeof_uv_fs_poll, _sizeof_uv_fs_event, _uv_hook_close, uv_error, _UVError,
- iolock_begin, iolock_end, associate_julia_struct, disassociate_julia_struct,
- preserve_handle, unpreserve_handle, isreadable, iswritable, isopen,
- |, getproperty, propertynames
-import Base.Filesystem.StatStruct
+ uv_req_data, uv_req_set_data, associate_julia_struct, disassociate_julia_struct,
+ _sizeof_uv_poll, _sizeof_uv_fs, _sizeof_uv_fs_event, _uv_hook_close, uv_error, _UVError,
+ iolock_begin, iolock_end, preserve_handle, unpreserve_handle,
+ isreadable, iswritable, isopen, |, getproperty, propertynames
+import Base.Filesystem: StatStruct, uv_fs_req_cleanup
if Sys.iswindows()
import Base.WindowsRawSocket
end
@@ -38,13 +38,13 @@ const UV_CHANGE = Int32(2)
struct FileEvent
renamed::Bool
changed::Bool
- timedout::Bool
+ timedout::Bool # aka canceled
FileEvent(r::Bool, c::Bool, t::Bool) = new(r, c, t)
end
FileEvent() = FileEvent(false, false, true)
FileEvent(flags::Integer) = FileEvent((flags & UV_RENAME) != 0,
(flags & UV_CHANGE) != 0,
- false)
+ iszero(flags))
|(a::FileEvent, b::FileEvent) =
FileEvent(a.renamed | b.renamed,
a.changed | b.changed,
@@ -78,34 +78,183 @@ isreadable(f::FDEvent) = f.readable
iswritable(f::FDEvent) = f.writable
|(a::FDEvent, b::FDEvent) = FDEvent(getfield(a, :events) | getfield(b, :events))
+# Callback functions
+
+function uv_fseventscb_file(handle::Ptr{Cvoid}, filename::Ptr, events::Int32, status::Int32)
+ t = @handle_as handle FileMonitor
+ lock(t.notify)
+ try
+ if status != 0
+ t.ioerrno = status
+ notify_error(t.notify, _UVError("FileMonitor", status))
+ uvfinalize(t)
+ elseif events != t.events
+ events = t.events |= events
+ notify(t.notify, all=false)
+ end
+ finally
+ unlock(t.notify)
+ end
+ nothing
+end
+
+function uv_fseventscb_folder(handle::Ptr{Cvoid}, filename::Ptr, events::Int32, status::Int32)
+ t = @handle_as handle FolderMonitor
+ lock(t.notify)
+ try
+ if status != 0
+ notify_error(t.notify, _UVError("FolderMonitor", status))
+ else
+ fname = (filename == C_NULL) ? "" : unsafe_string(convert(Cstring, filename))
+ push!(t.channel, fname => FileEvent(events))
+ notify(t.notify)
+ end
+ finally
+ unlock(t.notify)
+ end
+ nothing
+end
+
+function uv_pollcb(handle::Ptr{Cvoid}, status::Int32, events::Int32)
+ t = @handle_as handle _FDWatcher
+ lock(t.notify)
+ try
+ if status != 0
+ notify_error(t.notify, _UVError("FDWatcher", status))
+ else
+ t.events |= events
+ if t.active[1] || t.active[2]
+ if isempty(t.notify)
+ # if we keep hearing about events when nobody appears to be listening,
+ # stop the poll to save cycles
+ t.active = (false, false)
+ ccall(:uv_poll_stop, Int32, (Ptr{Cvoid},), t.handle)
+ end
+ end
+ notify(t.notify, events)
+ end
+ finally
+ unlock(t.notify)
+ end
+ nothing
+end
+
+function uv_fspollcb(req::Ptr{Cvoid})
+ pfw = unsafe_pointer_to_objref(uv_req_data(req))::PollingFileWatcher
+ pfw.active = false
+ unpreserve_handle(pfw)
+ @assert pointer(pfw.stat_req) == req
+ r = Int32(ccall(:uv_fs_get_result, Cssize_t, (Ptr{Cvoid},), req))
+ statbuf = ccall(:uv_fs_get_statbuf, Ptr{UInt8}, (Ptr{Cvoid},), req)
+ curr_stat = StatStruct(pfw.file, statbuf, r)
+ uv_fs_req_cleanup(req)
+ lock(pfw.notify)
+ try
+ if !isempty(pfw.notify) # must discard the update if nobody watching
+ if pfw.ioerrno != r || (r == 0 && pfw.prev_stat != curr_stat)
+ if r == 0
+ pfw.prev_stat = curr_stat
+ end
+ pfw.ioerrno = r
+ notify(pfw.notify, true)
+ end
+ pfw.timer = Timer(pfw.interval) do t
+ # async task
+ iolock_begin()
+ lock(pfw.notify)
+ try
+ if pfw.timer === t # use identity check to test if this callback is stale by the time we got the lock
+ pfw.timer = nothing
+ @assert !pfw.active
+ if isopen(pfw) && !isempty(pfw.notify)
+ preserve_handle(pfw)
+ uv_jl_fspollcb = @cfunction(uv_fspollcb, Cvoid, (Ptr{Cvoid},))
+ err = ccall(:uv_fs_stat, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}),
+ eventloop(), pfw.stat_req, pfw.file, uv_jl_fspollcb::Ptr{Cvoid})
+ err == 0 || notify(pfw.notify, _UVError("PollingFileWatcher (start)", err), error=true) # likely just ENOMEM
+ pfw.active = true
+ end
+ end
+ finally
+ unlock(pfw.notify)
+ end
+ iolock_end()
+ nothing
+ end
+ end
+ finally
+ unlock(pfw.notify)
+ end
+ nothing
+end
+
+# Types
+
+"""
+ FileMonitor(path::AbstractString)
+
+Watch file or directory `path` (which must exist) for changes until a change occurs. This
+function does not poll the file system and instead uses platform-specific functionality to
+receive notifications from the operating system (e.g. via inotify on Linux). See the NodeJS
+documentation linked below for details.
+
+`fm = FileMonitor(path)` acts like an auto-reset Event, so `wait(fm)` blocks until there has
+been at least one event in the file originally at the given path and then returns an object
+with boolean fields `renamed`, `changed`, `timedout` summarizing all changes that have
+occurred since the last call to `wait` returned.
+
+This behavior of this function varies slightly across platforms. See
+<https://nodejs.org/api/fs.html#caveats> for more detailed information.
+"""
mutable struct FileMonitor
@atomic handle::Ptr{Cvoid}
- file::String
- notify::Base.ThreadSynchronizer
- events::Int32
- active::Bool
+ const file::String
+ const notify::Base.ThreadSynchronizer
+ events::Int32 # accumulator for events that occurred since the last wait call, similar to Event with autoreset
+ ioerrno::Int32 # record the error, if any occurs (unlikely)
FileMonitor(file::AbstractString) = FileMonitor(String(file))
function FileMonitor(file::String)
handle = Libc.malloc(_sizeof_uv_fs_event)
- this = new(handle, file, Base.ThreadSynchronizer(), 0, false)
+ this = new(handle, file, Base.ThreadSynchronizer(), 0, 0)
associate_julia_struct(handle, this)
iolock_begin()
err = ccall(:uv_fs_event_init, Cint, (Ptr{Cvoid}, Ptr{Cvoid}), eventloop(), handle)
if err != 0
Libc.free(handle)
- throw(_UVError("FileMonitor", err))
+ uv_error("FileMonitor", err)
end
- iolock_end()
finalizer(uvfinalize, this)
+ uv_jl_fseventscb_file = @cfunction(uv_fseventscb_file, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32))
+ uv_error("FileMonitor (start)",
+ ccall(:uv_fs_event_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Int32),
+ this.handle, uv_jl_fseventscb_file::Ptr{Cvoid}, file, 0))
+ iolock_end()
return this
end
end
+
+"""
+ FolderMonitor(folder::AbstractString)
+
+Watch a file or directory `path` for changes until a change has occurred. This function does
+not poll the file system and instead uses platform-specific functionality to receive
+notifications from the operating system (e.g. via inotify on Linux). See the NodeJS
+documentation linked below for details.
+
+This acts similar to a Channel, so calling `take!` (or `wait`) blocks until some change has
+occurred. The `wait` function will return a pair where the first field is the name of the
+changed file (if available) and the second field is an object with boolean fields `renamed`
+and `changed`, giving the event that occurred on it.
+
+This behavior of this function varies slightly across platforms. See
+<https://nodejs.org/api/fs.html#caveats> for more detailed information.
+"""
mutable struct FolderMonitor
@atomic handle::Ptr{Cvoid}
# notify::Channel{Any} # eltype = Union{Pair{String, FileEvent}, IOError}
- notify::Base.ThreadSynchronizer
- channel::Vector{Any} # eltype = Pair{String, FileEvent}
+ const notify::Base.ThreadSynchronizer
+ const channel::Vector{Any} # eltype = Pair{String, FileEvent}
FolderMonitor(folder::AbstractString) = FolderMonitor(String(folder))
function FolderMonitor(folder::String)
handle = Libc.malloc(_sizeof_uv_fs_event)
@@ -118,6 +267,7 @@ mutable struct FolderMonitor
throw(_UVError("FolderMonitor", err))
end
finalizer(uvfinalize, this)
+ uv_jl_fseventscb_folder = @cfunction(uv_fseventscb_folder, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32))
uv_error("FolderMonitor (start)",
ccall(:uv_fs_event_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Int32),
handle, uv_jl_fseventscb_folder::Ptr{Cvoid}, folder, 0))
@@ -126,36 +276,55 @@ mutable struct FolderMonitor
end
end
+# this is similar to uv_fs_poll, but strives to avoid the design mistakes that make it unsuitable for any usable purpose
+# https://github.com/libuv/libuv/issues/4543
+"""
+ PollingFileWatcher(path::AbstractString, interval_s::Real=5.007)
+
+Monitor a file for changes by polling `stat` every `interval_s` seconds until a change
+occurs or `timeout_s` seconds have elapsed. The `interval_s` should be a long period; the
+default is 5.007 seconds. Call `stat` on it to get the most recent, but old, result.
+
+This acts like an auto-reset Event, so calling `wait` blocks until the `stat` result has
+changed since the previous value captured upon entry to the `wait` call. The `wait` function
+will return a pair of status objects `(previous, current)` once any `stat` change is
+detected since the previous time that `wait` was called. The `previous` status is always a
+`StatStruct`, but it may have all of the fields zeroed (indicating the file didn't
+previously exist, or wasn't previously accessible).
+
+The `current` status object may be a `StatStruct`, an `EOFError` (if the wait is canceled by
+closing this object), or some other `Exception` subtype (if the `stat` operation failed: for
+example, if the path is removed). Note that `stat` value may be outdated if the file has
+changed again multiple times.
+
+Using [`FileMonitor`](@ref) for this operation is preferred, since it is more reliable and
+efficient, although in some situations it may not be available.
+"""
mutable struct PollingFileWatcher
- @atomic handle::Ptr{Cvoid}
file::String
- interval::UInt32
- notify::Base.ThreadSynchronizer
- active::Bool
- curr_error::Int32
- curr_stat::StatStruct
+ interval::Float64
+ const notify::Base.ThreadSynchronizer # lock protects all fields which can be changed (including interval and file, if you really must)
+ timer::Union{Nothing,Timer}
+ const stat_req::Memory{UInt8}
+ active::Bool # whether there is already an uv_fspollcb in-flight, so to speak
+ closed::Bool # whether the user has explicitly destroyed this
+ ioerrno::Int32 # the stat errno as of the last result
+ prev_stat::StatStruct # the stat as of the last successful result
PollingFileWatcher(file::AbstractString, interval::Float64=5.007) = PollingFileWatcher(String(file), interval)
function PollingFileWatcher(file::String, interval::Float64=5.007) # same default as nodejs
- handle = Libc.malloc(_sizeof_uv_fs_poll)
- this = new(handle, file, round(UInt32, interval * 1000), Base.ThreadSynchronizer(), false, 0, StatStruct())
- associate_julia_struct(handle, this)
- iolock_begin()
- err = ccall(:uv_fs_poll_init, Int32, (Ptr{Cvoid}, Ptr{Cvoid}), eventloop(), handle)
- if err != 0
- Libc.free(handle)
- throw(_UVError("PollingFileWatcher", err))
- end
- finalizer(uvfinalize, this)
- iolock_end()
+ stat_req = Memory{UInt8}(undef, Int(_sizeof_uv_fs))
+ this = new(file, interval, Base.ThreadSynchronizer(), nothing, stat_req, false, false, 0, StatStruct())
+ uv_req_set_data(stat_req, this)
+ wait(this) # initialize with the current stat before return
return this
end
end
mutable struct _FDWatcher
@atomic handle::Ptr{Cvoid}
- fdnum::Int # this is NOT the file descriptor
+ const fdnum::Int # this is NOT the file descriptor
refcount::Tuple{Int, Int}
- notify::Base.ThreadSynchronizer
+ const notify::Base.ThreadSynchronizer
events::Int32
active::Tuple{Bool, Bool}
@@ -274,9 +443,28 @@ mutable struct _FDWatcher
end
end
+"""
+ FDWatcher(fd::Union{RawFD,WindowsRawSocket}, readable::Bool, writable::Bool)
+
+Monitor a file descriptor `fd` for changes in the read or write availability.
+
+The keyword arguments determine which of read and/or write status should be monitored; at
+least one of them must be set to `true`.
+
+The returned value is an object with boolean fields `readable`, `writable`, and `timedout`,
+giving the result of the polling.
+
+This acts like a level-set event, so calling `wait` blocks until one of those conditions is
+met, but then continues to return without blocking until the condition is cleared (either
+there is no more to read, or no more space in the write buffer, or both).
+
+!!! warning
+ You must call `close` manually, when finished with this object, before the fd
+ argument is closed. Failure to do so risks serious crashes.
+"""
mutable struct FDWatcher
# WARNING: make sure `close` has been manually called on this watcher before closing / destroying `fd`
- watcher::_FDWatcher
+ const watcher::_FDWatcher
mask::FDEvent
function FDWatcher(fd::RawFD, readable::Bool, writable::Bool)
return FDWatcher(fd, FDEvent(readable, writable, false, false))
@@ -327,7 +515,7 @@ function close(t::FDWatcher)
close(t.watcher, mask)
end
-function uvfinalize(uv::Union{FileMonitor, FolderMonitor, PollingFileWatcher})
+function uvfinalize(uv::Union{FileMonitor, FolderMonitor})
iolock_begin()
if uv.handle != C_NULL
disassociate_julia_struct(uv) # close (and free) without notify
@@ -336,7 +524,7 @@ function uvfinalize(uv::Union{FileMonitor, FolderMonitor, PollingFileWatcher})
iolock_end()
end
-function close(t::Union{FileMonitor, FolderMonitor, PollingFileWatcher})
+function close(t::Union{FileMonitor, FolderMonitor})
iolock_begin()
if t.handle != C_NULL
ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t.handle)
@@ -344,6 +532,21 @@ function close(t::Union{FileMonitor, FolderMonitor, PollingFileWatcher})
iolock_end()
end
+function close(pfw::PollingFileWatcher)
+ timer = nothing
+ lock(pfw.notify)
+ try
+ pfw.closed = true
+ notify(pfw.notify, false)
+ timer = pfw.timer
+ pfw.timer = nothing
+ finally
+ unlock(pfw.notify)
+ end
+ timer === nothing || close(timer)
+ nothing
+end
+
function _uv_hook_close(uv::_FDWatcher)
# fyi: jl_atexit_hook can cause this to get called too
Libc.free(@atomicswap :monotonic uv.handle = C_NULL)
@@ -351,24 +554,11 @@ function _uv_hook_close(uv::_FDWatcher)
nothing
end
-function _uv_hook_close(uv::PollingFileWatcher)
- lock(uv.notify)
- try
- uv.active = false
- Libc.free(@atomicswap :monotonic uv.handle = C_NULL)
- notify(uv.notify, StatStruct())
- finally
- unlock(uv.notify)
- end
- nothing
-end
-
function _uv_hook_close(uv::FileMonitor)
lock(uv.notify)
try
- uv.active = false
Libc.free(@atomicswap :monotonic uv.handle = C_NULL)
- notify(uv.notify, FileEvent())
+ notify(uv.notify)
finally
unlock(uv.notify)
end
@@ -388,179 +578,11 @@ end
isopen(fm::FileMonitor) = fm.handle != C_NULL
isopen(fm::FolderMonitor) = fm.handle != C_NULL
-isopen(pfw::PollingFileWatcher) = pfw.handle != C_NULL
+isopen(pfw::PollingFileWatcher) = !pfw.closed
isopen(pfw::_FDWatcher) = pfw.refcount != (0, 0)
isopen(pfw::FDWatcher) = !pfw.mask.timedout
-function uv_fseventscb_file(handle::Ptr{Cvoid}, filename::Ptr, events::Int32, status::Int32)
- t = @handle_as handle FileMonitor
- lock(t.notify)
- try
- if status != 0
- notify_error(t.notify, _UVError("FileMonitor", status))
- else
- t.events |= events
- notify(t.notify, FileEvent(events))
- end
- finally
- unlock(t.notify)
- end
- nothing
-end
-
-function uv_fseventscb_folder(handle::Ptr{Cvoid}, filename::Ptr, events::Int32, status::Int32)
- t = @handle_as handle FolderMonitor
- lock(t.notify)
- try
- if status != 0
- notify_error(t.notify, _UVError("FolderMonitor", status))
- else
- fname = (filename == C_NULL) ? "" : unsafe_string(convert(Cstring, filename))
- push!(t.channel, fname => FileEvent(events))
- notify(t.notify)
- end
- finally
- unlock(t.notify)
- end
- nothing
-end
-
-function uv_pollcb(handle::Ptr{Cvoid}, status::Int32, events::Int32)
- t = @handle_as handle _FDWatcher
- lock(t.notify)
- try
- if status != 0
- notify_error(t.notify, _UVError("FDWatcher", status))
- else
- t.events |= events
- if t.active[1] || t.active[2]
- if isempty(t.notify)
- # if we keep hearing about events when nobody appears to be listening,
- # stop the poll to save cycles
- t.active = (false, false)
- ccall(:uv_poll_stop, Int32, (Ptr{Cvoid},), t.handle)
- end
- end
- notify(t.notify, events)
- end
- finally
- unlock(t.notify)
- end
- nothing
-end
-
-function uv_fspollcb(handle::Ptr{Cvoid}, status::Int32, prev::Ptr, curr::Ptr)
- t = @handle_as handle PollingFileWatcher
- old_status = t.curr_error
- t.curr_error = status
- if status == 0
- t.curr_stat = StatStruct(convert(Ptr{UInt8}, curr))
- end
- if status == 0 || status != old_status
- prev_stat = StatStruct(convert(Ptr{UInt8}, prev))
- lock(t.notify)
- try
- notify(t.notify, prev_stat)
- finally
- unlock(t.notify)
- end
- end
- nothing
-end
-
-global uv_jl_pollcb::Ptr{Cvoid}
-global uv_jl_fspollcb::Ptr{Cvoid}
-global uv_jl_fseventscb_file::Ptr{Cvoid}
-global uv_jl_fseventscb_folder::Ptr{Cvoid}
-
-function __init__()
- global uv_jl_pollcb = @cfunction(uv_pollcb, Cvoid, (Ptr{Cvoid}, Cint, Cint))
- global uv_jl_fspollcb = @cfunction(uv_fspollcb, Cvoid, (Ptr{Cvoid}, Cint, Ptr{Cvoid}, Ptr{Cvoid}))
- global uv_jl_fseventscb_file = @cfunction(uv_fseventscb_file, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32))
- global uv_jl_fseventscb_folder = @cfunction(uv_fseventscb_folder, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32))
-
- Base.mkpidlock_hook = mkpidlock
- Base.trymkpidlock_hook = trymkpidlock
- Base.parse_pidfile_hook = Pidfile.parse_pidfile
-
- nothing
-end
-
-function start_watching(t::_FDWatcher)
- iolock_begin()
- t.handle == C_NULL && throw(ArgumentError("FDWatcher is closed"))
- readable = t.refcount[1] > 0
- writable = t.refcount[2] > 0
- if t.active[1] != readable || t.active[2] != writable
- # make sure the READABLE / WRITEABLE state is updated
- uv_error("FDWatcher (start)",
- ccall(:uv_poll_start, Int32, (Ptr{Cvoid}, Int32, Ptr{Cvoid}),
- t.handle,
- (readable ? UV_READABLE : 0) | (writable ? UV_WRITABLE : 0),
- uv_jl_pollcb::Ptr{Cvoid}))
- t.active = (readable, writable)
- end
- iolock_end()
- nothing
-end
-
-function start_watching(t::PollingFileWatcher)
- iolock_begin()
- t.handle == C_NULL && throw(ArgumentError("PollingFileWatcher is closed"))
- if !t.active
- uv_error("PollingFileWatcher (start)",
- ccall(:uv_fs_poll_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, UInt32),
- t.handle, uv_jl_fspollcb::Ptr{Cvoid}, t.file, t.interval))
- t.active = true
- end
- iolock_end()
- nothing
-end
-
-function stop_watching(t::PollingFileWatcher)
- iolock_begin()
- lock(t.notify)
- try
- if t.active && isempty(t.notify)
- t.active = false
- uv_error("PollingFileWatcher (stop)",
- ccall(:uv_fs_poll_stop, Int32, (Ptr{Cvoid},), t.handle))
- end
- finally
- unlock(t.notify)
- end
- iolock_end()
- nothing
-end
-
-function start_watching(t::FileMonitor)
- iolock_begin()
- t.handle == C_NULL && throw(ArgumentError("FileMonitor is closed"))
- if !t.active
- uv_error("FileMonitor (start)",
- ccall(:uv_fs_event_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Int32),
- t.handle, uv_jl_fseventscb_file::Ptr{Cvoid}, t.file, 0))
- t.active = true
- end
- iolock_end()
- nothing
-end
-
-function stop_watching(t::FileMonitor)
- iolock_begin()
- lock(t.notify)
- try
- if t.active && isempty(t.notify)
- t.active = false
- uv_error("FileMonitor (stop)",
- ccall(:uv_fs_event_stop, Int32, (Ptr{Cvoid},), t.handle))
- end
- finally
- unlock(t.notify)
- end
- iolock_end()
- nothing
-end
+Base.stat(pfw::PollingFileWatcher) = Base.checkstat(@lock pfw.notify pfw.prev_stat)
# n.b. this _wait may return spuriously early with a timedout event
function _wait(fdw::_FDWatcher, mask::FDEvent)
@@ -572,7 +594,20 @@ function _wait(fdw::_FDWatcher, mask::FDEvent)
if !isopen(fdw) # !open
throw(EOFError())
elseif events.timedout
- start_watching(fdw) # make sure the poll is active
+ fdw.handle == C_NULL && throw(ArgumentError("FDWatcher is closed"))
+ # start_watching to make sure the poll is active
+ readable = fdw.refcount[1] > 0
+ writable = fdw.refcount[2] > 0
+ if fdw.active[1] != readable || fdw.active[2] != writable
+ # make sure the READABLE / WRITEABLE state is updated
+ uv_jl_pollcb = @cfunction(uv_pollcb, Cvoid, (Ptr{Cvoid}, Cint, Cint))
+ uv_error("FDWatcher (start)",
+ ccall(:uv_poll_start, Int32, (Ptr{Cvoid}, Int32, Ptr{Cvoid}),
+ fdw.handle,
+ (readable ? UV_READABLE : 0) | (writable ? UV_WRITABLE : 0),
+ uv_jl_pollcb::Ptr{Cvoid}))
+ fdw.active = (readable, writable)
+ end
iolock_end()
return FDEvent(wait(fdw.notify)::Int32)
else
@@ -640,52 +675,88 @@ end
function wait(pfw::PollingFileWatcher)
iolock_begin()
- preserve_handle(pfw)
lock(pfw.notify)
- local prevstat
+ prevstat = pfw.prev_stat
+ havechange = false
+ timer = nothing
try
- start_watching(pfw)
+ # we aren't too strict about the first interval after `wait`, but rather always
+ # check right away to see if it had immediately changed again, and then repeatedly
+ # after interval again until success
+ pfw.closed && throw(ArgumentError("PollingFileWatcher is closed"))
+ timer = pfw.timer
+ pfw.timer = nothing # disable Timer callback
+ # start_watching
+ if !pfw.active
+ preserve_handle(pfw)
+ uv_jl_fspollcb = @cfunction(uv_fspollcb, Cvoid, (Ptr{Cvoid},))
+ err = ccall(:uv_fs_stat, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}),
+ eventloop(), pfw.stat_req, pfw.file, uv_jl_fspollcb::Ptr{Cvoid})
+ err == 0 || uv_error("PollingFileWatcher (start)", err) # likely just ENOMEM
+ pfw.active = true
+ end
iolock_end()
- prevstat = wait(pfw.notify)::StatStruct
+ havechange = wait(pfw.notify)::Bool
unlock(pfw.notify)
iolock_begin()
- lock(pfw.notify)
- finally
- unlock(pfw.notify)
- unpreserve_handle(pfw)
+ catch
+ # stop_watching: cleanup any timers from before or after starting this wait before it failed, if there are no other watchers
+ latetimer = nothing
+ try
+ if isempty(pfw.notify)
+ latetimer = pfw.timer
+ pfw.timer = nothing
+ end
+ finally
+ unlock(pfw.notify)
+ end
+ if timer !== nothing || latetimer !== nothing
+ iolock_end()
+ timer === nothing || close(timer)
+ latetimer === nothing || close(latetimer)
+ iolock_begin()
+ end
+ rethrow()
end
- stop_watching(pfw)
iolock_end()
- if pfw.handle == C_NULL
+ timer === nothing || close(timer) # cleanup resources so we don't hang on exit
+ if !havechange # user canceled by calling close
return prevstat, EOFError()
- elseif pfw.curr_error != 0
- return prevstat, _UVError("PollingFileWatcher", pfw.curr_error)
+ end
+ # grab the most up-to-date stat result as of this time, even if it was a bit newer than
+ # the notify call (unlikely, as there would need to be a concurrent call to wait)
+ lock(pfw.notify)
+ currstat = pfw.prev_stat
+ ioerrno = pfw.ioerrno
+ unlock(pfw.notify)
+ if ioerrno == 0
+ @assert currstat.ioerrno == 0
+ return prevstat, currstat
+ elseif ioerrno in (Base.UV_ENOENT, Base.UV_ENOTDIR, Base.UV_EINVAL)
+ return prevstat, StatStruct(pfw.file, Ptr{UInt8}(0), ioerrno)
else
- return prevstat, pfw.curr_stat
+ return prevstat, _UVError("PollingFileWatcher", ioerrno)
end
end
function wait(m::FileMonitor)
- iolock_begin()
+ m.handle == C_NULL && throw(EOFError())
preserve_handle(m)
lock(m.notify)
- local events
try
- start_watching(m)
- iolock_end()
- events = wait(m.notify)::FileEvent
- events |= FileEvent(m.events)
- m.events = 0
- unlock(m.notify)
- iolock_begin()
- lock(m.notify)
+ while true
+ m.handle == C_NULL && throw(EOFError())
+ events = @atomicswap :not_atomic m.events = 0
+ events == 0 || return FileEvent(events)
+ if m.ioerrno != 0
+ uv_error("FileMonitor", m.ioerrno)
+ end
+ wait(m.notify)
+ end
finally
unlock(m.notify)
unpreserve_handle(m)
end
- stop_watching(m)
- iolock_end()
- return events
end
function wait(m::FolderMonitor)
@@ -704,6 +775,7 @@ function wait(m::FolderMonitor)
end
return evt::Pair{String, FileEvent}
end
+Base.take!(m::FolderMonitor) = wait(m) # Channel-like API
"""
@@ -717,6 +789,10 @@ least one of them must be set to `true`.
The returned value is an object with boolean fields `readable`, `writable`, and `timedout`,
giving the result of the polling.
+
+This is a thin wrapper over calling `wait` on a [`FDWatcher`](@ref), which implements the
+functionality but requires the user to call `close` manually when finished with it, or risk
+serious crashes.
"""
function poll_fd(s::Union{RawFD, Sys.iswindows() ? WindowsRawSocket : Union{}}, timeout_s::Real=-1; readable=false, writable=false)
mask = FDEvent(readable, writable, false, false)
@@ -774,6 +850,15 @@ giving the result of watching the file.
This behavior of this function varies slightly across platforms. See
for more detailed information.
+
+This is a thin wrapper over calling `wait` on a [`FileMonitor`](@ref). This function has a
+small race window between consecutive calls to `watch_file` where the file might change
+without being detected. To avoid this race, use
+
+ fm = FileMonitor(path)
+ wait(fm)
+
+directly, re-using the same `fm` each time you `wait`.
"""
function watch_file(s::String, timeout_s::Float64=-1.0)
fm = FileMonitor(s)
@@ -784,7 +869,12 @@ function watch_file(s::String, timeout_s::Float64=-1.0)
close(fm)
end
end
- return wait(fm)
+ try
+ return wait(fm)
+ catch ex
+ ex isa EOFError && return FileEvent()
+ rethrow()
+ end
finally
close(fm)
@isdefined(timer) && close(timer)
@@ -795,7 +885,7 @@ watch_file(s::AbstractString, timeout_s::Real=-1) = watch_file(String(s), Float6
"""
watch_folder(path::AbstractString, timeout_s::Real=-1)
-Watches a file or directory `path` for changes until a change has occurred or `timeout_s`
+Watch a file or directory `path` for changes until a change has occurred or `timeout_s`
seconds have elapsed. This function does not poll the file system and instead uses platform-specific
functionality to receive notifications from the operating system (e.g. via inotify on Linux).
See the NodeJS documentation linked below for details.
@@ -809,10 +899,12 @@ giving the event.
This behavior of this function varies slightly across platforms. See
for more detailed information.
+
+This function is a thin wrapper over calling `wait` on a [`FolderMonitor`](@ref), with added timeout support.
"""
watch_folder(s::AbstractString, timeout_s::Real=-1) = watch_folder(String(s), timeout_s)
function watch_folder(s::String, timeout_s::Real=-1)
- fm = get!(watched_folders, s) do
+ fm = @lock watched_folders get!(watched_folders[], s) do
return FolderMonitor(s)
end
local timer
@@ -859,12 +951,12 @@ It is not recommended to do this while another task is waiting for
"""
unwatch_folder(s::AbstractString) = unwatch_folder(String(s))
function unwatch_folder(s::String)
- fm = pop!(watched_folders, s, nothing)
+ fm = @lock watched_folders pop!(watched_folders[], s, nothing)
fm === nothing || close(fm)
nothing
end
-const watched_folders = Dict{String, FolderMonitor}()
+const watched_folders = Lockable(Dict{String, FolderMonitor}())
"""
poll_file(path::AbstractString, interval_s::Real=5.007, timeout_s::Real=-1) -> (previous::StatStruct, current)
@@ -878,11 +970,15 @@ The `previous` status is always a `StatStruct`, but it may have all of the field
(indicating the file didn't previously exist, or wasn't previously accessible).
The `current` status object may be a `StatStruct`, an `EOFError` (indicating the timeout elapsed),
-or some other `Exception` subtype (if the `stat` operation failed - for example, if the path does not exist).
+or some other `Exception` subtype (if the `stat` operation failed: for example, if the path does not exist).
+
+To determine when a file was modified, check `!(current isa StatStruct && prev == current)` to detect
+notification of changes to the mtime or inode. However, using [`watch_file`](@ref) for this operation
+is preferred, since it is more reliable and efficient, although in some situations it may not be available.
-To determine when a file was modified, compare `current isa StatStruct && mtime(prev) != mtime(current)` to detect
-notification of changes. However, using [`watch_file`](@ref) for this operation is preferred, since
-it is more reliable and efficient, although in some situations it may not be available.
+This is a thin wrapper over calling `wait` on a [`PollingFileWatcher`](@ref), which implements
+the functionality, but this function has a small race window between consecutive calls to
+`poll_file` where the file might change without being detected.
"""
function poll_file(s::AbstractString, interval_seconds::Real=5.007, timeout_s::Real=-1)
pfw = PollingFileWatcher(s, Float64(interval_seconds))
@@ -893,12 +989,7 @@ function poll_file(s::AbstractString, interval_seconds::Real=5.007, timeout_s::R
close(pfw)
end
end
- statdiff = wait(pfw)
- if isa(statdiff[2], IOError)
- # file didn't initially exist, continue watching for it to be created (or the error to change)
- statdiff = wait(pfw)
- end
- return statdiff
+ return wait(pfw)
finally
close(pfw)
@isdefined(timer) && close(timer)
@@ -908,4 +999,11 @@ end
include("pidfile.jl")
import .Pidfile: mkpidlock, trymkpidlock
+function __init__()
+ Base.mkpidlock_hook = mkpidlock
+ Base.trymkpidlock_hook = trymkpidlock
+ Base.parse_pidfile_hook = Pidfile.parse_pidfile
+ nothing
+end
+
end
diff --git a/stdlib/FileWatching/src/pidfile.jl b/stdlib/FileWatching/src/pidfile.jl
index 4c821a3d897e4..95b8f20face29 100644
--- a/stdlib/FileWatching/src/pidfile.jl
+++ b/stdlib/FileWatching/src/pidfile.jl
@@ -4,14 +4,14 @@ module Pidfile
export mkpidlock, trymkpidlock
using Base:
- IOError, UV_EEXIST, UV_ESRCH,
+ IOError, UV_EEXIST, UV_ESRCH, UV_ENOENT,
Process
using Base.Filesystem:
File, open, JL_O_CREAT, JL_O_RDWR, JL_O_RDONLY, JL_O_EXCL,
rename, samefile, path_separator
-using ..FileWatching: watch_file
+using ..FileWatching: FileMonitor
using Base.Sys: iswindows
"""
@@ -256,19 +256,43 @@ function open_exclusive(path::String;
end
end
# fall-back: wait for the lock
-
+ watch = Lockable(Core.Box(nothing))
while true
- # start the file-watcher prior to checking for the pidfile existence
- t = @async try
- watch_file(path, poll_interval)
+ # now try again to create it
+ # try to start the file-watcher prior to checking for the pidfile existence
+ watch = try
+ FileMonitor(path)
catch ex
isa(ex, IOError) || rethrow(ex)
- sleep(poll_interval) # if the watch failed, convert to just doing a sleep
+ ex.code != UV_ENOENT # if the file was deleted in the meantime, don't sleep at all, even if the lock fails
+ end
+ timeout = nothing
+ if watch isa FileMonitor && stale_age > 0
+ let watch = watch
+ timeout = Timer(stale_age) do t
+ close(watch)
+ end
+ end
+ end
+ try
+ file = tryopen_exclusive(path, mode)
+ file === nothing || return file
+ if watch isa FileMonitor
+ try
+ Base.wait(watch) # will time-out after stale_age passes
+ catch ex
+ isa(ex, EOFError) || isa(ex, IOError) || rethrow(ex)
+ end
+ end
+ if watch === true # if the watch failed, convert to just doing a sleep
+ sleep(poll_interval)
+ end
+ finally
+ # something changed about the path, so watch is now possibly monitoring the wrong file handle
+ # it will need to be recreated just before the next tryopen_exclusive attempt
+ timeout isa Timer && close(timeout)
+ watch isa FileMonitor && close(watch)
end
- # now try again to create it
- file = tryopen_exclusive(path, mode)
- file === nothing || return file
- Base.wait(t) # sleep for a bit before trying again
if stale_age > 0 && stale_pidfile(path, stale_age, refresh)
# if the file seems stale, try to remove it before attempting again
# set stale_age to zero so we won't attempt again, even if the attempt fails
diff --git a/stdlib/FileWatching/test/runtests.jl b/stdlib/FileWatching/test/runtests.jl
index 2592aea024386..def555154264d 100644
--- a/stdlib/FileWatching/test/runtests.jl
+++ b/stdlib/FileWatching/test/runtests.jl
@@ -2,6 +2,7 @@
using Test, FileWatching
using Base: uv_error, Experimental
+using Base.Filesystem: StatStruct
@testset "FileWatching" begin
@@ -168,12 +169,13 @@ file = joinpath(dir, "afile.txt")
# initialize a watch_folder instance and create afile.txt
function test_init_afile()
- @test isempty(FileWatching.watched_folders)
+ watched_folders = FileWatching.watched_folders
+ @test @lock watched_folders isempty(watched_folders[])
@test(watch_folder(dir, 0) == ("" => FileWatching.FileEvent()))
@test @elapsed(@test(watch_folder(dir, 0) == ("" => FileWatching.FileEvent()))) <= 0.5
- @test length(FileWatching.watched_folders) == 1
+ @test @lock(watched_folders, length(FileWatching.watched_folders[])) == 1
@test unwatch_folder(dir) === nothing
- @test isempty(FileWatching.watched_folders)
+ @test @lock watched_folders isempty(watched_folders[])
@test 0.002 <= @elapsed(@test(watch_folder(dir, 0.004) == ("" => FileWatching.FileEvent())))
@test 0.002 <= @elapsed(@test(watch_folder(dir, 0.004) == ("" => FileWatching.FileEvent()))) <= 0.5
@test unwatch_folder(dir) === nothing
@@ -203,7 +205,7 @@ function test_init_afile()
@test unwatch_folder(dir) === nothing
@test(watch_folder(dir, 0) == ("" => FileWatching.FileEvent()))
@test 0.9 <= @elapsed(@test(watch_folder(dir, 1) == ("" => FileWatching.FileEvent())))
- @test length(FileWatching.watched_folders) == 1
+ @test @lock(watched_folders, length(FileWatching.watched_folders[])) == 1
nothing
end
@@ -218,7 +220,7 @@ function test_timeout(tval)
@async test_file_poll(channel, 10, tval)
tr = take!(channel)
end
- @test tr[1] === Base.Filesystem.StatStruct() && tr[2] === EOFError()
+ @test ispath(tr[1]::StatStruct) && tr[2] === EOFError()
@test tval <= t_elapsed
end
@@ -231,7 +233,7 @@ function test_touch(slval)
write(f, "Hello World\n")
close(f)
tr = take!(channel)
- @test ispath(tr[1]) && ispath(tr[2])
+ @test ispath(tr[1]::StatStruct) && ispath(tr[2]::StatStruct)
fetch(t)
end
@@ -435,11 +437,11 @@ end
@test_throws(Base._UVError("FolderMonitor (start)", Base.UV_ENOENT),
watch_folder("____nonexistent_file", 10))
@test(@elapsed(
- @test(poll_file("____nonexistent_file", 1, 3.1) ===
- (Base.Filesystem.StatStruct(), EOFError()))) > 3)
+ @test(poll_file("____nonexistent_file", 1, 3.1) ==
+ (StatStruct(), EOFError()))) > 3)
unwatch_folder(dir)
-@test isempty(FileWatching.watched_folders)
+@test @lock FileWatching.watched_folders isempty(FileWatching.watched_folders[])
rm(file)
rm(dir)
@@ -450,10 +452,6 @@ rm(dir)
include("pidfile.jl")
end
-@testset "Docstrings" begin
- undoc = Docs.undocumented_names(FileWatching)
- @test_broken isempty(undoc)
- @test undoc == [:FDWatcher, :FileMonitor, :FolderMonitor, :PollingFileWatcher]
-end
+@test isempty(Docs.undocumented_names(FileWatching))
end # testset
diff --git a/stdlib/LinearAlgebra/src/blas.jl b/stdlib/LinearAlgebra/src/blas.jl
index 413b7866c5444..3c15630091162 100644
--- a/stdlib/LinearAlgebra/src/blas.jl
+++ b/stdlib/LinearAlgebra/src/blas.jl
@@ -159,7 +159,7 @@ function check()
interface = USE_BLAS64 ? :ilp64 : :lp64
if !any(lib.interface == interface for lib in config.loaded_libs)
interfacestr = uppercase(string(interface))
- @error("No loaded BLAS libraries were built with $interfacestr support.")
+ println(Core.stderr, "No loaded BLAS libraries were built with $interfacestr support.")
exit(1)
end
end
diff --git a/stdlib/LinearAlgebra/src/diagonal.jl b/stdlib/LinearAlgebra/src/diagonal.jl
index d762549a2b228..0a95bac5ffb93 100644
--- a/stdlib/LinearAlgebra/src/diagonal.jl
+++ b/stdlib/LinearAlgebra/src/diagonal.jl
@@ -227,7 +227,6 @@ Base._reverse(A::Diagonal, dims) = reverse!(Matrix(A); dims)
Base._reverse(A::Diagonal, ::Colon) = Diagonal(reverse(A.diag))
Base._reverse!(A::Diagonal, ::Colon) = (reverse!(A.diag); A)
-ishermitian(D::Diagonal{<:Real}) = true
ishermitian(D::Diagonal{<:Number}) = isreal(D.diag)
ishermitian(D::Diagonal) = all(ishermitian, D.diag)
issymmetric(D::Diagonal{<:Number}) = true
diff --git a/stdlib/LinearAlgebra/src/lbt.jl b/stdlib/LinearAlgebra/src/lbt.jl
index 606ddedbe1343..81d10f930c8c5 100644
--- a/stdlib/LinearAlgebra/src/lbt.jl
+++ b/stdlib/LinearAlgebra/src/lbt.jl
@@ -17,7 +17,7 @@ end
macro get_warn(map, key)
return quote
if !haskey($(esc(map)), $(esc(key)))
- @warn(string("[LBT] Unknown key into ", $(string(map)), ": ", $(esc(key)), ", defaulting to :unknown"))
+ println(Core.stderr, string("Warning: [LBT] Unknown key into ", $(string(map)), ": ", $(esc(key)), ", defaulting to :unknown"))
# All the unknown values share a common value: `-1`
$(esc(map))[$(esc(LBT_INTERFACE_UNKNOWN))]
else
@@ -132,7 +132,7 @@ struct LBTConfig
if str_ptr != C_NULL
push!(exported_symbols, unsafe_string(str_ptr))
else
- @error("NULL string in lbt_config.exported_symbols[$(sym_idx)]")
+ println(Core.stderr, "Error: NULL string in lbt_config.exported_symbols[$(sym_idx)]")
end
end
diff --git a/stdlib/LinearAlgebra/src/special.jl b/stdlib/LinearAlgebra/src/special.jl
index 5a7c98cfdf32c..32a5476842933 100644
--- a/stdlib/LinearAlgebra/src/special.jl
+++ b/stdlib/LinearAlgebra/src/special.jl
@@ -112,6 +112,8 @@ for op in (:+, :-)
end
end
+(*)(Da::Diagonal, A::BandedMatrix, Db::Diagonal) = _tri_matmul(Da, A, Db)
+
# disambiguation between triangular and banded matrices, banded ones "dominate"
_mul!(C::AbstractMatrix, A::AbstractTriangular, B::BandedMatrix, alpha::Number, beta::Number) =
@stable_muladdmul _mul!(C, A, B, MulAddMul(alpha, beta))
diff --git a/stdlib/LinearAlgebra/src/symmetriceigen.jl b/stdlib/LinearAlgebra/src/symmetriceigen.jl
index fee524a702187..68a1b29f5dbc7 100644
--- a/stdlib/LinearAlgebra/src/symmetriceigen.jl
+++ b/stdlib/LinearAlgebra/src/symmetriceigen.jl
@@ -20,13 +20,6 @@ function eigen!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, alg::Algo
throw(ArgumentError("Unsupported value for `alg` keyword."))
end
end
-function eigen(A::RealHermSymComplexHerm{Float16}; sortby::Union{Function,Nothing}=nothing)
- S = eigtype(eltype(A))
- E = eigen!(eigencopy_oftype(A, S), sortby=sortby)
- values = convert(AbstractVector{Float16}, E.values)
- vectors = convert(AbstractMatrix{isreal(E.vectors) ? Float16 : Complex{Float16}}, E.vectors)
- return Eigen(values, vectors)
-end
"""
eigen(A::Union{Hermitian, Symmetric}, alg::Algorithm = default_eigen_alg(A)) -> Eigen
@@ -53,10 +46,22 @@ The default `alg` used may change in the future.
The following functions are available for `Eigen` objects: [`inv`](@ref), [`det`](@ref), and [`isposdef`](@ref).
"""
function eigen(A::RealHermSymComplexHerm, alg::Algorithm = default_eigen_alg(A); sortby::Union{Function,Nothing}=nothing)
+ _eigen(A, alg; sortby)
+end
+
+# we dispatch on the eltype in an internal method to avoid ambiguities
+function _eigen(A::RealHermSymComplexHerm, alg::Algorithm; sortby)
S = eigtype(eltype(A))
eigen!(eigencopy_oftype(A, S), alg; sortby)
end
+function _eigen(A::RealHermSymComplexHerm{Float16}, alg::Algorithm; sortby::Union{Function,Nothing}=nothing)
+ S = eigtype(eltype(A))
+ E = eigen!(eigencopy_oftype(A, S), alg, sortby=sortby)
+ values = convert(AbstractVector{Float16}, E.values)
+ vectors = convert(AbstractMatrix{isreal(E.vectors) ? Float16 : Complex{Float16}}, E.vectors)
+ return Eigen(values, vectors)
+end
eigen!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, irange::UnitRange) =
Eigen(LAPACK.syevr!('V', 'I', A.uplo, A.data, 0.0, 0.0, irange.start, irange.stop, -1.0)...)
diff --git a/stdlib/LinearAlgebra/src/tridiag.jl b/stdlib/LinearAlgebra/src/tridiag.jl
index e755ce63e9b2a..ca61eb8519d42 100644
--- a/stdlib/LinearAlgebra/src/tridiag.jl
+++ b/stdlib/LinearAlgebra/src/tridiag.jl
@@ -319,8 +319,6 @@ eigmax(A::SymTridiagonal) = eigvals(A, size(A, 1):size(A, 1))[1]
eigmin(A::SymTridiagonal) = eigvals(A, 1:1)[1]
#Compute selected eigenvectors only corresponding to particular eigenvalues
-eigvecs(A::SymTridiagonal) = eigen(A).vectors
-
"""
eigvecs(A::SymTridiagonal[, eigvals]) -> Matrix
diff --git a/stdlib/LinearAlgebra/src/uniformscaling.jl b/stdlib/LinearAlgebra/src/uniformscaling.jl
index b75886b8d99fb..472ea53078f87 100644
--- a/stdlib/LinearAlgebra/src/uniformscaling.jl
+++ b/stdlib/LinearAlgebra/src/uniformscaling.jl
@@ -403,6 +403,16 @@ function copyto!(A::Tridiagonal, J::UniformScaling)
return A
end
+"""
+ copy!(dest::AbstractMatrix, src::UniformScaling)
+
+Copy a [`UniformScaling`](@ref) onto a matrix.
+
+!!! compat "Julia 1.12"
+ This method is available as of Julia 1.12.
+"""
+Base.copy!(A::AbstractMatrix, J::UniformScaling) = copyto!(A, J)
+
function cond(J::UniformScaling{T}) where T
onereal = inv(one(real(J.λ)))
return J.λ ≠ zero(T) ? onereal : oftype(onereal, Inf)
diff --git a/stdlib/LinearAlgebra/test/diagonal.jl b/stdlib/LinearAlgebra/test/diagonal.jl
index dfb901908ba69..98f5498c71033 100644
--- a/stdlib/LinearAlgebra/test/diagonal.jl
+++ b/stdlib/LinearAlgebra/test/diagonal.jl
@@ -1265,6 +1265,17 @@ end
@test *(Diagonal(ones(n)), Diagonal(1:n), Diagonal(ones(n)), Diagonal(1:n)) isa Diagonal
end
+@testset "triple multiplication with a sandwiched BandedMatrix" begin
+ D = Diagonal(StepRangeLen(NaN, 0, 4));
+ B = Bidiagonal(1:4, 1:3, :U)
+ C = D * B * D
+ @test iszero(diag(C, 2))
+ # test associativity
+ C1 = (D * B) * D
+ C2 = D * (B * D)
+ @test diag(C,2) == diag(C1,2) == diag(C2,2)
+end
+
@testset "diagind" begin
D = Diagonal(1:4)
M = Matrix(D)
diff --git a/stdlib/LinearAlgebra/test/symmetriceigen.jl b/stdlib/LinearAlgebra/test/symmetriceigen.jl
index d55d1deb6bf33..71087ae4d8d24 100644
--- a/stdlib/LinearAlgebra/test/symmetriceigen.jl
+++ b/stdlib/LinearAlgebra/test/symmetriceigen.jl
@@ -171,6 +171,11 @@ end
@test D isa Eigen{ComplexF16, Float16, Matrix{ComplexF16}, Vector{Float16}}
@test D.values ≈ D32.values
@test D.vectors ≈ D32.vectors
+
+ # ensure that different algorithms dispatch correctly
+ λ, V = eigen(C, LinearAlgebra.QRIteration())
+ @test λ isa Vector{Float16}
+ @test C * V ≈ V * Diagonal(λ)
end
@testset "complex Symmetric" begin
diff --git a/stdlib/LinearAlgebra/test/uniformscaling.jl b/stdlib/LinearAlgebra/test/uniformscaling.jl
index 92547e8648d8a..d335cd6f63521 100644
--- a/stdlib/LinearAlgebra/test/uniformscaling.jl
+++ b/stdlib/LinearAlgebra/test/uniformscaling.jl
@@ -226,6 +226,13 @@ let
@test copyto!(B, J) == [λ zero(λ)]
end
+ @testset "copy!" begin
+ A = Matrix{Int}(undef, (3,3))
+ @test copy!(A, I) == one(A)
+ B = Matrix{ComplexF64}(undef, (1,2))
+ @test copy!(B, J) == [λ zero(λ)]
+ end
+
@testset "binary ops with vectors" begin
v = complex.(randn(3), randn(3))
# As shown in #20423@GitHub, vector acts like x1 matrix when participating in linear algebra
diff --git a/stdlib/Pkg.version b/stdlib/Pkg.version
index f5ca169a775c6..34233c58702b4 100644
--- a/stdlib/Pkg.version
+++ b/stdlib/Pkg.version
@@ -1,4 +1,4 @@
PKG_BRANCH = master
-PKG_SHA1 = ef9f76c175872bab6803da4a5fa3fd99bce3d03a
+PKG_SHA1 = 51d4910c114a863d888659cb8962c1e161b2a421
PKG_GIT_URL := https://github.com/JuliaLang/Pkg.jl.git
PKG_TAR_URL = https://api.github.com/repos/JuliaLang/Pkg.jl/tarball/$1
diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl
index c7ef1efb35945..b753c9ca88f24 100644
--- a/stdlib/Profile/src/Profile.jl
+++ b/stdlib/Profile/src/Profile.jl
@@ -7,7 +7,7 @@ Profiling support.
## CPU profiling
- `@profile foo()` to profile a specific call.
-- `Profile.print()` to print the report.
+- `Profile.print()` to print the report. Paths are clickable links in supported terminals and specialized for JULIA_EDITOR etc.
- `Profile.clear()` to clear the buffer.
- Send a $(Sys.isbsd() ? "SIGINFO (ctrl-t)" : "SIGUSR1") signal to the process to automatically trigger a profile and print.
@@ -198,7 +198,9 @@ const META_OFFSET_THREADID = 5
Prints profiling results to `io` (by default, `stdout`). If you do not
supply a `data` vector, the internal buffer of accumulated backtraces
-will be used.
+will be used. Paths are clickable links in supported terminals and
+specialized for [`JULIA_EDITOR`](@ref) with line numbers, or just file
+links if no editor is set.
The keyword arguments can be any combination of:
@@ -807,26 +809,35 @@ end
# make a terminal-clickable link to the file and linenum.
# Similar to `define_default_editors` in `Base.Filesystem` but for creating URIs not commands
function editor_link(path::String, linenum::Int)
- editor = get(ENV, "JULIA_EDITOR", "")
-
- if editor == "code"
- return "vscode://file/$path:$linenum"
- elseif editor == "subl" || editor == "sublime_text"
- return "subl://$path:$linenum"
- elseif editor == "idea" || occursin("idea", editor)
- return "idea://open?file=$path&line=$linenum"
- elseif editor == "pycharm"
- return "pycharm://open?file=$path&line=$linenum"
- elseif editor == "atom"
- return "atom://core/open/file?filename=$path&line=$linenum"
- elseif editor == "emacsclient"
- return "emacs://open?file=$path&line=$linenum"
- elseif editor == "vim" || editor == "nvim"
- return "vim://open?file=$path&line=$linenum"
- else
- # TODO: convert the path to a generic URI (line numbers are not supported by generic URI)
- return path
+ # Note: the editor path can include spaces (if escaped) and flags.
+ editor = nothing
+ for var in ["JULIA_EDITOR", "VISUAL", "EDITOR"]
+ str = get(ENV, var, nothing)
+ str isa String || continue
+ editor = str
+ break
+ end
+ path_encoded = Base.Filesystem.encode_uri_component(path)
+ if editor !== nothing
+ if editor == "code"
+ return "vscode://file/$path_encoded:$linenum"
+ elseif editor == "subl" || editor == "sublime_text"
+ return "subl://open?url=file://$path_encoded&line=$linenum"
+ elseif editor == "idea" || occursin("idea", editor)
+ return "idea://open?file=$path_encoded&line=$linenum"
+ elseif editor == "pycharm"
+ return "pycharm://open?file=$path_encoded&line=$linenum"
+ elseif editor == "atom"
+ return "atom://core/open/file?filename=$path_encoded&line=$linenum"
+ elseif editor == "emacsclient" || editor == "emacs"
+ return "emacs://open?file=$path_encoded&line=$linenum"
+ elseif editor == "vim" || editor == "nvim"
+ # Note: Vim/Nvim may not support standard URI schemes without specific plugins
+ return "vim://open?file=$path_encoded&line=$linenum"
+ end
end
+ # fallback to generic URI, but line numbers are not supported by generic URI
+ return Base.Filesystem.uripath(path)
end
function print_flat(io::IO, lilist::Vector{StackFrame},
diff --git a/stdlib/Profile/src/heapsnapshot_reassemble.jl b/stdlib/Profile/src/heapsnapshot_reassemble.jl
index 2413ae538b8ac..b2d86ee1f27b6 100644
--- a/stdlib/Profile/src/heapsnapshot_reassemble.jl
+++ b/stdlib/Profile/src/heapsnapshot_reassemble.jl
@@ -155,7 +155,8 @@ function assemble_snapshot(in_prefix, io::IO)
_write_decimal_number(io, nodes.edge_count[i], _digits_buf)
print(io, ",0,0")
end
- print(io, "],\"edges\":[")
+ print(io, "],\n")
+ print(io, "\"edges\":[")
e = 1
for n in 1:length(nodes)
count = nodes.edge_count[n]
@@ -177,6 +178,13 @@ function assemble_snapshot(in_prefix, io::IO)
end
println(io, "],")
+ # not used. Required by microsoft/vscode-v8-heap-tools
+ # The order of these fields is required by Chrome dev tools, otherwise loading fails
+ println(io, "\"trace_function_infos\":[],")
+ println(io, "\"trace_tree\":[],")
+ println(io, "\"samples\":[],")
+ println(io, "\"locations\":[],")
+
println(io, "\"strings\":[")
open(string(in_prefix, ".strings"), "r") do strings_io
first = true
diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl
index 44fe0446240c6..272b907165341 100644
--- a/stdlib/REPL/src/REPL.jl
+++ b/stdlib/REPL/src/REPL.jl
@@ -74,7 +74,17 @@ end
function _UndefVarError_warnfor(io::IO, m::Module, var::Symbol)
Base.isbindingresolved(m, var) || return false
(Base.isexported(m, var) || Base.ispublic(m, var)) || return false
- print(io, "\nHint: a global variable of this name also exists in $m.")
+ active_mod = Base.active_module()
+ print(io, "\nHint: ")
+ if isdefined(active_mod, Symbol(m))
+ print(io, "a global variable of this name also exists in $m.")
+ else
+ if Symbol(m) == var
+ print(io, "$m is loaded but not imported in the active module $active_mod.")
+ else
+ print(io, "a global variable of this name may be made accessible by importing $m in the current active module $active_mod.")
+ end
+ end
return true
end
diff --git a/stdlib/Random/src/generation.jl b/stdlib/Random/src/generation.jl
index d8bb48d2764d2..b605dff9e5d80 100644
--- a/stdlib/Random/src/generation.jl
+++ b/stdlib/Random/src/generation.jl
@@ -66,7 +66,7 @@ function _rand!(rng::AbstractRNG, z::BigFloat, sp::SamplerBigFloat)
limbs[end] |= Limb_high_bit
end
z.sign = 1
- GC.@preserve limbs unsafe_copyto!(z.d, pointer(limbs), sp.nlimbs)
+ copyto!(z.d, limbs)
randbool
end
diff --git a/stdlib/TOML/src/TOML.jl b/stdlib/TOML/src/TOML.jl
index 94d2808c0bc24..b37a5ca83c251 100644
--- a/stdlib/TOML/src/TOML.jl
+++ b/stdlib/TOML/src/TOML.jl
@@ -25,7 +25,7 @@ module Internals
end
# https://github.com/JuliaLang/julia/issues/36605
-readstring(f::AbstractString) = isfile(f) ? read(f, String) : error(repr(f), ": No such file")
+_readstring(f::AbstractString) = isfile(f) ? read(f, String) : error(repr(f), ": No such file")
"""
Parser()
@@ -36,12 +36,14 @@ explicitly create a `Parser` but instead one directly use use
will however reuse some internal data structures which can be beneficial for
performance if a larger number of small files are parsed.
"""
-const Parser = Internals.Parser
+struct Parser
+ _p::Internals.Parser{Dates}
+end
# Dates-enabled constructors
-Parser() = Parser{Dates}()
-Parser(io::IO) = Parser{Dates}(io)
-Parser(str::String; filepath=nothing) = Parser{Dates}(str; filepath)
+Parser() = Parser(Internals.Parser{Dates}())
+Parser(io::IO) = Parser(Internals.Parser{Dates}(io))
+Parser(str::String; filepath=nothing) = Parser(Internals.Parser{Dates}(str; filepath))
"""
parsefile(f::AbstractString)
@@ -53,9 +55,9 @@ Parse file `f` and return the resulting table (dictionary). Throw a
See also [`TOML.tryparsefile`](@ref).
"""
parsefile(f::AbstractString) =
- Internals.parse(Parser(readstring(f); filepath=abspath(f)))
+ Internals.parse(Internals.Parser{Dates}(_readstring(f); filepath=abspath(f)))
parsefile(p::Parser, f::AbstractString) =
- Internals.parse(Internals.reinit!(p, readstring(f); filepath=abspath(f)))
+ Internals.parse(Internals.reinit!(p._p, _readstring(f); filepath=abspath(f)))
"""
tryparsefile(f::AbstractString)
@@ -67,9 +69,9 @@ Parse file `f` and return the resulting table (dictionary). Return a
See also [`TOML.parsefile`](@ref).
"""
tryparsefile(f::AbstractString) =
- Internals.tryparse(Parser(readstring(f); filepath=abspath(f)))
+ Internals.tryparse(Internals.Parser{Dates}(_readstring(f); filepath=abspath(f)))
tryparsefile(p::Parser, f::AbstractString) =
- Internals.tryparse(Internals.reinit!(p, readstring(f); filepath=abspath(f)))
+ Internals.tryparse(Internals.reinit!(p._p, _readstring(f); filepath=abspath(f)))
"""
parse(x::Union{AbstractString, IO})
@@ -80,10 +82,11 @@ Throw a [`ParserError`](@ref) upon failure.
See also [`TOML.tryparse`](@ref).
"""
+parse(p::Parser) = Internals.parse(p._p)
parse(str::AbstractString) =
- Internals.parse(Parser(String(str)))
+ Internals.parse(Internals.Parser{Dates}(String(str)))
parse(p::Parser, str::AbstractString) =
- Internals.parse(Internals.reinit!(p, String(str)))
+ Internals.parse(Internals.reinit!(p._p, String(str)))
parse(io::IO) = parse(read(io, String))
parse(p::Parser, io::IO) = parse(p, read(io, String))
@@ -96,10 +99,11 @@ Return a [`ParserError`](@ref) upon failure.
See also [`TOML.parse`](@ref).
"""
+tryparse(p::Parser) = Internals.tryparse(p._p)
tryparse(str::AbstractString) =
- Internals.tryparse(Parser(String(str)))
+ Internals.tryparse(Internals.Parser{Dates}(String(str)))
tryparse(p::Parser, str::AbstractString) =
- Internals.tryparse(Internals.reinit!(p, String(str)))
+ Internals.tryparse(Internals.reinit!(p._p, String(str)))
tryparse(io::IO) = tryparse(read(io, String))
tryparse(p::Parser, io::IO) = tryparse(p, read(io, String))
@@ -131,4 +135,17 @@ supported type.
"""
const print = Internals.Printer.print
+public Parser, parsefile, tryparsefile, parse, tryparse, ParserError, print
+
+# These methods are private Base interfaces, but we do our best to support them over
+# the TOML stdlib types anyway to minimize downstream breakage.
+Base.TOMLCache(p::Parser) = Base.TOMLCache(p._p, Dict{String, Base.CachedTOMLDict}())
+Base.TOMLCache(p::Parser, d::Base.CachedTOMLDict) = Base.TOMLCache(p._p, d)
+Base.TOMLCache(p::Parser, d::Dict{String, Dict{String, Any}}) = Base.TOMLCache(p._p, d)
+
+Internals.reinit!(p::Parser, str::String; filepath::Union{Nothing, String}=nothing) =
+ Internals.reinit!(p._p, str; filepath)
+Internals.parse(p::Parser) = Internals.parse(p._p)
+Internals.tryparse(p::Parser) = Internals.tryparse(p._p)
+
end
diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl
index b4ada2ce3a9cf..46bc2d8790cec 100644
--- a/stdlib/Test/src/Test.jl
+++ b/stdlib/Test/src/Test.jl
@@ -2087,7 +2087,7 @@ function detect_ambiguities(mods::Module...;
while !isempty(work)
mod = pop!(work)
for n in names(mod, all = true)
- Base.isdeprecated(mod, n) && continue
+ (!Base.isbindingresolved(mod, n) || Base.isdeprecated(mod, n)) && continue
if !isdefined(mod, n)
if is_in_mods(mod, recursive, mods)
if allowed_undefineds === nothing || GlobalRef(mod, n) ∉ allowed_undefineds
@@ -2158,7 +2158,7 @@ function detect_unbound_args(mods...;
while !isempty(work)
mod = pop!(work)
for n in names(mod, all = true)
- Base.isdeprecated(mod, n) && continue
+ (!Base.isbindingresolved(mod, n) || Base.isdeprecated(mod, n)) && continue
if !isdefined(mod, n)
if is_in_mods(mod, recursive, mods)
if allowed_undefineds === nothing || GlobalRef(mod, n) ∉ allowed_undefineds
diff --git a/test/Makefile b/test/Makefile
index 1b9cb377c943d..6ebdd3c764fd5 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -24,6 +24,8 @@ EMBEDDING_ARGS := "JULIA=$(JULIA_EXECUTABLE)" "BIN=$(SRCDIR)/embedding" "CC=$(CC
GCEXT_ARGS := "JULIA=$(JULIA_EXECUTABLE)" "BIN=$(SRCDIR)/gcext" "CC=$(CC)"
+TRIMMING_ARGS := "JULIA=$(JULIA_EXECUTABLE)" "BIN=$(JULIAHOME)/usr/bin" "CC=$(CC)"
+
default:
$(TESTS):
@@ -66,6 +68,9 @@ embedding:
gcext:
@$(MAKE) -C $(SRCDIR)/$@ check $(GCEXT_ARGS)
+trimming:
+ @$(MAKE) -C $(SRCDIR)/$@ check $(TRIMMING_ARGS)
+
clangsa:
@$(MAKE) -C $(SRCDIR)/$@
@@ -73,5 +78,6 @@ clean:
@$(MAKE) -C embedding $@ $(EMBEDDING_ARGS)
@$(MAKE) -C gcext $@ $(GCEXT_ARGS)
@$(MAKE) -C llvmpasses $@
+ @$(MAKE) -C trimming $@ $(TRIMMING_ARGS)
-.PHONY: $(TESTS) $(addprefix revise-, $(TESTS)) relocatedepot revise-relocatedepot embedding gcext clangsa clean
+.PHONY: $(TESTS) $(addprefix revise-, $(TESTS)) relocatedepot revise-relocatedepot embedding gcext trimming clangsa clean
diff --git a/test/ambiguous.jl b/test/ambiguous.jl
index acdfdc70ba30c..2f8a4193cf592 100644
--- a/test/ambiguous.jl
+++ b/test/ambiguous.jl
@@ -162,6 +162,22 @@ end
ambs = detect_ambiguities(Ambig48312)
@test length(ambs) == 4
+module UnboundAmbig55868
+ module B
+ struct C end
+ export C
+ Base.@deprecate_binding D C
+ end
+ using .B
+ export C, D
+end
+@test !Base.isbindingresolved(UnboundAmbig55868, :C)
+@test !Base.isbindingresolved(UnboundAmbig55868, :D)
+@test isempty(detect_unbound_args(UnboundAmbig55868))
+@test isempty(detect_ambiguities(UnboundAmbig55868))
+@test !Base.isbindingresolved(UnboundAmbig55868, :C)
+@test !Base.isbindingresolved(UnboundAmbig55868, :D)
+
# Test that Core and Base are free of ambiguities
# not using isempty so this prints more information when it fails
@testset "detect_ambiguities" begin
diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl
index c6720e23739d8..cc3f8950f0dc0 100644
--- a/test/cmdlineargs.jl
+++ b/test/cmdlineargs.jl
@@ -787,6 +787,17 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
# tested in test/parallel.jl)
@test errors_not_signals(`$exename --worker=true`)
+ # --trace-compile
+ let
+ io = IOBuffer()
+ v = writereadpipeline(
+ "foo(x) = begin Base.Experimental.@force_compile; x; end; foo(1)",
+ `$exename --trace-compile=stderr -i`,
+ stderr=io)
+ _stderr = String(take!(io))
+ @test occursin("precompile(Tuple{typeof(Main.foo), Int", _stderr)
+ end
+
# --trace-compile-timing
let
io = IOBuffer()
@@ -798,6 +809,17 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
@test occursin(" ms =# precompile(Tuple{typeof(Main.foo), Int", _stderr)
end
+ # --trace-dispatch
+ let
+ io = IOBuffer()
+ v = writereadpipeline(
+ "foo(x) = begin Base.Experimental.@force_compile; x; end; foo(1)",
+ `$exename --trace-dispatch=stderr -i`,
+ stderr=io)
+ _stderr = String(take!(io))
+ @test occursin("precompile(Tuple{typeof(Main.foo), Int", _stderr)
+ end
+
# test passing arguments
mktempdir() do dir
testfile, io = mktemp(dir)
diff --git a/test/compiler/AbstractInterpreter.jl b/test/compiler/AbstractInterpreter.jl
index bab4fe02a5168..009128b289ade 100644
--- a/test/compiler/AbstractInterpreter.jl
+++ b/test/compiler/AbstractInterpreter.jl
@@ -415,10 +415,13 @@ function CC.abstract_call(interp::NoinlineInterpreter,
arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Int)
ret = @invoke CC.abstract_call(interp::CC.AbstractInterpreter,
arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Int)
- if sv.mod in noinline_modules(interp)
- return CC.CallMeta(ret.rt, ret.exct, ret.effects, NoinlineCallInfo(ret.info))
+ return CC.Future{CC.CallMeta}(ret, interp, sv) do ret, interp, sv
+ if sv.mod in noinline_modules(interp)
+ (;rt, exct, effects, info) = ret
+ return CC.CallMeta(rt, exct, effects, NoinlineCallInfo(info))
+ end
+ return ret
end
- return ret
end
function CC.src_inlining_policy(interp::NoinlineInterpreter,
@nospecialize(src), @nospecialize(info::CallInfo), stmt_flag::UInt32)
diff --git a/test/compiler/EscapeAnalysis/EAUtils.jl b/test/compiler/EscapeAnalysis/EAUtils.jl
index 188ec93ebc5be..b8ad4589db626 100644
--- a/test/compiler/EscapeAnalysis/EAUtils.jl
+++ b/test/compiler/EscapeAnalysis/EAUtils.jl
@@ -116,12 +116,14 @@ CC.get_inference_world(interp::EscapeAnalyzer) = interp.world
CC.get_inference_cache(interp::EscapeAnalyzer) = interp.inf_cache
CC.cache_owner(::EscapeAnalyzer) = EAToken()
-function CC.ipo_dataflow_analysis!(interp::EscapeAnalyzer, ir::IRCode, caller::InferenceResult)
+function CC.ipo_dataflow_analysis!(interp::EscapeAnalyzer, opt::OptimizationState,
+ ir::IRCode, caller::InferenceResult)
# run EA on all frames that have been optimized
- nargs = let def = caller.linfo.def; isa(def, Method) ? Int(def.nargs) : 0; end
+ nargs = Int(opt.src.nargs)
+ 𝕃ₒ = CC.optimizer_lattice(interp)
get_escape_cache = GetEscapeCache(interp)
estate = try
- analyze_escapes(ir, nargs, CC.optimizer_lattice(interp), get_escape_cache)
+ analyze_escapes(ir, nargs, 𝕃ₒ, get_escape_cache)
catch err
@error "error happened within EA, inspect `Main.failed_escapeanalysis`"
Main.failed_escapeanalysis = FailedAnalysis(ir, nargs, get_escape_cache)
@@ -133,7 +135,8 @@ function CC.ipo_dataflow_analysis!(interp::EscapeAnalyzer, ir::IRCode, caller::I
end
record_escapes!(interp, caller, estate, ir)
- @invoke CC.ipo_dataflow_analysis!(interp::AbstractInterpreter, ir::IRCode, caller::InferenceResult)
+ @invoke CC.ipo_dataflow_analysis!(interp::AbstractInterpreter, opt::OptimizationState,
+ ir::IRCode, caller::InferenceResult)
end
function record_escapes!(interp::EscapeAnalyzer,
diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl
index d1382d3c84b82..7c7726413004a 100644
--- a/test/compiler/inference.jl
+++ b/test/compiler/inference.jl
@@ -3887,113 +3887,6 @@ f_apply_cglobal(args...) = cglobal(args...)
f37532(T, x) = (Core.bitcast(Ptr{T}, x); x)
@test Base.return_types(f37532, Tuple{Any, Int}) == Any[Int]
-# PR #37749
-# Helper functions for Core.Compiler.Timings. These are normally accessed via a package -
-# usually (SnoopCompileCore).
-function time_inference(f)
- Core.Compiler.Timings.reset_timings()
- Core.Compiler.__set_measure_typeinf(true)
- f()
- Core.Compiler.__set_measure_typeinf(false)
- Core.Compiler.Timings.close_current_timer()
- return Core.Compiler.Timings._timings[1]
-end
-function depth(t::Core.Compiler.Timings.Timing)
- maximum(depth.(t.children), init=0) + 1
-end
-function flatten_times(t::Core.Compiler.Timings.Timing)
- collect(Iterators.flatten([(t.time => t.mi_info,), flatten_times.(t.children)...]))
-end
-# Some very limited testing of timing the type inference (#37749).
-@testset "Core.Compiler.Timings" begin
- # Functions that call each other
- @eval module M1
- i(x) = x+5
- i2(x) = x+2
- h(a::Array) = i2(a[1]::Integer) + i(a[1]::Integer) + 2
- g(y::Integer, x) = h(Any[y]) + Int(x)
- end
- timing1 = time_inference() do
- @eval M1.g(2, 3.0)
- end
- @test occursin(r"Core.Compiler.Timings.Timing\(InferenceFrameInfo for Core.Compiler.Timings.ROOT\(\)\) with \d+ children", sprint(show, timing1))
- # The last two functions to be inferred should be `i` and `i2`, inferred at runtime with
- # their concrete types.
- @test sort([mi_info.mi.def.name for (time,mi_info) in flatten_times(timing1)[end-1:end]]) == [:i, :i2]
- @test all(child->isa(child.bt, Vector), timing1.children)
- @test all(child->child.bt===nothing, timing1.children[1].children)
- # Test the stacktrace
- @test isa(stacktrace(timing1.children[1].bt), Vector{Base.StackTraces.StackFrame})
- # Test that inference has cached some of the Method Instances
- timing2 = time_inference() do
- @eval M1.g(2, 3.0)
- end
- @test length(flatten_times(timing2)) < length(flatten_times(timing1))
- # Printing of InferenceFrameInfo for mi.def isa Module
- @eval module M2
- i(x) = x+5
- i2(x) = x+2
- h(a::Array) = i2(a[1]::Integer) + i(a[1]::Integer) + 2
- g(y::Integer, x) = h(Any[y]) + Int(x)
- end
- # BEGIN LINE NUMBER SENSITIVITY (adjust the line offset below as needed)
- timingmod = time_inference() do
- @eval @testset "Outer" begin
- @testset "Inner" begin
- for i = 1:2 M2.g(2, 3.0) end
- end
- end
- end
- @test occursin("thunk from $(@__MODULE__) starting at $(@__FILE__):$((@__LINE__) - 6)", string(timingmod.children))
- # END LINE NUMBER SENSITIVITY
-
- # Recursive function
- @eval module _Recursive f(n::Integer) = n == 0 ? 0 : f(n-1) + 1 end
- timing = time_inference() do
- @eval _Recursive.f(Base.inferencebarrier(5))
- end
- @test 2 <= depth(timing) <= 3 # root -> f (-> +)
- @test 2 <= length(flatten_times(timing)) <= 3 # root, f, +
-
- # Functions inferred with multiple constants
- @eval module C
- i(x) = x === 0 ? 0 : 1 / x
- a(x) = i(0) * i(x)
- b() = i(0) * i(1) * i(0)
- function loopc(n)
- s = 0
- for i = 1:n
- s += i
- end
- return s
- end
- call_loopc() = loopc(5)
- myfloor(::Type{T}, x) where T = floor(T, x)
- d(x) = myfloor(Int16, x)
- end
- timing = time_inference() do
- @eval C.a(2)
- @eval C.b()
- @eval C.call_loopc()
- @eval C.d(3.2)
- end
- ft = flatten_times(timing)
- @test !isempty(ft)
- str = sprint(show, ft)
- @test occursin("InferenceFrameInfo for /(1::$Int, ::$Int)", str) # inference constants
- @test occursin("InferenceFrameInfo for Core.Compiler.Timings.ROOT()", str) # qualified
- # loopc has internal slots, check constant printing in this case
- sel = filter(ti -> ti.second.mi.def.name === :loopc, ft)
- ifi = sel[end].second
- @test length(ifi.slottypes) > ifi.nargs
- str = sprint(show, sel)
- @test occursin("InferenceFrameInfo for $(@__MODULE__).C.loopc(5::$Int)", str)
- # check that types aren't double-printed as `T::Type{T}`
- sel = filter(ti -> ti.second.mi.def.name === :myfloor, ft)
- str = sprint(show, sel)
- @test occursin("InferenceFrameInfo for $(@__MODULE__).C.myfloor(::Type{Int16}, ::Float64)", str)
-end
-
# issue #37638
@test only(Base.return_types(() -> (nothing, Any[]...)[2])) isa Type
@@ -6152,3 +6045,6 @@ end
t155751 = Union{AbstractArray{UInt8, 4}, Array{Float32, 4}, Grid55751{Float32, 3, _A} where _A}
t255751 = Array{Float32, 3}
@test Core.Compiler.tmerge_types_slow(t155751,t255751) == AbstractArray # shouldn't hang
+
+issue55882_nfields(x::Union{T,Nothing}) where T<:Number = nfields(x)
+@test Base.infer_return_type(issue55882_nfields) <: Int
diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl
index 80c8ddbb08c69..fceb920352482 100644
--- a/test/compiler/inline.jl
+++ b/test/compiler/inline.jl
@@ -1570,7 +1570,6 @@ let
@test get_finalization_count() == 1000
end
-
function cfg_finalization7(io)
for i = -999:1000
o = DoAllocWithField(0)
@@ -1597,7 +1596,6 @@ let
@test get_finalization_count() == 1000
end
-
# optimize `[push!|pushfirst!](::Vector{Any}, x...)`
@testset "optimize `$f(::Vector{Any}, x...)`" for f = Any[push!, pushfirst!]
@eval begin
diff --git a/test/core.jl b/test/core.jl
index d41a58a7ccb2e..b27832209a835 100644
--- a/test/core.jl
+++ b/test/core.jl
@@ -34,7 +34,7 @@ for (T, c) in (
(Core.CodeInfo, []),
(Core.CodeInstance, [:next, :min_world, :max_world, :inferred, :debuginfo, :ipo_purity_bits, :invoke, :specptr, :specsigflags, :precompile]),
(Core.Method, [:primary_world, :deleted_world]),
- (Core.MethodInstance, [:cache, :precompiled]),
+ (Core.MethodInstance, [:cache, :flags]),
(Core.MethodTable, [:defs, :leafcache, :cache, :max_args]),
(Core.TypeMapEntry, [:next, :min_world, :max_world]),
(Core.TypeMapLevel, [:arg1, :targ, :name1, :tname, :list, :any]),
@@ -1183,7 +1183,7 @@ end
# Make sure that `Module` is not resolved to `Core.Module` during sysimg generation
# so that users can define their own binding named `Module` in Main.
-@test !Base.isbindingresolved(Main, :Module)
+@test success(`$(Base.julia_cmd()) -e '@assert !Base.isbindingresolved(Main, :Module)'`)
# Module() constructor
@test names(Module(:anonymous), all = true, imported = true) == [:anonymous]
@@ -8293,3 +8293,14 @@ end
# to properly give error messages for basic kwargs...
Core.eval(Core.Compiler, quote issue50174(;a=1) = a end)
@test_throws MethodError Core.Compiler.issue50174(;b=2)
+
+let s = mktemp() do path, io
+ xxx = 42
+ redirect_stdout(io) do
+ Base.@assume_effects :nothrow @show xxx
+ end
+ flush(io)
+ read(path, String)
+ end
+ @test strip(s) == "xxx = 42"
+end
diff --git a/test/dict.jl b/test/dict.jl
index 13c60d5a6a053..909afb3607907 100644
--- a/test/dict.jl
+++ b/test/dict.jl
@@ -1049,7 +1049,7 @@ Dict(1 => rand(2,3), 'c' => "asdf") # just make sure this does not trigger a dep
# issue #26939
d26939 = WeakKeyDict()
- (@noinline d -> d[big"1.0" + 1.1] = 1)(d26939)
+ (@noinline d -> d[big"1" + 1] = 1)(d26939)
GC.gc() # primarily to make sure this doesn't segfault
@test count(d26939) == 0
@test length(d26939.ht) == 1
diff --git a/test/errorshow.jl b/test/errorshow.jl
index 3ede370553212..db22fea1131d1 100644
--- a/test/errorshow.jl
+++ b/test/errorshow.jl
@@ -739,8 +739,7 @@ end
pop!(Base.Experimental._hint_handlers[DomainError]) # order is undefined, don't copy this
struct ANumber <: Number end
-let err_str
- err_str = @except_str ANumber()(3 + 4) MethodError
+let err_str = @except_str ANumber()(3 + 4) MethodError
@test occursin("objects of type $(curmod_prefix)ANumber are not callable", err_str)
@test count(==("Maybe you forgot to use an operator such as *, ^, %, / etc. ?"), split(err_str, '\n')) == 1
# issue 40478
@@ -748,22 +747,25 @@ let err_str
@test count(==("Maybe you forgot to use an operator such as *, ^, %, / etc. ?"), split(err_str, '\n')) == 1
end
-let err_str
- a = [1 2; 3 4];
+let a = [1 2; 3 4];
err_str = @except_str (a[1][2] = 5) MethodError
@test occursin("\nAre you trying to index into an array? For multi-dimensional arrays, separate the indices with commas: ", err_str)
@test occursin("a[1, 2]", err_str)
@test occursin("rather than a[1][2]", err_str)
end
-let err_str
- d = Dict
+let d = Dict
err_str = @except_str (d[1] = 5) MethodError
@test occursin("\nYou attempted to index the type Dict, rather than an instance of the type. Make sure you create the type using its constructor: ", err_str)
@test occursin("d = Dict([...])", err_str)
@test occursin(" rather than d = Dict", err_str)
end
+let s = Some("foo")
+ err_str = @except_str (s[] = "bar") MethodError
+ @test !occursin("You attempted to index the type String", err_str)
+end
+
# Execute backtrace once before checking formatting, see #38858
backtrace()
diff --git a/test/file.jl b/test/file.jl
index de258c92e02bc..a4262c4eaaa21 100644
--- a/test/file.jl
+++ b/test/file.jl
@@ -2128,6 +2128,16 @@ Base.joinpath(x::URI50890) = URI50890(x.f)
@test !isnothing(Base.Filesystem.getusername(s.uid))
@test !isnothing(Base.Filesystem.getgroupname(s.gid))
end
+ s = Base.Filesystem.StatStruct()
+ stat_show_str = sprint(show, s)
+ stat_show_str_multi = sprint(show, MIME("text/plain"), s)
+ @test startswith(stat_show_str, "StatStruct(\"\" ENOENT: ") && endswith(stat_show_str, ")")
+ @test startswith(stat_show_str_multi, "StatStruct for \"\"\n ENOENT: ") && !endswith(stat_show_str_multi, r"\s")
+ s = Base.Filesystem.StatStruct("my/test", Ptr{UInt8}(0), Int32(Base.UV_ENOTDIR))
+ stat_show_str = sprint(show, s)
+ stat_show_str_multi = sprint(show, MIME("text/plain"), s)
+ @test startswith(stat_show_str, "StatStruct(\"my/test\" ENOTDIR: ") && endswith(stat_show_str, ")")
+ @test startswith(stat_show_str_multi, "StatStruct for \"my/test\"\n ENOTDIR: ") && !endswith(stat_show_str_multi, r"\s")
end
@testset "diskstat() works" begin
diff --git a/test/floatfuncs.jl b/test/floatfuncs.jl
index f33ec75b58322..d5d697634bcfa 100644
--- a/test/floatfuncs.jl
+++ b/test/floatfuncs.jl
@@ -257,6 +257,35 @@ end
end
end
+@testset "isapprox and unsigned integers" begin
+ for T in Base.BitUnsigned_types
+ # Test also combinations of different integer types
+ W = widen(T)
+ # The order of the operands for difference between unsigned integers is
+ # very important, test both combinations.
+ @test isapprox(T(42), T(42); rtol=T(0), atol=0.5)
+ @test isapprox(T(42), W(42); rtol=T(0), atol=0.5)
+ @test !isapprox(T(0), T(1); rtol=T(0), atol=0.5)
+ @test !isapprox(T(1), T(0); rtol=T(0), atol=0.5)
+ @test isapprox(T(1), T(3); atol=T(2))
+ @test isapprox(T(4), T(2); atol=T(2))
+ @test isapprox(T(1), W(3); atol=T(2))
+ @test isapprox(T(4), W(2); atol=T(2))
+ @test isapprox(T(5), T(7); atol=typemax(T))
+ @test isapprox(T(8), T(6); atol=typemax(T))
+ @test isapprox(T(1), T(2); rtol=1)
+ @test isapprox(T(6), T(3); rtol=1)
+ @test isapprox(T(1), W(2); rtol=1)
+ @test isapprox(T(6), W(3); rtol=1)
+ @test !isapprox(typemin(T), typemax(T))
+ @test !isapprox(typemax(T), typemin(T))
+ @test !isapprox(typemin(T), typemax(T); atol=typemax(T)-T(1))
+ @test !isapprox(typemax(T), typemin(T); atol=typemax(T)-T(1))
+ @test isapprox(typemin(T), typemax(T); atol=typemax(T))
+ @test isapprox(typemax(T), typemin(T); atol=typemax(T))
+ end
+end
+
@testset "Conversion from floating point to unsigned integer near extremes (#51063)" begin
@test_throws InexactError UInt32(4.2949673f9)
@test_throws InexactError UInt64(1.8446744f19)
diff --git a/test/gc.jl b/test/gc.jl
index e46ff0ed73fd9..c532f17f04eb5 100644
--- a/test/gc.jl
+++ b/test/gc.jl
@@ -49,6 +49,13 @@ function issue_54275_test()
@test !live_bytes_has_grown_too_much
end
+function full_sweep_reasons_test()
+ GC.gc()
+ reasons = Base.full_sweep_reasons()
+ @test reasons[:FULL_SWEEP_REASON_FORCED_FULL_SWEEP] >= 1
+ @test keys(reasons) == Set(Base.FULL_SWEEP_REASONS)
+end
+
# !!! note:
# Since we run our tests on 32bit OS as well we confine ourselves
# to parameters that allocate about 512MB of objects. Max RSS is lower
@@ -73,6 +80,10 @@ end
@test isempty(Docs.undocumented_names(GC))
end
+@testset "Full GC reasons" begin
+ full_sweep_reasons_test()
+end
+
#testset doesn't work here because this needs to run in top level
#Check that we ensure objects in toplevel exprs are rooted
global dims54422 = [] # allocate the Binding
diff --git a/test/loading.jl b/test/loading.jl
index bdaca7f9dc69e..b66fd632f23fa 100644
--- a/test/loading.jl
+++ b/test/loading.jl
@@ -1155,6 +1155,19 @@ end
finally
copy!(LOAD_PATH, old_load_path)
end
+
+ # Extension with cycles in dependencies
+ code = """
+ using CyclicExtensions
+ Base.get_extension(CyclicExtensions, :ExtA) isa Module || error("expected extension to load")
+ Base.get_extension(CyclicExtensions, :ExtB) isa Module || error("expected extension to load")
+ CyclicExtensions.greet()
+ """
+ proj = joinpath(@__DIR__, "project", "Extensions", "CyclicExtensions")
+ cmd = `$(Base.julia_cmd()) --startup-file=no -e $code`
+ cmd = addenv(cmd, "JULIA_LOAD_PATH" => proj)
+ @test occursin("Hello Cycles!", String(read(cmd)))
+
finally
try
rm(depot_path, force=true, recursive=true)
diff --git a/test/misc.jl b/test/misc.jl
index 66b70956935cd..e089395ce4557 100644
--- a/test/misc.jl
+++ b/test/misc.jl
@@ -360,6 +360,15 @@ let foo() = 1
@test @timev foo() true
end
+# this is internal, but used for easy testing
+@test sprint(Base.time_print, 1e9) == " 1.000000 seconds"
+@test sprint(Base.time_print, 1e9, 111, 0, 222) == " 1.000000 seconds (222 allocations: 111 bytes)"
+@test sprint(Base.time_print, 1e9, 111, 0.5e9, 222) == " 1.000000 seconds (222 allocations: 111 bytes, 50.00% gc time)"
+@test sprint(Base.time_print, 1e9, 111, 0, 222, 333) == " 1.000000 seconds (222 allocations: 111 bytes, 333 lock conflicts)"
+@test sprint(Base.time_print, 1e9, 0, 0, 0, 333) == " 1.000000 seconds (333 lock conflicts)"
+@test sprint(Base.time_print, 1e9, 111, 0, 222, 333, 0.25e9) == " 1.000000 seconds (222 allocations: 111 bytes, 333 lock conflicts, 25.00% compilation time)"
+@test sprint(Base.time_print, 1e9, 111, 0.5e9, 222, 333, 0.25e9, 0.175e9) == " 1.000000 seconds (222 allocations: 111 bytes, 50.00% gc time, 333 lock conflicts, 25.00% compilation time: 70% of which was recompilation)"
+
# @showtime
@test @showtime true
let foo() = true
diff --git a/test/mpfr.jl b/test/mpfr.jl
index 9a9698ba72c2c..c212bdfc92821 100644
--- a/test/mpfr.jl
+++ b/test/mpfr.jl
@@ -1088,3 +1088,12 @@ end
clear_flags()
end
end
+
+@testset "BigFloatData truncation OOB read" begin
+ @testset "T: $T" for T ∈ (UInt8, UInt16, UInt32, UInt64, UInt128)
+ v = Base.MPFR.BigFloatData{T}(fill(typemax(T), 1 + Base.MPFR.offset_p_limbs))
+ @testset "bit_count: $bit_count" for bit_count ∈ (0:10:80)
+ @test Base.MPFR.truncated(UInt128, v, bit_count) isa Any
+ end
+ end
+end
diff --git a/test/project/Extensions/CyclicExtensions/Manifest.toml b/test/project/Extensions/CyclicExtensions/Manifest.toml
new file mode 100644
index 0000000000000..a506825cf7995
--- /dev/null
+++ b/test/project/Extensions/CyclicExtensions/Manifest.toml
@@ -0,0 +1,21 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.10.4"
+manifest_format = "2.0"
+project_hash = "ec25ff8df3a5e2212a173c3de2c7d716cc47cd36"
+
+[[deps.ExtDep]]
+deps = ["SomePackage"]
+path = "../ExtDep.jl"
+uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c"
+version = "0.1.0"
+
+[[deps.ExtDep2]]
+path = "../ExtDep2"
+uuid = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d"
+version = "0.1.0"
+
+[[deps.SomePackage]]
+path = "../SomePackage"
+uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8"
+version = "0.1.0"
diff --git a/test/project/Extensions/CyclicExtensions/Project.toml b/test/project/Extensions/CyclicExtensions/Project.toml
new file mode 100644
index 0000000000000..08d539dcc40ae
--- /dev/null
+++ b/test/project/Extensions/CyclicExtensions/Project.toml
@@ -0,0 +1,13 @@
+name = "CyclicExtensions"
+uuid = "17d4f0df-b55c-4714-ac4b-55fa23f7355c"
+version = "0.1.0"
+
+[deps]
+ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c"
+
+[weakdeps]
+SomePackage = "678608ae-7bb3-42c7-98b1-82102067a3d8"
+
+[extensions]
+ExtA = ["SomePackage"]
+ExtB = ["SomePackage"]
diff --git a/test/project/Extensions/CyclicExtensions/ext/ExtA.jl b/test/project/Extensions/CyclicExtensions/ext/ExtA.jl
new file mode 100644
index 0000000000000..fa0c0961633cb
--- /dev/null
+++ b/test/project/Extensions/CyclicExtensions/ext/ExtA.jl
@@ -0,0 +1,6 @@
+module ExtA
+
+using CyclicExtensions
+using SomePackage
+
+end
diff --git a/test/project/Extensions/CyclicExtensions/ext/ExtB.jl b/test/project/Extensions/CyclicExtensions/ext/ExtB.jl
new file mode 100644
index 0000000000000..8f6da556d39b8
--- /dev/null
+++ b/test/project/Extensions/CyclicExtensions/ext/ExtB.jl
@@ -0,0 +1,6 @@
+module ExtB
+
+using CyclicExtensions
+using SomePackage
+
+end
diff --git a/test/project/Extensions/CyclicExtensions/src/CyclicExtensions.jl b/test/project/Extensions/CyclicExtensions/src/CyclicExtensions.jl
new file mode 100644
index 0000000000000..f1c2ec2077562
--- /dev/null
+++ b/test/project/Extensions/CyclicExtensions/src/CyclicExtensions.jl
@@ -0,0 +1,7 @@
+module CyclicExtensions
+
+using ExtDep
+
+greet() = print("Hello Cycles!")
+
+end # module CyclicExtensions
diff --git a/test/relocatedepot.jl b/test/relocatedepot.jl
index 039d422c35e25..2ef6dec90dbc1 100644
--- a/test/relocatedepot.jl
+++ b/test/relocatedepot.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
using Test
@@ -26,16 +28,38 @@ end
if !test_relocated_depot
- @testset "insert @depot tag in path" begin
+ @testset "edge cases when inserting @depot tag in path" begin
+ # insert @depot only once for first match
test_harness() do
mktempdir() do dir
pushfirst!(DEPOT_PATH, dir)
- path = dir*dir
- @test Base.replace_depot_path(path) == "@depot"*dir
+ if Sys.iswindows()
+ # dirs start with a drive letter instead of a path separator
+ path = dir*Base.Filesystem.pathsep()*dir
+ @test Base.replace_depot_path(path) == "@depot"*Base.Filesystem.pathsep()*dir
+ else
+ path = dir*dir
+ @test Base.replace_depot_path(path) == "@depot"*dir
+ end
+ end
+
+ # 55340
+ empty!(DEPOT_PATH)
+ mktempdir() do dir
+ jlrc = joinpath(dir, "julia-rc2")
+ jl = joinpath(dir, "julia")
+ mkdir(jl)
+ push!(DEPOT_PATH, jl)
+ @test Base.replace_depot_path(jl) == "@depot"
+ @test Base.replace_depot_path(string(jl,Base.Filesystem.pathsep())) ==
+ string("@depot",Base.Filesystem.pathsep())
+ @test Base.replace_depot_path(jlrc) != "@depot-rc2"
+ @test Base.replace_depot_path(jlrc) == jlrc
end
end
+ # deal with and without trailing path separators
test_harness() do
mktempdir() do dir
pushfirst!(DEPOT_PATH, dir)
@@ -43,9 +67,9 @@ if !test_relocated_depot
if isdirpath(DEPOT_PATH[1])
DEPOT_PATH[1] = dirname(DEPOT_PATH[1]) # strip trailing pathsep
end
- tag = joinpath("@depot", "") # append a pathsep
+ tag = string("@depot", Base.Filesystem.pathsep())
@test startswith(Base.replace_depot_path(path), tag)
- DEPOT_PATH[1] = joinpath(DEPOT_PATH[1], "") # append a pathsep
+ DEPOT_PATH[1] = string(DEPOT_PATH[1], Base.Filesystem.pathsep())
@test startswith(Base.replace_depot_path(path), tag)
popfirst!(DEPOT_PATH)
@test !startswith(Base.replace_depot_path(path), tag)
diff --git a/test/rounding.jl b/test/rounding.jl
index 76b15ec1d9118..6fad6f62e8dfe 100644
--- a/test/rounding.jl
+++ b/test/rounding.jl
@@ -470,3 +470,28 @@ end
@test prevfloat(f) < i
end
end
+
+@testset "π to `BigFloat` with `setrounding`" begin
+ function irrational_to_big_float(c::AbstractIrrational)
+ BigFloat(c)
+ end
+
+ function irrational_to_big_float_with_rounding_mode(c::AbstractIrrational, rm::RoundingMode)
+ f = () -> irrational_to_big_float(c)
+ setrounding(f, BigFloat, rm)
+ end
+
+ function irrational_to_big_float_with_rounding_mode_and_precision(c::AbstractIrrational, rm::RoundingMode, prec::Int)
+ f = () -> irrational_to_big_float_with_rounding_mode(c, rm)
+ setprecision(f, BigFloat, prec)
+ end
+
+ for c ∈ (π, MathConstants.γ, MathConstants.catalan)
+ for p ∈ 1:40
+ @test (
+ irrational_to_big_float_with_rounding_mode_and_precision(c, RoundDown, p) < c <
+ irrational_to_big_float_with_rounding_mode_and_precision(c, RoundUp, p)
+ )
+ end
+ end
+end
diff --git a/test/staged.jl b/test/staged.jl
index aec4a3bf135d3..1b28144639f97 100644
--- a/test/staged.jl
+++ b/test/staged.jl
@@ -381,11 +381,17 @@ let
@test length(ir.cfg.blocks) == 1
end
+function generate_lambda_ex(world::UInt, source::LineNumberNode,
+ argnames, spnames, @nospecialize body)
+ stub = Core.GeneratedFunctionStub(identity, Core.svec(argnames...), Core.svec(spnames...))
+ return stub(world, source, body)
+end
+
# Test that `Core.CachedGenerator` works as expected
struct Generator54916 <: Core.CachedGenerator end
function (::Generator54916)(world::UInt, source::LineNumberNode, args...)
- stub = Core.GeneratedFunctionStub(identity, Core.svec(:doit54916, :func, :arg), Core.svec())
- return stub(world, source, :(func(arg)))
+ return generate_lambda_ex(world, source,
+ (:doit54916, :func, :arg), (), :(func(arg)))
end
@eval function doit54916(func, arg)
$(Expr(:meta, :generated, Generator54916()))
@@ -412,8 +418,8 @@ function generator49715(world, source, self, f, tt)
sig = Tuple{f, tt.parameters...}
mi = Base._which(sig; world)
error("oh no")
- stub = Core.GeneratedFunctionStub(identity, Core.svec(:methodinstance, :ctx, :x, :f), Core.svec())
- stub(world, source, :(nothing))
+ return generate_lambda_ex(world, source,
+ (:doit49715, :f, :tt), (), nothing)
end
@eval function doit49715(f, tt)
$(Expr(:meta, :generated, generator49715))
@@ -426,9 +432,10 @@ function overdubbee54341(a, b)
a + b
end
const overdubee_codeinfo54341 = code_lowered(overdubbee54341, Tuple{Any, Any})[1]
-function overdub_generator54341(world::UInt, source::LineNumberNode, args...)
- if length(args) != 2
- :(error("Wrong number of arguments"))
+function overdub_generator54341(world::UInt, source::LineNumberNode, selftype, fargtypes)
+ if length(fargtypes) != 2
+ return generate_lambda_ex(world, source,
+ (:overdub54341, :args), (), :(error("Wrong number of arguments")))
else
return copy(overdubee_codeinfo54341)
end
@@ -438,3 +445,7 @@ end
$(Expr(:meta, :generated_only))
end
@test overdub54341(1, 2) == 3
+# check if the inlining pass handles `nargs`/`isva` correctly
+@test first(only(code_typed((Int,Int)) do x, y; @inline overdub54341(x, y); end)) isa Core.CodeInfo
+@test first(only(code_typed((Int,)) do x; @inline overdub54341(x, 1); end)) isa Core.CodeInfo
+@test_throws "Wrong number of arguments" overdub54341(1, 2, 3)
diff --git a/test/trimming/Makefile b/test/trimming/Makefile
new file mode 100644
index 0000000000000..c6e105d637013
--- /dev/null
+++ b/test/trimming/Makefile
@@ -0,0 +1,55 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# This Makefile template requires the following variables to be set
+# in the environment or on the command-line:
+# JULIA: path to julia[.exe] executable
+# BIN: binary build directory
+
+ifndef JULIA
+ $(error "Please pass JULIA=[path of target julia binary], or set as environment variable!")
+endif
+ifndef BIN
+ $(error "Please pass BIN=[path of build directory], or set as environment variable!")
+endif
+
+#=============================================================================
+# location of test source
+SRCDIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST))))
+JULIAHOME := $(abspath $(SRCDIR)/../..)
+BUILDSCRIPT := $(BIN)/../share/julia/juliac-buildscript.jl
+include $(JULIAHOME)/Make.inc
+
+# get the executable suffix, if any
+EXE := $(suffix $(abspath $(JULIA)))
+
+# get compiler and linker flags. (see: `contrib/julia-config.jl`)
+JULIA_CONFIG := $(JULIA) -e 'include(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "julia-config.jl"))' --
+CPPFLAGS_ADD :=
+CFLAGS_ADD = $(shell $(JULIA_CONFIG) --cflags)
+LDFLAGS_ADD = -lm $(shell $(JULIA_CONFIG) --ldflags --ldlibs) -ljulia-internal
+
+#=============================================================================
+
+release: hello$(EXE)
+
+hello.o: $(SRCDIR)/hello.jl $(BUILDSCRIPT)
+ $(JULIA) -t 1 -J $(BIN)/../lib/julia/sys.so --startup-file=no --history-file=no --output-o $@ --output-incremental=no --strip-ir --strip-metadata --trim $(BUILDSCRIPT) $(SRCDIR)/hello.jl --output-exe true
+
+init.o: $(SRCDIR)/init.c
+ $(CC) -c -o $@ $< $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS)
+
+hello$(EXE): hello.o init.o
+ $(CC) -o $@ $(WHOLE_ARCHIVE) hello.o $(NO_WHOLE_ARCHIVE) init.o $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) $(LDFLAGS_ADD) $(LDFLAGS)
+
+check: hello$(EXE)
+ $(JULIA) --depwarn=error $(SRCDIR)/../runtests.jl $(SRCDIR)/trimming
+
+clean:
+ -rm -f hello$(EXE) init.o hello.o
+
+.PHONY: release clean check
+
+# Makefile debugging trick:
+# call print-VARIABLE to see the runtime value of any variable
+print-%:
+ @echo '$*=$($*)'
diff --git a/test/trimming/hello.jl b/test/trimming/hello.jl
new file mode 100644
index 0000000000000..307bf820f325b
--- /dev/null
+++ b/test/trimming/hello.jl
@@ -0,0 +1,6 @@
+module MyApp
+Base.@ccallable function main()::Cint
+ println(Core.stdout, "Hello, world!")
+ return 0
+end
+end
diff --git a/test/trimming/init.c b/test/trimming/init.c
new file mode 100644
index 0000000000000..ea1b02f8e5c8f
--- /dev/null
+++ b/test/trimming/init.c
@@ -0,0 +1,9 @@
+#include <julia.h>
+
+__attribute__((constructor)) void static_init(void)
+{
+ if (jl_is_initialized())
+ return;
+ julia_init(JL_IMAGE_IN_MEMORY);
+ jl_exception_clear();
+}
diff --git a/test/trimming/trimming.jl b/test/trimming/trimming.jl
new file mode 100644
index 0000000000000..dfacae7f8e531
--- /dev/null
+++ b/test/trimming/trimming.jl
@@ -0,0 +1,7 @@
+using Test
+
+exe_path = joinpath(@__DIR__, "hello"*splitext(Base.julia_exename())[2])
+
+@test readchomp(`$exe_path`) == "Hello, world!"
+
+@test filesize(exe_path) < filesize(unsafe_string(Base.JLOptions().image_file))/10