diff --git a/base/inference.jl b/base/inference.jl
index a9829626bed31..ecb08f74ab081 100644
--- a/base/inference.jl
+++ b/base/inference.jl
@@ -5,6 +5,7 @@ import Core: _apply, svec, apply_type, Builtin, IntrinsicFunction, MethodInstanc
 
 #### parameters limiting potentially-infinite types ####
 const MAX_TYPEUNION_LEN = 3
 const MAX_TYPE_DEPTH = 8
+const TUPLE_COMPLEXITY_LIMIT_DEPTH = 3
 const MAX_INLINE_CONST_SIZE = 256
 
@@ -306,6 +307,20 @@ function get_staged(li::MethodInstance)
     return ccall(:jl_code_for_staged, Any, (Any,), li)::CodeInfo
 end
+
+#### debugging utilities ####
+
+function print_callstack(sv::InferenceState)
+    while sv !== nothing
+        println(sv.linfo)
+        for cycle in sv.callers_in_cycle
+            println(' ', cycle.linfo)
+        end
+        sv = sv.parent
+    end
+end
+
+
 #### helper functions ####
 
 @inline slot_id(s) = isa(s, SlotNumber) ? (s::SlotNumber).id : (s::TypedSlot).id # using a function to ensure we can infer this
@@ -703,7 +718,7 @@ function type_depth(t::ANY)
         if t.var.ub === Any && t.var.lb === Bottom
             return type_depth(t.body)
         end
-        return max(type_depth(t.var.ub)+1, type_depth(t.var.lb)+1, type_depth(t.body))
+        return max(type_depth(t.var.ub) + 1, type_depth(t.var.lb) + 1, type_depth(t.body))
     end
     return 0
 end
@@ -715,8 +730,8 @@ function limit_type_depth(t::ANY, d::Int)
 end
 
 function limit_type_depth(t::ANY, d::Int, cov::Bool, vars::Vector{TypeVar}=TypeVar[])
-    if isa(t,Union)
-        if d > MAX_TYPE_DEPTH
+    if isa(t, Union)
+        if d < 0
             if cov
                 return Any
             else
@@ -725,9 +740,9 @@ function limit_type_depth(t::ANY, d::Int, cov::Bool, vars::Vector{TypeV
                 return var
             end
         end
-        return Union{limit_type_depth(t.a, d+1, cov, vars),
-                     limit_type_depth(t.b, d+1, cov, vars)}
-    elseif isa(t,UnionAll)
+        return Union{limit_type_depth(t.a, d - 1, cov, vars),
+                     limit_type_depth(t.b, d - 1, cov, vars)}
+    elseif isa(t, UnionAll)
         v = t.var
         if v.ub === Any
             if v.lb === Bottom
@@ -735,9 +750,9 @@ function limit_type_depth(t::ANY, d::Int, cov::Bool, vars::Vector{TypeV
             end
             ub = Any
         else
-            ub = limit_type_depth(v.ub, d+1, true)
+            ub = limit_type_depth(v.ub, d - 1, true)
         end
-        if v.lb === Bottom || type_depth(v.lb) > MAX_TYPE_DEPTH
+        if v.lb === Bottom || type_depth(v.lb) > d
             # note: lower bounds need to be widened by making them lower
             lb = Bottom
         else
@@ -750,7 +765,7 @@ function limit_type_depth(t::ANY, d::Int, cov::Bool, vars::Vector{TypeVar}=TypeV
     end
     P = t.parameters
     isempty(P) && return t
-    if d > MAX_TYPE_DEPTH
+    if d < 0
         if isvarargtype(t)
             # never replace Vararg with non-Vararg
             return Vararg{limit_type_depth(P[1], d, cov, vars), P[2]}
@@ -768,7 +783,7 @@ function limit_type_depth(t::ANY, d::Int, cov::Bool, vars::Vector{TypeVar}=TypeV
         return var
     end
     stillcov = cov && (t.name === Tuple.name)
-    Q = map(x->limit_type_depth(x, d+1, stillcov, vars), P)
+    Q = map(x -> limit_type_depth(x, d - 1, stillcov, vars), P)
     R = t.name.wrapper{Q...}
     if cov && !stillcov
         for var in vars
@@ -778,6 +793,253 @@ function limit_type_depth(t::ANY, d::Int, cov::Bool, vars::Vector{TypeVar}=TypeV
     return R
 end
 
+# limit the complexity of type `t` to be simpler than the comparison type `compare`
+# no new values may be introduced, so the parameter `source` encodes the set of all values already present
+function limit_type_size(t::ANY, compare::ANY, source::ANY)
+    source = svec(unwrap_unionall(compare), unwrap_unionall(source))
+    source[1] === source[2] && (source = svec(source[1]))
+    type_more_complex(t, compare, source, TUPLE_COMPLEXITY_LIMIT_DEPTH) || return t
+    r = _limit_type_size(t, compare, source)
+    @assert t <: r
+    #@assert r === _limit_type_size(r, t, source) # this monotonicity constraint is slightly stronger than actually required,
+    #   since we only actually need to demonstrate that repeated application would reach a fixed point,
+    #   not that it is already at the fixed point
+    return r
+end
+
+sym_isless(a::Symbol, b::Symbol) = ccall(:strcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}), a, b) < 0
+
+function type_more_complex(t::ANY, c::ANY, sources::SimpleVector, tupledepth::Int)
+    # detect cases where the comparison is trivial
+    if t === c
+        return false
+    elseif t === Union{}
+        return false # Bottom is as simple as they come
+    elseif isa(t, DataType) && isempty(t.parameters)
+        return false # fastpath: unparameterized types are always finite
+    elseif tupledepth > 0 && isa(unwrap_unionall(t), DataType) && isa(c, Type) && c !== Union{} && c <: t
+        return false # t is already wider than the comparison in the type lattice
+    elseif tupledepth > 0 && is_derived_type_from_any(unwrap_unionall(t), sources)
+        return false # t isn't something new
+    end
+    # peel off wrappers
+    if isa(c, UnionAll)
+        # allow wrapping type with fewer UnionAlls than comparison if in a covariant context
+        if !isa(t, UnionAll) && tupledepth == 0
+            return true
+        end
+        t = unwrap_unionall(t)
+        c = unwrap_unionall(c)
+    end
+    # rules for various comparison types
+    if isa(c, TypeVar)
+        if isa(t, TypeVar)
+            return !(t.lb === Union{} || t.lb === c.lb) || # simplify lb towards Union{}
+                   type_more_complex(t.ub, c.ub, sources, tupledepth)
+        end
+        c.lb === Union{} || return true
+        return type_more_complex(t, c.ub, sources, max(tupledepth, 1)) # allow replacing a TypeVar with a concrete value
+    elseif isa(c, Union)
+        if isa(t, Union)
+            return type_more_complex(t.a, c.a, sources, tupledepth) ||
+                   type_more_complex(t.b, c.b, sources, tupledepth)
+        end
+        return type_more_complex(t, c.a, sources, tupledepth) &&
+               type_more_complex(t, c.b, sources, tupledepth)
+    elseif isa(t, Int) && isa(c, Int)
+        return t !== 1 # alternatively, could use !(0 <= t < c)
+    end
+    # base case for data types
+    if isa(t, DataType)
+        tP = t.parameters
+        if isa(c, DataType) && t.name === c.name
+            cP = c.parameters
+            length(cP) < length(tP) && return true
+            ntail = length(cP) - length(tP) # assume parameters were dropped from the tuple head
+            # allow creating variation within a nested tuple, but only so deep
+            if t.name === Tuple.name && tupledepth > 0
+                tupledepth -= 1
+            elseif !isvarargtype(t)
+                tupledepth = 0
+            end
+            isgenerator = (t.name.name === :Generator && t.name.module === _topmod(t.name.module))
+            for i = 1:length(tP)
+                tPi = tP[i]
+                cPi = cP[i + ntail]
+                if isgenerator
+                    let tPi = unwrap_unionall(tPi),
+                        cPi = unwrap_unionall(cPi)
+                        if isa(tPi, DataType) && isa(cPi, DataType) &&
+                           !tPi.abstract && !cPi.abstract &&
+                           sym_isless(cPi.name.name, tPi.name.name)
+                            # allow collect on (anonymous) Generators to nest, provided that their functions are appropriately ordered
+                            # TODO: is there a better way?
+                            continue
+                        end
+                    end
+                end
+                type_more_complex(tPi, cPi, sources, tupledepth) && return true
+            end
+            return false
+        end
+        if isType(t) # allow taking typeof any source type anywhere as Type{...}, as long as it isn't nesting Type{Type{...}}
+            tt = unwrap_unionall(t.parameters[1])
+            if isa(tt, DataType) && !isType(tt)
+                is_derived_type_from_any(tt, sources) || return true
+                return false
+            end
+        end
+    end
+    return true
+end
+
+function is_derived_type(t::ANY, c::ANY) # try to find `type` somewhere in `comparison` type
+    t === c && return true
+    if isa(c, TypeVar)
+        # see if it is replacing a TypeVar upper bound with something simpler
+        return is_derived_type(t, c.ub)
+    elseif isa(c, Union)
+        # see if it is one of the elements of the union
+        return is_derived_type(t, c.a) || is_derived_type(t, c.b)
+    elseif isa(c, UnionAll)
+        # see if it is derived from the body
+        return is_derived_type(t, c.body)
+    elseif isa(c, DataType)
+        if isa(t, DataType)
+            # see if it is one of the supertypes of a parameter
+            super = supertype(c)
+            while super !== Any
+                t === super && return true
+                super = supertype(super)
+            end
+        end
+        # see if it was extracted from a type parameter
+        cP = c.parameters
+        for p in cP
+            is_derived_type(t, p) && return true
+        end
+        if isleaftype(c) && isbits(c)
+            # see if it was extracted from a fieldtype
+            # however, only look through types that can be inlined
+            # to ensure monotonicity of derivation
+            # since we know that for immutable types,
+            # the field types must have been constructed prior to the type,
+            # it cannot have a reference cycle in the type graph
+            cF = c.types
+            for f in cF
+                is_derived_type(t, f) && return true
+            end
+        end
+    end
+    return false
+end
+
+function is_derived_type_from_any(t::ANY, sources::SimpleVector)
+    for s in sources
+        is_derived_type(t, s) && return true
+    end
+    return false
+end
+
+function _limit_type_size(t::ANY, c::ANY, sources::SimpleVector) # type vs. comparison which was derived from source
+    if t === c
+        return t # quick egal test
+    elseif t === Union{}
+        return t # easy case
+    elseif isa(t, DataType) && isempty(t.parameters)
+        return t # fast path: unparameterized are always simple
+    elseif isa(unwrap_unionall(t), DataType) && isa(c, Type) && c !== Union{} && c <: t
+        return t # t is already wider than the comparison in the type lattice
+    elseif is_derived_type_from_any(unwrap_unionall(t), sources)
+        return t # t isn't something new
+    end
+    if isa(t, TypeVar)
+        if isa(c, TypeVar)
+            if t.ub === c.ub && t.lb === c.lb
+                return t
+            end
+        end
+    elseif isa(t, Union)
+        if isa(c, Union)
+            a = _limit_type_size(t.a, c.a, sources)
+            b = _limit_type_size(t.b, c.b, sources)
+            return Union{a, b}
+        end
+    elseif isa(t, UnionAll)
+        if isa(c, UnionAll)
+            tv = t.var
+            cv = c.var
+            if tv.ub === cv.ub
+                if tv.lb === cv.lb
+                    return UnionAll(tv, _limit_type_size(t.body, c.body, sources))
+                end
+                ub = tv.ub
+            else
+                ub = _limit_type_size(tv.ub, cv.ub, sources)
+            end
+            if tv.lb === cv.lb
+                lb = tv.lb
+            else
+                # note: lower bounds need to be widened by making them lower
+                lb = Bottom
+            end
+            v2 = TypeVar(tv.name, lb, ub)
+            return UnionAll(v2, _limit_type_size(t{v2}, c{v2}, sources))
+        end
+        tbody = _limit_type_size(t.body, c, sources)
+        tbody === t.body && return t
+        return UnionAll(t.var, tbody)
+    elseif isa(t, DataType)
+        if isa(c, DataType)
+            tP = t.parameters
+            cP = c.parameters
+            if t.name === c.name && !isempty(cP)
+                if isvarargtype(t)
+                    VaT = _limit_type_size(tP[1], cP[1], sources)
+                    N = tP[2]
+                    if isa(N, TypeVar) || N === cP[2]
+                        return Vararg{VaT, N}
+                    end
+                    return Vararg{VaT}
+                elseif t.name === Tuple.name
+                    # for covariant datatypes (aka Tuple),
+                    # apply type-size limit elementwise
+                    np = min(length(tP), length(cP))
+                    Q = Any[ tP[i] for i in 1:np ]
+                    if length(tP) > np # implies Tuple
+                        # combine tp[np:end] into tP[np] using Vararg
+                        Q[np] = tuple_tail_elem(Bottom, Any[ tP[i] for i in np:length(tP) ])
+                    end
+                    for i = 1:np
+                        Q[i] = _limit_type_size(Q[i], cP[i], sources)
+                    end
+                    return Tuple{Q...}
+                end
+            end
+        end
+        if isType(t) # allow taking typeof as Type{...}, but ensure it doesn't start nesting
+            tt = unwrap_unionall(t.parameters[1])
+            if isa(tt, DataType) && !isType(tt)
+                is_derived_type_from_any(tt, sources) && return t
+            end
+        end
+        if isvarargtype(t)
+            # never replace Vararg with non-Vararg
+            return Vararg
+        end
+        widert = t.name.wrapper
+        if !(t <: widert)
+            # This can happen when a typevar has bounds too wide for its context, e.g.
+            # `Complex{T} where T` is not a subtype of `Complex`. In that case widen even
+            # faster to something safe to ensure the result is a supertype of the input.
+            return Any
+        end
+        return widert
+    end
+    return Any
+end
+
+
 const DataType_name_fieldindex = fieldindex(DataType, :name)
 const DataType_parameters_fieldindex = fieldindex(DataType, :parameters)
 const DataType_types_fieldindex = fieldindex(DataType, :types)
@@ -881,7 +1143,7 @@ function getfield_tfunc(s00::ANY, name)
         if isempty(s.parameters)
             return R
         end
-        return limit_type_depth(R, 0)
+        return limit_type_depth(R, MAX_TYPE_DEPTH)
     end
     fld = name.val
     if isa(fld,Symbol)
@@ -916,7 +1178,7 @@ function getfield_tfunc(s00::ANY, name)
     # conservatively limit the type depth here,
     # since the UnionAll type bound is otherwise incorrect
     # in the current type system
-    return rewrap_unionall(limit_type_depth(R, 0), s00)
+    return rewrap_unionall(limit_type_depth(R, MAX_TYPE_DEPTH), s00)
 end
 add_tfunc(getfield, 2, 2, (s::ANY, name::ANY) -> getfield_tfunc(s, name))
 add_tfunc(setfield!, 3, 3, (o::ANY, f::ANY, v::ANY) -> v)
@@ -1094,7 +1356,7 @@ function apply_type_tfunc(headtypetype::ANY, args::ANY...)
     if isvarargtype(headtype)
         return Type
     end
-    if uncertain && type_too_complex(appl,0)
+    if uncertain && type_too_complex(appl, MAX_TYPE_DEPTH)
         return Type{<:headtype}
     end
     if istuple
@@ -1407,59 +1669,36 @@ end
 function abstract_call_method(method::Method, f::ANY, sig::ANY, sparams::SimpleVector, sv::InferenceState)
     sigtuple = unwrap_unionall(sig)::DataType
-    recomputesvec = false
-
-    # limit argument type tuple growth
-    msig = unwrap_unionall(method.sig)
-    lsig = length(msig.parameters)
-    ls = length(sigtuple.parameters)
-    td = type_depth(sig)
-    mightlimitlength = ls > lsig + 1
-    mightlimitdepth = td > 2
-    limitlength = false
-    if mightlimitlength || mightlimitdepth
-        # TODO: FIXME: this heuristic depends on non-local state making type-inference unpredictable
+
+    tm = _topmod(sv)
+    if (# promote_typeof signature may be used with many arguments
+        !istopfunction(tm, f, :promote_typeof)
+        # assume getindex methods aren't directly recursive, since wrappers like ReshapedArrays won't look like it here
+        # should still manage to detect recursive growth either via other intermediate methods or actual type-equal signature recursion
+        && !istopfunction(tm, f, :getindex)
+        && !istopfunction(tm, f, :setindex!)
+        # the construct-to-convert method is a bottleneck in inference,
+        # so just assume that recursion will get prevented at some other point
+        && !(method.sig == Tuple{Type, Any}))
+        # otherwise: limit argument type tuple growth of all other functions
+        msig = unwrap_unionall(method.sig)
+        lsig = length(msig.parameters)
+        ls = length(sigtuple.parameters)
+        # look through the parents list to find the topmost
+        # function call to the same method
         cyclei = 0
         infstate = sv
+        topmost = nothing
         while infstate !== nothing
            infstate = infstate::InferenceState
            if method === infstate.linfo.def
-                if mightlimitlength && ls > length(unwrap_unionall(infstate.linfo.specTypes).parameters)
-                    limitlength = true
-                end
-                if mightlimitdepth && td > type_depth(infstate.linfo.specTypes)
-                    # impose limit if we recur and the argument types grow beyond MAX_TYPE_DEPTH
-                    if td > MAX_TYPE_DEPTH
-                        sig = limit_type_depth(sig, 0)
-                        sigtuple = unwrap_unionall(sig)
-                        recomputesvec = true
-                        break
-                    else
-                        p1, p2 = sigtuple.parameters, unwrap_unionall(infstate.linfo.specTypes).parameters
-                        if length(p2) == ls
-                            limitdepth = false
-                            newsig = Vector{Any}(ls)
-                            for i = 1:ls
-                                if p1[i] <: Function && type_depth(p1[i]) > type_depth(p2[i]) &&
-                                    isa(p1[i],DataType)
-                                    # if a Function argument is growing (e.g. nested closures)
-                                    # then widen to the outermost function type. without this
-                                    # inference fails to terminate on do_quadgk.
-                                    newsig[i] = p1[i].name.wrapper
-                                    limitdepth = true
-                                else
-                                    newsig[i] = limit_type_depth(p1[i], 1)
-                                end
-                            end
-                            if limitdepth
-                                sigtuple = Tuple{newsig...}
-                                sig = rewrap_unionall(sigtuple, sig)
-                                recomputesvec = true
-                                break
-                            end
-                        end
-                    end
+                if infstate.linfo.specTypes == sig
+                    # avoid widening when detecting self-recursion
+                    # TODO: merge call cycle and return right away
+                    topmost = nothing
+                    break
                 end
+                topmost === nothing && (topmost = infstate)
            end
            # iterate through the cycle before walking to the parent
            if cyclei < length(infstate.callers_in_cycle)
@@ -1470,35 +1709,57 @@ function abstract_call_method(method::Method, f::ANY, sig::ANY, sparams::SimpleV
                infstate = infstate.parent
            end
        end
-    end
-    # limit length based on size of definition signature.
-    # for example, given function f(T, Any...), limit to 3 arguments
-    # instead of the default (MAX_TUPLETYPE_LEN)
-    if limitlength
-        tm = _topmod(sv)
-        if !istopfunction(tm, f, :promote_typeof)
-            fst = sigtuple.parameters[lsig + 1]
-            allsame = true
-            # allow specializing on longer arglists if all the trailing
-            # arguments are the same, since there is no exponential
-            # blowup in this case.
+        # TODO: FIXME: this heuristic depends on non-local state making type-inference unpredictable
+        # it also should be integrated into the cycle resolution and iterated to convergence
+        if topmost !== nothing
+            # impose limit if we recur on the same method and the argument type complexity is growing or is beyond MAX_TYPE_DEPTH
+            newsig = sig
+            if !isempty(topmost.callers_in_cycle)
+                # already discovered this method causes dependent self-recursion
+                # widen fully to avoid making the cycle any larger
+                newsig = method.sig
+            else
+                comparison = topmost.linfo.specTypes
+                if ls > lsig + 1 && ls > length(unwrap_unionall(comparison).parameters)
+                    # limit length based on size of definition signature.
+                    # for example, given function f(T, Any...), limit to 3 arguments
+                    # instead of the default (MAX_TUPLETYPE_LEN)
+                    fst = sigtuple.parameters[lsig + 1]
+                    allsame = true
+                    # allow specializing on longer arglists if all the trailing
+                    # arguments are the same, since there is no exponential
+                    # blowup in this case.
+                    for i = (lsig + 2):ls
+                        if sigtuple.parameters[i] != fst
+                            allsame = false
+                            break
+                        end
+                    end
+                    if !allsame
+                        sigtuple = limit_tuple_type_n(sigtuple, lsig + 1)
+                        newsig = rewrap_unionall(sigtuple, newsig)
+                    end
+                end
+                td = type_depth(newsig)
+                max_type_depth = min(MAX_TYPE_DEPTH, type_depth(comparison))
+                if td > max_type_depth
+                    # limit growth in type depth
+                    newsig = limit_type_depth(newsig, max_type_depth)
                end
+                # see if the type is still too big, and limit it further if required
+                newsig = limit_type_size(newsig, comparison, sv.linfo.specTypes)
            end
-            if !allsame
-                sigtuple = limit_tuple_type_n(sigtuple, lsig + 1)
-                sig = rewrap_unionall(sigtuple, sig)
-                recomputesvec = true
+            if newsig !== sig
+                sig = newsig
+                sigtuple = unwrap_unionall(sig)
+                sparams = svec()
            end
        end
    end
    # if sig changed, may need to recompute the sparams environment
-    if isa(method.sig, UnionAll) && (recomputesvec || isempty(sparams))
+    if isa(method.sig, UnionAll) && isempty(sparams)
        recomputed = ccall(:jl_env_from_type_intersection, Ref{SimpleVector}, (Any, Any), sig, method.sig)
        sig = recomputed[1]
        if !isa(unwrap_unionall(sig), DataType) # probably Union{}
@@ -1565,7 +1826,7 @@ function precise_container_type(arg::ANY, typ::ANY, vtypes::VarTable, sv::Infere
         if isvatuple(tti0) && length(tti0.parameters) == 1
             return Any[Vararg{unwrapva(tti0.parameters[1])}]
         else
-            return tti0.parameters
+            return Any[ p for p in tti0.parameters ]
         end
     elseif tti0 <: Array
         return Any[Vararg{eltype(tti0)}]
@@ -2201,18 +2462,18 @@ function abstract_interpret(e::ANY, vtypes::VarTable, sv::InferenceState)
     return vtypes
 end
 
-function type_too_complex(t::ANY, d)
-    if d > MAX_TYPE_DEPTH
+function type_too_complex(t::ANY, d::Int)
+    if d < 0
         return true
-    elseif isa(t,Union)
-        return type_too_complex(t.a, d+1) || type_too_complex(t.b, d+1)
-    elseif isa(t,TypeVar)
-        return type_too_complex(t.lb,d+1) || type_too_complex(t.ub,d+1)
-    elseif isa(t,UnionAll)
+    elseif isa(t, Union)
+        return type_too_complex(t.a, d - 1) || type_too_complex(t.b, d - 1)
+    elseif isa(t, TypeVar)
+        return type_too_complex(t.lb, d - 1) || type_too_complex(t.ub, d - 1)
+    elseif isa(t, UnionAll)
         return type_too_complex(t.var, d) || type_too_complex(t.body, d)
-    elseif isa(t,DataType)
+    elseif isa(t, DataType)
         for x in (t.parameters)::SimpleVector
-            if type_too_complex(x, d+1)
+            if type_too_complex(x, d - 1)
                 return true
             end
         end
@@ -2317,7 +2578,7 @@ function tmerge(typea::ANY, typeb::ANY)
         end
     end
     u = Union{typea, typeb}
-    if unionlen(u) > MAX_TYPEUNION_LEN || type_too_complex(u, 0)
+    if unionlen(u) > MAX_TYPEUNION_LEN || type_too_complex(u, MAX_TYPE_DEPTH)
         # don't let type unions get too big
         # TODO: something smarter, like a common supertype
         return Any
@@ -2653,6 +2914,7 @@ function typeinf_edge(method::Method, atypes::ANY, sparams::SimpleVector, caller
     return frame.bestguess, nothing
 end
 
+
 #### entry points for inferring a MethodInstance given a type signature ####
 
 # compute an inferred AST and return type
diff --git a/base/linalg/dense.jl b/base/linalg/dense.jl
index c1dc43965e13b..590156356c06a 100644
--- a/base/linalg/dense.jl
+++ b/base/linalg/dense.jl
@@ -653,12 +653,15 @@ function inv(A::StridedMatrix{T}) where T
     AA = convert(AbstractArray{S}, A)
     if istriu(AA)
         Ai = inv(UpperTriangular(AA))
+        Ai = convert(typeof(parent(Ai)), Ai)
     elseif istril(AA)
         Ai = inv(LowerTriangular(AA))
+        Ai = convert(typeof(parent(Ai)), Ai)
     else
         Ai = inv(lufact(AA))
+        Ai = convert(typeof(parent(Ai)), Ai)
     end
-    return convert(typeof(parent(Ai)), Ai)
+    return Ai
 end
 
 """
diff --git a/base/multidimensional.jl b/base/multidimensional.jl
index 0f125757aca10..a5dfd377fbad6 100644
--- a/base/multidimensional.jl
+++ b/base/multidimensional.jl
@@ -435,13 +435,9 @@ getindex(t::Tuple, i::CartesianIndex{1}) = getindex(t, i.I[1])
 
 # These are not defined on directly on getindex to avoid
 # ambiguities for AbstractArray subtypes. See the note in abstractarray.jl
-@generated function _getindex(l::IndexStyle, A::AbstractArray, I::Union{Real, AbstractArray}...)
-    N = length(I)
-    quote
-        @_inline_meta
-        @boundscheck checkbounds(A, I...)
-        _unsafe_getindex(l, _maybe_reshape(l, A, I...), I...)
-    end
+@inline function _getindex(l::IndexStyle, A::AbstractArray, I::Union{Real, AbstractArray}...)
+    @boundscheck checkbounds(A, I...)
+    return _unsafe_getindex(l, _maybe_reshape(l, A, I...), I...)
 end
 # But we can speed up IndexCartesian arrays by reshaping them to the appropriate dimensionality:
 _maybe_reshape(::IndexLinear, A::AbstractArray, I...) = A
@@ -450,31 +446,26 @@ _maybe_reshape(::IndexCartesian, A::AbstractVector, I...) = A
 @inline __maybe_reshape(A::AbstractArray{T,N}, ::NTuple{N,Any}) where {T,N} = A
 @inline __maybe_reshape(A::AbstractArray, ::NTuple{N,Any}) where {N} = reshape(A, Val{N})
 
-@generated function _unsafe_getindex(::IndexStyle, A::AbstractArray, I::Union{Real, AbstractArray}...)
-    N = length(I)
-    quote
-        # This is specifically not inlined to prevent exessive allocations in type unstable code
-        @nexprs $N d->(I_d = I[d])
-        shape = @ncall $N index_shape I
-        dest = similar(A, shape)
-        map(unsafe_length, indices(dest)) == map(unsafe_length, shape) || throw_checksize_error(dest, shape)
-        @ncall $N _unsafe_getindex! dest A I
-    end
+function _unsafe_getindex(::IndexStyle, A::AbstractArray, I::Vararg{Union{Real, AbstractArray}, N}) where N
+    # This is specifically not inlined to prevent excessive allocations in type unstable code
+    shape = index_shape(I...)
+    dest = similar(A, shape)
+    map(unsafe_length, indices(dest)) == map(unsafe_length, shape) || throw_checksize_error(dest, shape)
+    _unsafe_getindex!(dest, A, I...) # usually a generated function, don't allow it to impact inference result
+    return dest
 end
 
 # Always index with the exactly indices provided.
-@generated function _unsafe_getindex!(dest::AbstractArray, src::AbstractArray, I::Union{Real, AbstractArray}...)
-    N = length(I)
+@generated function _unsafe_getindex!(dest::AbstractArray, src::AbstractArray, I::Vararg{Union{Real, AbstractArray}, N}) where N
     quote
-        $(Expr(:meta, :inline))
-        @nexprs $N d->(J_d = I[d])
+        @_inline_meta
         D = eachindex(dest)
         Ds = start(D)
-        @inbounds @nloops $N j d->J_d begin
+        @inbounds @nloops $N j d->I[d] begin
             d, Ds = next(D, Ds)
             dest[d] = @ncall $N getindex src j
         end
-        dest
+        return dest
     end
 end
diff --git a/base/subarray.jl b/base/subarray.jl
index 9c2e31aae95e9..3a1b3ddf4d256 100644
--- a/base/subarray.jl
+++ b/base/subarray.jl
@@ -264,7 +264,7 @@ end
 # indexing uses the indices along the given dimension. Otherwise
 # linear indexing always starts with 1.
 compute_offset1(parent, stride1::Integer, I::Tuple) =
-    (@_inline_meta; compute_offset1(parent, stride1, find_extended_dims(I)..., I))
+    (@_inline_meta; compute_offset1(parent, stride1, find_extended_dims(1, I...), find_extended_inds(I...), I))
 compute_offset1(parent, stride1::Integer, dims::Tuple{Int}, inds::Tuple{Slice}, I::Tuple) =
     (@_inline_meta; compute_linindex(parent, I) - stride1*first(indices(parent, dims[1]))) # index-preserving case
 compute_offset1(parent, stride1::Integer, dims, inds, I::Tuple) =
@@ -287,12 +287,12 @@ function compute_linindex(f, s, IP::Tuple, I::Tuple{Any, Vararg{Any}})
 end
 compute_linindex(f, s, IP::Tuple, I::Tuple{}) = f
 
-find_extended_dims(I) = (@_inline_meta; _find_extended_dims((), (), 1, I...))
-_find_extended_dims(dims, inds, dim) = dims, inds
-_find_extended_dims(dims, inds, dim, ::ScalarIndex, I...) =
-    (@_inline_meta; _find_extended_dims(dims, inds, dim+1, I...))
-_find_extended_dims(dims, inds, dim, i1, I...) =
-    (@_inline_meta; _find_extended_dims((dims..., dim), (inds..., i1), dim+1, I...))
+find_extended_dims(dim, ::ScalarIndex, I...) = (@_inline_meta; find_extended_dims(dim + 1, I...))
+find_extended_dims(dim, i1, I...) = (@_inline_meta; (dim, find_extended_dims(dim + 1, I...)...))
+find_extended_dims(dim) = ()
+find_extended_inds(::ScalarIndex, I...) = (@_inline_meta; find_extended_inds(I...))
+find_extended_inds(i1, I...) = (@_inline_meta; (i1, find_extended_inds(I...)...))
+find_extended_inds() = ()
 
 unsafe_convert(::Type{Ptr{T}}, V::SubArray{T,N,P,<:Tuple{Vararg{RangeIndex}}}) where {T,N,P} =
     unsafe_convert(Ptr{T}, V.parent) + (first_index(V)-1)*sizeof(T)
diff --git a/doc/src/devdocs/functions.md b/doc/src/devdocs/functions.md
index f693d692f9499..70c509ec341d2 100644
--- a/doc/src/devdocs/functions.md
+++ b/doc/src/devdocs/functions.md
@@ -251,54 +251,3 @@ The next problem was the `@test` macro, which generated a 0-argument closure for
 This is not really necessary, since each test case is simply run once in place. Therefore I
 modified `@test` to expand to a try-catch block that records the test result (true, false, or
 exception raised) and calls the test suite handler on it.
-
-However this caused a new problem. When many tests are grouped together in a single function,
-e.g. a single top level expression, or some other test grouping function, that function could
-have a very large number of exception handlers. This triggered a kind of dataflow analysis worst
-case, where type inference spun around for minutes enumerating possible paths through the forest
-of handlers. This was fixed by simply bailing out of type inference when it encounters more than
-some number of handlers (currently 25). Presumably no performance-critical function will have
-more than 25 exception handlers. If one ever does, I'm willing to raise the limit to 26.
-
-A minor issue occurs during the bootstrap process due to storing all constructors in a single
-method table. In the second bootstrap step, where `inference.ji` is compiled using `inference0.ji`,
-constructors for `inference0`'s types remain in the table, so there are still references to the
-old inference module and `inference.ji` is 2x the size it should be. This was fixed in `dump.c` by
-filtering definitions from "replaced modules" out of method tables and caches before saving a
-system image. A "replaced module" is one that satisfies the condition `m != jl_get_global(m->parent, m->name)`
--- in other words, some newer module has taken its name and place.
-
-Another type inference worst case was triggered by the following code from the [QuadGK.jl package](https://github.com/JuliaMath/QuadGK.jl),
-formerly part of Base:
-
-```julia
-function do_quadgk(f, s, n, ::Type{Tw}, abstol, reltol, maxevals, nrm) where Tw
-    if eltype(s) <: Real # check for infinite or semi-infinite intervals
-        s1 = s[1]; s2 = s[end]; inf1 = isinf(s1); inf2 = isinf(s2)
-        if inf1 || inf2
-            if inf1 && inf2 # x = t/(1-t^2) coordinate transformation
-                return do_quadgk(t -> begin t2 = t*t; den = 1 / (1 - t2);
-                                        f(t*den) * (1+t2)*den*den; end,
-                                 map(x -> isinf(x) ? copysign(one(x), x) : 2x / (1+hypot(1,2x)), s),
-                                 n, Tw, abstol, reltol, maxevals, nrm)
-            end
-            s0,si = inf1 ? (s2,s1) : (s1,s2)
-            if si < 0 # x = s0 - t/(1-t)
-                return do_quadgk(t -> begin den = 1 / (1 - t);
-                                        f(s0 - t*den) * den*den; end,
-                                 reverse!(map(x -> 1 / (1 + 1 / (s0 - x)), s)),
-                                 n, Tw, abstol, reltol, maxevals, nrm)
-            else # x = s0 + t/(1-t)
-                return do_quadgk(t -> begin den = 1 / (1 - t);
-                                        f(s0 + t*den) * den*den; end,
-                                 map(x -> 1 / (1 + 1 / (x - s0)), s),
-                                 n, Tw, abstol, reltol, maxevals, nrm)
-            end
-        end
-    end
-```
-
-This code has a 3-way tail recursion, where each call wraps the current function argument `f`
-in a different new closure. Inference must consider 3^n (where n is the call depth) possible signatures.
-This blows up way too quickly, so logic was added to `typeinf_uncached` to immediately widen any
-argument that is a subtype of `Function` and that grows in depth down the stack.
diff --git a/test/inference.jl b/test/inference.jl
index 542f5fb032e3e..e4b8d33cd8b3f 100644
--- a/test/inference.jl
+++ b/test/inference.jl
@@ -810,7 +810,7 @@ for A in (1,)
 end
 
 # issue #21848
-@test Core.Inference.limit_type_depth(Ref{Complex{T} where T}, Core.Inference.MAX_TYPE_DEPTH) == Ref
+@test Core.Inference.limit_type_depth(Ref{Complex{T} where T}, 0) == Ref
 let T = Tuple{Tuple{Int64, Void},
               Tuple{Tuple{Int64, Void},
                     Tuple{Int64, Tuple{Tuple{Int64, Void},
@@ -910,3 +910,31 @@ end
 @test isdefined_tfunc(Tuple{Any,Vararg{Any}}, Const(1)) === Const(true)
 @test isdefined_tfunc(Tuple{Any,Vararg{Any}}, Const(2)) === Bool
 @test isdefined_tfunc(Tuple{Any,Vararg{Any}}, Const(3)) === Bool
+
+# demonstrate that inference must converge
+# while doing constant propagation
+Base.@pure plus1(x) = x + 1
+f21933(x::Val{T}) where {T} = f(Val{plus1(T)}())
+@code_typed f21933(Val{1}())
+Base.return_types(f21933, (Val{1},))
+
+function count_specializations(method::Method)
+    n = 0
+    Base.visit(method.specializations) do m
+        n += 1
+    end
+    return n::Int
+end
+
+# demonstrate that inference can complete without waiting for MAX_TUPLETYPE_LEN or MAX_TYPE_DEPTH
+copy_dims_out(out) = ()
+copy_dims_out(out, dim::Int, tail...) = copy_dims_out((out..., dim), tail...)
+copy_dims_out(out, dim::Colon, tail...) = copy_dims_out((out..., dim), tail...)
+@test Base.return_types(copy_dims_out, (Tuple{}, Vararg{Union{Int,Colon}})) == Any[Tuple{}, Tuple{}, Tuple{}]
+@test all(m -> 2 < count_specializations(m) < 15, methods(copy_dims_out))
+
+copy_dims_pair(out) = ()
+copy_dims_pair(out, dim::Int, tail...) = copy_dims_out(out => dim, tail...)
+copy_dims_pair(out, dim::Colon, tail...) = copy_dims_out(out => dim, tail...)
+@test Base.return_types(copy_dims_pair, (Tuple{}, Vararg{Union{Int,Colon}})) == Any[Tuple{}, Tuple{}, Tuple{}]
+@test all(m -> 5 < count_specializations(m) < 25, methods(copy_dims_out))
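
The hunks above flip the depth accounting in `limit_type_depth` and `type_too_complex`: `d` is now a remaining budget that is decremented on recursion, with widening triggered once it drops below zero, so call sites such as `getfield_tfunc`, `apply_type_tfunc`, and `tmerge` now pass `MAX_TYPE_DEPTH` where they used to start a counter at `0`, and the issue #21848 test exercises the exhausted-budget case with `0`. The snippet below is an illustrative sketch only, not part of the patch; `nesting_too_deep` is a made-up name that just mirrors the counting convention on stock Julia 0.6:

```julia
# Count-down convention: `budget` is the remaining allowed nesting depth.
# Each recursion into a type parameter passes `budget - 1`; once the budget
# goes negative we report the type as too deep (the patch widens at that point).
nesting_too_deep(t, budget::Int) =
    budget < 0 ? true :
    (isa(t, DataType) && !isempty(t.parameters)) ?
        any(p -> nesting_too_deep(p, budget - 1), t.parameters) :
        false

nesting_too_deep(Ref{Ref{Ref{Int}}}, 8)  # false: three levels fit easily in a budget of 8
nesting_too_deep(Ref{Ref{Ref{Int}}}, 1)  # true: the budget runs out before the innermost parameter
```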