SciML · ChrisRackauckas · Sep 29, 2023 · Sep 14, 2023 · Sep 14, 2023 · Sep 14, 2023
diff --git a/src/trustRegion.jl b/src/trustRegion.jl
@@ -203,6 +203,7 @@
     shrink_counter::Int
     du
     u_tmp
+    u_gauss_newton
     u_cauchy
     fu_new
     make_new_J::Bool
@@ -229,6 +230,7 @@
         linsolve_kwargs)
     u_tmp = zero(u)
     u_cauchy = zero(u)
+    u_gauss_newton = zero(u)
 
     loss_new = loss
     H = zero(J)
@@ -246,8 +248,8 @@
    # set default type for all trust region parameters
    trustType = floatType
    if radius_update_scheme == RadiusUpdateSchemes.NLsolve
        max_trust_radius = convert(trustType, Inf)
        initial_trust_radius = norm(u0) > 0 ? convert(trustType, norm(u0)) : one(trustType)
    else
        max_trust_radius = convert(trustType, alg.max_trust_radius)
        if iszero(max_trust_radius)
@@ -265,14 +267,13 @@
     expand_factor = convert(trustType, alg.expand_factor)
 
     # Parameters for the Schemes
-    floatType = typeof(r)
     p1 = convert(floatType, 0.0)
     p2 = convert(floatType, 0.0)
     p3 = convert(floatType, 0.0)
    p4 = convert(floatType, 0.0)
    ϵ = convert(floatType, 1.0e-8)
    if radius_update_scheme === RadiusUpdateSchemes.NLsolve
        p1 = convert(floatType, 0.5)
    elseif radius_update_scheme === RadiusUpdateSchemes.Hei
        step_threshold = convert(trustType, 0.0)
        shrink_threshold = convert(trustType, 0.25)
@@ -321,28 +322,30 @@
         jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, prob,
         radius_update_scheme, initial_trust_radius, max_trust_radius, step_threshold,
         shrink_threshold, expand_threshold, shrink_factor, expand_factor, loss, loss_new,
-        H, g, shrink_counter, du, u_tmp, u_cauchy, fu_new, make_new_J, r, p1, p2, p3, p4, ϵ,
+        H, g, shrink_counter, du, u_tmp, u_gauss_newton, u_cauchy, fu_new, make_new_J, r, p1, p2, p3, p4, ϵ,
         NLStats(1, 0, 0, 0, 0))
 end
 
 isinplace(::TrustRegionCache{iip}) where {iip} = iip
 
 function perform_step!(cache::TrustRegionCache{true})
-    @unpack make_new_J, J, fu, f, u, p, u_tmp, alg, linsolve = cache
+    @unpack make_new_J, J, fu, f, u, p, u_gauss_newton, alg, linsolve = cache
     if cache.make_new_J
         jacobian!!(J, cache)
         mul!(cache.H, J', J)
         mul!(cache.g, J', fu)
         cache.stats.njacs += 1
-    end
 
-    # do not use A = cache.H, b = _vec(cache.g) since it is equivalent 
-    # to  A = cache.J, b = _vec(fu) as long as the Jacobian is non-singular
-    linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(fu), 
-        linu = _vec(u_tmp),
+        # Do not use A = cache.H, b = _vec(cache.g): that system is equivalent
+        # to A = cache.J, b = _vec(fu) as long as the Jacobian is non-singular
+        linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(fu), 
+                            linu = _vec(u_gauss_newton),
         p = p, reltol = cache.abstol)
-    cache.linsolve = linres.cache
-    cache.u_tmp .= -1 .* u_tmp
+        cache.linsolve = linres.cache
+        @. cache.u_gauss_newton = -1 * u_gauss_newton
+    end
+
+    # Compute dogleg step
     dogleg!(cache)
 
     # Compute the potentially new u
@@ -363,11 +366,10 @@
         cache.H = J' * J
         cache.g = J' * fu
         cache.stats.njacs += 1
+        cache.u_gauss_newton = -1 .* (cache.H \ cache.g)
     end
 
-    @unpack g, H = cache
     # Compute the Newton step.
-    cache.u_tmp = -1 .* (H \ g)
     dogleg!(cache)
 
     # Compute the potentially new u
@@ -435,42 +437,42 @@

    elseif radius_update_scheme === RadiusUpdateSchemes.NLsolve
        # accept/reject decision
        if r > cache.step_threshold # accept
            take_step!(cache)
            cache.loss = cache.loss_new
            cache.make_new_J = true
        else # reject
            cache.make_new_J = false
        end

        # trust region update 
        if r < 1//10 # cache.shrink_threshold 
            cache.trust_r *= 1//2 # cache.shrink_factor 
        elseif r >= 9//10 # cache.expand_threshold 
            cache.trust_r = 2 * norm(cache.du) # cache.expand_factor * norm(cache.du) 
        elseif r >= 1//2 # cache.p1 
            cache.trust_r = max(cache.trust_r, 2*norm(cache.du)) # cache.expand_factor * norm(cache.du))
        end

        # convergence test
        if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol
            cache.force_stop = true
        end

    elseif radius_update_scheme === RadiusUpdateSchemes.NW
        # accept/reject decision
        if r > cache.step_threshold # accept
            take_step!(cache)
            cache.loss = cache.loss_new
            cache.make_new_J = true
        else # reject
            cache.make_new_J = false
        end

        if r < 1 // 4
            cache.trust_r = (1 // 4) * norm(cache.du)
        elseif (r > (3 // 4)) && abs(norm(cache.du) - cache.trust_r)/cache.trust_r < 1e-6
            cache.trust_r = min(2*cache.trust_r, cache.max_trust_r)
        end  

    elseif radius_update_scheme === RadiusUpdateSchemes.Hei
@@ -566,41 +568,42 @@
 end
 
 function dogleg!(cache::TrustRegionCache{true})
-    @unpack u_tmp, u_cauchy, trust_r = cache
+    @unpack u_tmp, u_gauss_newton, u_cauchy, trust_r = cache
 
     # Take the full Gauss-Newton step if it lies within the trust region.
-    if norm(u_tmp) ≤ trust_r
-        cache.du .= u_tmp 
+    if norm(u_gauss_newton) ≤ trust_r
+        cache.du .= u_gauss_newton
         return
     end
 
     # Take intersection of steepest descent direction and trust region if Cauchy point lies outside of trust region
     l_grad = norm(cache.g) # length of the gradient
     d_cauchy = l_grad^3 / dot(cache.g, cache.H, cache.g) # distance of the cauchy point from the current iterate
-    if d_cauchy > trust_r 
+    if d_cauchy >= trust_r 
         @. cache.du = - (trust_r/l_grad) * cache.g # step to the end of the trust region
         return
     end
-    
+
     # Take the intersection of the dogleg with the trust region if the Cauchy point lies inside the trust region
     @. u_cauchy = - (d_cauchy/l_grad) * cache.g # compute Cauchy point
-    @. u_tmp -= u_cauchy # calf of the dogleg -- use u_tmp to avoid allocation
+    @. u_tmp = u_gauss_newton - u_cauchy # calf of the dogleg -- use u_tmp to avoid allocation
+
     a = dot(u_tmp, u_tmp)
     b = 2*dot(u_cauchy, u_tmp)
     c = d_cauchy^2 - trust_r^2
    aux = max(b^2 - 4*a*c, 0.0) # technically guaranteed to be non-negative but hedging against floating point issues
    τ = (-b + sqrt(aux)) / (2*a) # stepsize along dogleg to trust region boundary

    @. cache.du = u_cauchy + τ * u_tmp
 end
 
 
 function dogleg!(cache::TrustRegionCache{false})
-    @unpack u_tmp, u_cauchy, trust_r = cache
+    @unpack u_tmp, u_gauss_newton, u_cauchy, trust_r = cache
 
     # Take the full Gauss-Newton step if it lies within the trust region.
-    if norm(u_tmp) ≤ trust_r
-        cache.du = deepcopy(u_tmp)
+    if norm(u_gauss_newton) ≤ trust_r
+        cache.du = deepcopy(u_gauss_newton)
         return
     end
 
@@ -614,7 +617,7 @@
 
     # Take the intersection of the dogleg with the trust region if the Cauchy point lies inside the trust region
     u_cauchy = - (d_cauchy/l_grad) * cache.g # compute Cauchy point
-    u_tmp -= u_cauchy # calf of the dogleg -- use u_tmp to avoid allocation
+    u_tmp = u_gauss_newton - u_cauchy # calf of the dogleg
     a = dot(u_tmp, u_tmp)
     b = 2*dot(u_cauchy, u_tmp)
     c = d_cauchy^2 - trust_r^2