diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index 20577ef67..f2ccb7f63 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -25,6 +25,20 @@ states as `RadiusUpdateSchemes.T`. Simply put the desired scheme as follows:
     """
     Simple
 
+    """
+    `RadiusUpdateSchemes.NLsolve`
+
+    The same updating scheme as in NLsolve's (https://github.com/JuliaNLSolvers/NLsolve.jl) trust region dogleg implementation. 
+    """
+    NLsolve
+
+    """
+    `RadiusUpdateSchemes.NocedalWright`
+
+    Trust region updating scheme as in Nocedal and Wright [see Alg 11.5, page 291]. 
+    """
+    NocedalWright
+
     """
     `RadiusUpdateSchemes.Hei`
 
@@ -146,7 +160,7 @@ end
 function TrustRegion(; concrete_jac = nothing, linsolve = nothing, precs = DEFAULT_PRECS,
     radius_update_scheme::RadiusUpdateSchemes.T = RadiusUpdateSchemes.Simple, #defaults to conventional radius update
     max_trust_radius::Real = 0 // 1, initial_trust_radius::Real = 0 // 1,
-    step_threshold::Real = 1 // 10, shrink_threshold::Real = 1 // 4,
+    step_threshold::Real = 1 // 10000, shrink_threshold::Real = 1 // 4,
     expand_threshold::Real = 3 // 4, shrink_factor::Real = 1 // 4,
     expand_factor::Real = 2 // 1, max_shrink_times::Int = 32, adkwargs...)
     ad = default_adargs_to_adtype(; adkwargs...)
@@ -187,8 +201,10 @@ end
     H
     g
     shrink_counter::Int
-    step_size
+    du
     u_tmp
+    u_gauss_newton
+    u_cauchy
     fu_new
     make_new_J::Bool
     r::floatType
@@ -212,55 +228,68 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
     loss = get_loss(fu1)
     uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip);
         linsolve_kwargs)
-
-    radius_update_scheme = alg.radius_update_scheme
-    max_trust_radius = convert(eltype(u), alg.max_trust_radius)
-    initial_trust_radius = convert(eltype(u), alg.initial_trust_radius)
-    step_threshold = convert(eltype(u), alg.step_threshold)
-    shrink_threshold = convert(eltype(u), alg.shrink_threshold)
-    expand_threshold = convert(eltype(u), alg.expand_threshold)
-    shrink_factor = convert(eltype(u), alg.shrink_factor)
-    expand_factor = convert(eltype(u), alg.expand_factor)
-    # Set default trust region radius if not specified
-    if iszero(max_trust_radius)
-        max_trust_radius = convert(eltype(u), max(norm(fu1), maximum(u) - minimum(u)))
-    end
-    if iszero(initial_trust_radius)
-        initial_trust_radius = convert(eltype(u), max_trust_radius / 11)
-    end
+    u_tmp = zero(u)
+    u_cauchy = zero(u)
+    u_gauss_newton = zero(u)
 
     loss_new = loss
     H = zero(J)
     g = _mutable_zero(fu1)
     shrink_counter = 0
-    step_size = zero(u)
     fu_new = zero(fu1)
     make_new_J = true
     r = loss
 
+    floatType = typeof(r)
+
+    # set trust region update scheme
+    radius_update_scheme = alg.radius_update_scheme
+
+    # set default type for all trust region parameters
+    trustType = floatType
+    if radius_update_scheme == RadiusUpdateSchemes.NLsolve
+        max_trust_radius = convert(trustType, Inf)
+        initial_trust_radius = norm(u0) > 0 ? convert(trustType, norm(u0)) : one(trustType)
+    else
+        max_trust_radius = convert(trustType, alg.max_trust_radius)
+        if iszero(max_trust_radius)
+            max_trust_radius = convert(trustType, max(norm(fu1), maximum(u) - minimum(u)))
+        end
+        initial_trust_radius = convert(trustType, alg.initial_trust_radius)
+        if iszero(initial_trust_radius)
+            initial_trust_radius = convert(trustType, max_trust_radius / 11)
+        end
+    end
+    step_threshold = convert(trustType, alg.step_threshold)
+    shrink_threshold = convert(trustType, alg.shrink_threshold)
+    expand_threshold = convert(trustType, alg.expand_threshold)
+    shrink_factor = convert(trustType, alg.shrink_factor)
+    expand_factor = convert(trustType, alg.expand_factor)
+
     # Parameters for the Schemes
-    p1 = convert(eltype(u), 0.0)
-    p2 = convert(eltype(u), 0.0)
-    p3 = convert(eltype(u), 0.0)
-    p4 = convert(eltype(u), 0.0)
-    ϵ = convert(eltype(u), 1.0e-8)
-    if radius_update_scheme === RadiusUpdateSchemes.Hei
-        step_threshold = convert(eltype(u), 0.0)
-        shrink_threshold = convert(eltype(u), 0.25)
-        expand_threshold = convert(eltype(u), 0.25)
-        p1 = convert(eltype(u), 5.0) # M
-        p2 = convert(eltype(u), 0.1) # β
-        p3 = convert(eltype(u), 0.15) # γ1
-        p4 = convert(eltype(u), 0.15) # γ2
-        initial_trust_radius = convert(eltype(u), 1.0)
+    p1 = convert(floatType, 0.0)
+    p2 = convert(floatType, 0.0)
+    p3 = convert(floatType, 0.0)
+    p4 = convert(floatType, 0.0)
+    ϵ = convert(floatType, 1.0e-8)
+    if radius_update_scheme === RadiusUpdateSchemes.NLsolve
+        p1 = convert(floatType, 0.5)
+    elseif radius_update_scheme === RadiusUpdateSchemes.Hei
+        step_threshold = convert(trustType, 0.0)
+        shrink_threshold = convert(trustType, 0.25)
+        expand_threshold = convert(trustType, 0.25)
+        p1 = convert(floatType, 5.0)  # M
+        p2 = convert(floatType, 0.1)  # β
+        p3 = convert(floatType, 0.15) # γ1
+        p4 = convert(floatType, 0.15) # γ2
+        initial_trust_radius = convert(trustType, 1.0)
     elseif radius_update_scheme === RadiusUpdateSchemes.Yuan
-        step_threshold = convert(eltype(u), 0.0001)
-        shrink_threshold = convert(eltype(u), 0.25)
-        expand_threshold = convert(eltype(u), 0.25)
-        p1 = convert(eltype(u), 2.0) # μ
-        p2 = convert(eltype(u), 1 / 6) # c5
-        p3 = convert(eltype(u), 6.0) # c6
-        p4 = convert(eltype(u), 0.0)
+        step_threshold = convert(trustType, 0.0001)
+        shrink_threshold = convert(trustType, 0.25)
+        expand_threshold = convert(trustType, 0.25)
+        p1 = convert(floatType, 2.0)   # μ
+        p2 = convert(floatType, 1 / 6) # c5
+        p3 = convert(floatType, 6.0)   # c6
         if iip
             auto_jacvec!(g, (fu, x) -> f(fu, x, p), u, fu1)
         else
@@ -270,54 +299,58 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
                 g = auto_jacvec(x -> f(x, p), u, fu1)
             end
         end
-        initial_trust_radius = convert(eltype(u), p1 * norm(g))
+        initial_trust_radius = convert(trustType, p1 * norm(g))
     elseif radius_update_scheme === RadiusUpdateSchemes.Fan
-        step_threshold = convert(eltype(u), 0.0001)
-        shrink_threshold = convert(eltype(u), 0.25)
-        expand_threshold = convert(eltype(u), 0.75)
-        p1 = convert(eltype(u), 0.1) # μ
-        p2 = convert(eltype(u), 1 / 4) # c5
-        p3 = convert(eltype(u), 12) # c6
-        p4 = convert(eltype(u), 1.0e18) # M
-        initial_trust_radius = convert(eltype(u), p1 * (norm(fu1)^0.99))
+        step_threshold = convert(trustType, 0.0001)
+        shrink_threshold = convert(trustType, 0.25)
+        expand_threshold = convert(trustType, 0.75)
+        p1 = convert(floatType, 0.1) # μ
+        p2 = convert(floatType, 0.25) # c5
+        p3 = convert(floatType, 12.0) # c6
+        p4 = convert(floatType, 1.0e18) # M
+        initial_trust_radius = convert(trustType, p1 * (norm(fu1)^0.99))
     elseif radius_update_scheme === RadiusUpdateSchemes.Bastin
-        step_threshold = convert(eltype(u), 0.05)
-        shrink_threshold = convert(eltype(u), 0.05)
-        expand_threshold = convert(eltype(u), 0.9)
-        p1 = convert(eltype(u), 2.5)  #alpha_1
-        p2 = convert(eltype(u), 0.25) # alpha_2
-        p3 = convert(eltype(u), 0) # not required
-        p4 = convert(eltype(u), 0) # not required
-        initial_trust_radius = convert(eltype(u), 1.0)
+        step_threshold = convert(trustType, 0.05)
+        shrink_threshold = convert(trustType, 0.05)
+        expand_threshold = convert(trustType, 0.9)
+        p1 = convert(floatType, 2.5)  # alpha_1
+        p2 = convert(floatType, 0.25) # alpha_2
+        initial_trust_radius = convert(trustType, 1.0)
     end
 
     return TrustRegionCache{iip}(f, alg, u_prev, u, fu_prev, fu1, fu2, p, uf, linsolve, J,
         jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, prob,
         radius_update_scheme, initial_trust_radius, max_trust_radius, step_threshold,
         shrink_threshold, expand_threshold, shrink_factor, expand_factor, loss, loss_new,
-        H, g, shrink_counter, step_size, du, fu_new, make_new_J, r, p1, p2, p3, p4, ϵ,
+        H, g, shrink_counter, du, u_tmp, u_gauss_newton, u_cauchy, fu_new, make_new_J, r,
+        p1, p2, p3, p4, ϵ,
         NLStats(1, 0, 0, 0, 0))
 end
 
 isinplace(::TrustRegionCache{iip}) where {iip} = iip
 
 function perform_step!(cache::TrustRegionCache{true})
-    @unpack make_new_J, J, fu, f, u, p, u_tmp, alg, linsolve = cache
+    @unpack make_new_J, J, fu, f, u, p, u_gauss_newton, alg, linsolve = cache
     if cache.make_new_J
         jacobian!!(J, cache)
-        mul!(cache.H, J, J)
-        mul!(cache.g, J, fu)
+        mul!(cache.H, J', J)
+        mul!(cache.g, J', fu)
         cache.stats.njacs += 1
+
+        # do not use A = cache.H, b = _vec(cache.g) since it is equivalent 
+        # to  A = cache.J, b = _vec(fu) as long as the Jacobian is non-singular
+        linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(fu),
+            linu = _vec(u_gauss_newton),
+            p = p, reltol = cache.abstol)
+        cache.linsolve = linres.cache
+        @. cache.u_gauss_newton = -1 * u_gauss_newton
     end
 
-    linres = dolinsolve(alg.precs, linsolve; A = cache.H, b = _vec(cache.g),
-        linu = _vec(u_tmp), p, reltol = cache.abstol)
-    cache.linsolve = linres.cache
-    cache.u_tmp .= -1 .* u_tmp
+    # Compute dogleg step
     dogleg!(cache)
 
     # Compute the potentially new u
-    cache.u_tmp .= u .+ cache.step_size
+    @. cache.u_tmp = u + cache.du
     f(cache.fu_new, cache.u_tmp, p)
     trust_region_step!(cache)
     cache.stats.nf += 1
@@ -331,18 +364,18 @@ function perform_step!(cache::TrustRegionCache{false})
 
     if make_new_J
         J = jacobian!!(cache.J, cache)
-        cache.H = J * J
-        cache.g = J * fu
+        cache.H = J' * J
+        cache.g = J' * fu
         cache.stats.njacs += 1
+        cache.u_gauss_newton = -1 .* (cache.H \ cache.g)
     end
 
-    @unpack g, H = cache
     # Compute the Newton step.
-    cache.u_tmp = -H \ g
     dogleg!(cache)
 
     # Compute the potentially new u
-    cache.u_tmp = u .+ cache.step_size
+    cache.u_tmp = u + cache.du
+
     cache.fu_new = f(cache.u_tmp, p)
     trust_region_step!(cache)
     cache.stats.nf += 1
@@ -355,25 +388,25 @@ function retrospective_step!(cache::TrustRegionCache)
     @unpack J, fu_prev, fu, u_prev, u = cache
     J = jacobian!!(deepcopy(J), cache)
     if J isa Number
-        cache.H = J * J
-        cache.g = J * fu
+        cache.H = J' * J
+        cache.g = J' * fu
     else
-        mul!(cache.H, J, J)
-        mul!(cache.g, J, fu)
+        mul!(cache.H, J', J)
+        mul!(cache.g, J', fu)
     end
     cache.stats.njacs += 1
-    @unpack H, g, step_size = cache
+    @unpack H, g, du = cache
 
     return -(get_loss(fu_prev) - get_loss(fu)) /
-           (step_size' * g + step_size' * H * step_size / 2)
+           (dot(du, g) + dot(du, H, du) / 2)
 end
 
 function trust_region_step!(cache::TrustRegionCache)
-    @unpack fu_new, step_size, g, H, loss, max_trust_r, radius_update_scheme = cache
+    @unpack fu_new, du, g, H, loss, max_trust_r, radius_update_scheme = cache
     cache.loss_new = get_loss(fu_new)
 
     # Compute the ratio of the actual reduction to the predicted reduction.
-    cache.r = -(loss - cache.loss_new) / (step_size' * g + step_size' * H * step_size / 2)
+    cache.r = -(loss - cache.loss_new) / (dot(du, g) + dot(du, H, du) / 2)
     @unpack r = cache
 
     if radius_update_scheme === RadiusUpdateSchemes.Simple
@@ -403,6 +436,51 @@ function trust_region_step!(cache::TrustRegionCache)
             cache.force_stop = true
         end
 
+    elseif radius_update_scheme === RadiusUpdateSchemes.NLsolve
+        # accept/reject decision
+        if r > cache.step_threshold # accept
+            take_step!(cache)
+            cache.loss = cache.loss_new
+            cache.make_new_J = true
+        else # reject
+            cache.make_new_J = false
+        end
+
+        # trust region update 
+        if r < 1 // 10 # cache.shrink_threshold 
+            cache.trust_r *= 1 // 2 # cache.shrink_factor 
+        elseif r >= 9 // 10 # cache.expand_threshold 
+            cache.trust_r = 2 * norm(cache.du) # cache.expand_factor * norm(cache.du) 
+        elseif r >= 1 // 2 # cache.p1 
+            cache.trust_r = max(cache.trust_r, 2 * norm(cache.du)) # cache.expand_factor * norm(cache.du))
+        end
+
+        # convergence test
+        if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol
+            cache.force_stop = true
+        end
+
+    elseif radius_update_scheme === RadiusUpdateSchemes.NocedalWright
+        # accept/reject decision
+        if r > cache.step_threshold # accept
+            take_step!(cache)
+            cache.loss = cache.loss_new
+            cache.make_new_J = true
+        else # reject
+            cache.make_new_J = false
+        end
+
+        if r < 1 // 4
+            cache.trust_r = (1 // 4) * norm(cache.du)
+        elseif (r > (3 // 4)) && abs(norm(cache.du) - cache.trust_r) / cache.trust_r < 1e-6
+            cache.trust_r = min(2 * cache.trust_r, cache.max_trust_r)
+        end
+
+        # convergence test
+        if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol
+            cache.force_stop = true
+        end
+
     elseif radius_update_scheme === RadiusUpdateSchemes.Hei
         if r > cache.step_threshold
             take_step!(cache)
@@ -413,14 +491,14 @@ function trust_region_step!(cache::TrustRegionCache)
         end
         # Hei's radius update scheme
         @unpack shrink_threshold, p1, p2, p3, p4 = cache
-        if rfunc(r, shrink_threshold, p1, p3, p4, p2) * cache.internalnorm(step_size) <
+        if rfunc(r, shrink_threshold, p1, p3, p4, p2) * cache.internalnorm(du) <
            cache.trust_r
             cache.shrink_counter += 1
         else
             cache.shrink_counter = 0
         end
         cache.trust_r = rfunc(r, shrink_threshold, p1, p3, p4, p2) *
-                        cache.internalnorm(step_size)
+                        cache.internalnorm(du)
 
         if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol ||
            cache.internalnorm(g) < cache.ϵ
@@ -432,7 +510,7 @@ function trust_region_step!(cache::TrustRegionCache)
             cache.p1 = cache.p2 * cache.p1
             cache.shrink_counter += 1
         elseif r >= cache.expand_threshold &&
-               cache.internalnorm(step_size) > cache.trust_r / 2
+               cache.internalnorm(du) > cache.trust_r / 2
             cache.p1 = cache.p3 * cache.p1
             cache.shrink_counter = 0
         end
@@ -481,7 +559,7 @@ function trust_region_step!(cache::TrustRegionCache)
             cache.loss = cache.loss_new
             cache.make_new_J = true
             if retrospective_step!(cache) >= cache.expand_threshold
-                cache.trust_r = max(cache.p1 * cache.internalnorm(step_size), cache.trust_r)
+                cache.trust_r = max(cache.p1 * cache.internalnorm(du), cache.trust_r)
             end
 
         else
@@ -495,31 +573,63 @@ function trust_region_step!(cache::TrustRegionCache)
     end
 end
 
-function dogleg!(cache::TrustRegionCache)
-    @unpack u_tmp, trust_r = cache
+function dogleg!(cache::TrustRegionCache{true})
+    @unpack u_tmp, u_gauss_newton, u_cauchy, trust_r = cache
+
+    # Take the full Gauss-Newton step if lies within the trust region.
+    if norm(u_gauss_newton) ≤ trust_r
+        cache.du .= u_gauss_newton
+        return
+    end
+
+    # Take intersection of steepest descent direction and trust region if Cauchy point lies outside of trust region
+    l_grad = norm(cache.g) # length of the gradient
+    d_cauchy = l_grad^3 / dot(cache.g, cache.H, cache.g) # distance of the cauchy point from the current iterate
+    if d_cauchy >= trust_r
+        @. cache.du = -(trust_r / l_grad) * cache.g # step to the end of the trust region
+        return
+    end
+
+    # Take the intersection of dogled with trust region if Cauchy point lies inside the trust region
+    @. u_cauchy = -(d_cauchy / l_grad) * cache.g # compute Cauchy point
+    @. u_tmp = u_gauss_newton - u_cauchy # calf of the dogleg -- use u_tmp to avoid allocation
+
+    a = dot(u_tmp, u_tmp)
+    b = 2 * dot(u_cauchy, u_tmp)
+    c = d_cauchy^2 - trust_r^2
+    aux = max(b^2 - 4 * a * c, 0.0) # technically guaranteed to be non-negative but hedging against floating point issues
+    τ = (-b + sqrt(aux)) / (2 * a) # stepsize along dogleg to trust region boundary
+
+    @. cache.du = u_cauchy + τ * u_tmp
+end
+
+function dogleg!(cache::TrustRegionCache{false})
+    @unpack u_tmp, u_gauss_newton, u_cauchy, trust_r = cache
 
-    # Test if the full step is within the trust region.
-    if norm(u_tmp) ≤ trust_r
-        cache.step_size = deepcopy(u_tmp)
+    # Take the full Gauss-Newton step if lies within the trust region.
+    if norm(u_gauss_newton) ≤ trust_r
+        cache.du = deepcopy(u_gauss_newton)
         return
     end
 
-    # Calcualte Cauchy point, optimum along the steepest descent direction.
-    δsd = -cache.g
-    norm_δsd = norm(δsd)
-    if norm_δsd ≥ trust_r
-        cache.step_size = δsd .* trust_r / norm_δsd
+    ## Take intersection of steepest descent direction and trust region if Cauchy point lies outside of trust region
+    l_grad = norm(cache.g)
+    d_cauchy = l_grad^3 / dot(cache.g, cache.H, cache.g) # distance of the cauchy point from the current iterate
+    if d_cauchy > trust_r # cauchy point lies outside of trust region
+        cache.du = -(trust_r / l_grad) * cache.g # step to the end of the trust region
         return
     end
 
-    # Find the intersection point on the boundary.
-    N_sd = u_tmp - δsd
-    dot_N_sd = dot(N_sd, N_sd)
-    dot_sd_N_sd = dot(δsd, N_sd)
-    dot_sd = dot(δsd, δsd)
-    fact = dot_sd_N_sd^2 - dot_N_sd * (dot_sd - trust_r^2)
-    τ = (-dot_sd_N_sd + sqrt(fact)) / dot_N_sd
-    cache.step_size = δsd + τ * N_sd
+    # Take the intersection of dogled with trust region if Cauchy point lies inside the trust region
+    u_cauchy = -(d_cauchy / l_grad) * cache.g # compute Cauchy point
+    u_tmp = u_gauss_newton - u_cauchy # calf of the dogleg
+    a = dot(u_tmp, u_tmp)
+    b = 2 * dot(u_cauchy, u_tmp)
+    c = d_cauchy^2 - trust_r^2
+    aux = max(b^2 - 4 * a * c, 0.0) # technically guaranteed to be non-negative but hedging against floating point issues
+    τ = (-b + sqrt(aux)) / (2 * a) # stepsize along dogleg to trust region boundary
+
+    cache.du = u_cauchy + τ * u_tmp
 end
 
 function take_step!(cache::TrustRegionCache{true})
diff --git a/test/basictests.jl b/test/basictests.jl
index 54e63e93d..7022c6e0e 100644
--- a/test/basictests.jl
+++ b/test/basictests.jl
@@ -141,8 +141,9 @@ end
         return solve(prob, TrustRegion(; radius_update_scheme); abstol = 1e-9, kwargs...)
     end
 
-    radius_update_schemes = [RadiusUpdateSchemes.Simple, RadiusUpdateSchemes.Hei,
-        RadiusUpdateSchemes.Yuan, RadiusUpdateSchemes.Fan, RadiusUpdateSchemes.Bastin]
+    radius_update_schemes = [RadiusUpdateSchemes.Simple, RadiusUpdateSchemes.NocedalWright,
+        RadiusUpdateSchemes.NLsolve, RadiusUpdateSchemes.Hei, RadiusUpdateSchemes.Yuan,
+        RadiusUpdateSchemes.Fan, RadiusUpdateSchemes.Bastin]
     u0s = VERSION ≥ v"1.9" ? ([1.0, 1.0], @SVector[1.0, 1.0], 1.0) : ([1.0, 1.0], 1.0)
 
     @testset "[OOP] u0: $(typeof(u0)) radius_update_scheme: $(radius_update_scheme)" for u0 in u0s,
@@ -284,7 +285,7 @@ end
                 maxiters)
             sol_oop = benchmark_nlsolve_oop(quadratic_f, u0; radius_update_scheme,
                 maxiters)
-            @test sol_iip.u ≈ sol_iip.u
+            @test sol_iip.u ≈ sol_oop.u
         end
     end
 end