From 3557f7283345f9c141443b1af6ea5546329ce9a4 Mon Sep 17 00:00:00 2001
From: Robert Hallberg <Robert.Hallberg@noaa.gov>
Date: Tue, 23 Jan 2024 15:06:28 -0500
Subject: [PATCH] *Do 6 Newton method iterations in cuberoot

  Modified the cuberoot function to do 6 iterations with Newton's method,
starting from the optimal first guess of (3/8)**(1/3).  Following guidance from
performance testing of the previous version, all convergence testing has been
removed and the same number of iterations are applied regardless of the input
value.  The form of Newton's method used here does one division per iteration,
but it is good at polishing the root to give an accurate final solution and by
simply repeatedly using the same instructions it appears to optimize well.  This
commit changes answers at roundoff for code that uses the cuberoot function, so
ideally this PR would be dealt with before the cuberoot becomes widely used.
---
 src/framework/MOM_intrinsic_functions.F90 | 67 +++--------------------
 1 file changed, 9 insertions(+), 58 deletions(-)

diff --git a/src/framework/MOM_intrinsic_functions.F90 b/src/framework/MOM_intrinsic_functions.F90
index 808bc408a8..69c6a6cbe9 100644
--- a/src/framework/MOM_intrinsic_functions.F90
+++ b/src/framework/MOM_intrinsic_functions.F90
@@ -34,19 +34,10 @@ elemental function cuberoot(x) result(root)
   real, intent(in) :: x !< The argument of cuberoot in arbitrary units cubed [A3]
   real :: root !< The real cube root of x in arbitrary units [A]
 
+  ! Local variables
   real :: asx ! The absolute value of x rescaled by an integer power of 8 to put it into
               ! the range from 0.125 < asx <= 1.0, in ambiguous units cubed [B3]
   real :: root_asx ! The cube root of asx [B]
-  real :: num ! The numerator of an expression for the evolving estimate of the cube root of asx
-              ! in arbitrary units that can grow or shrink with each iteration [B C]
-  real :: den ! The denominator of an expression for the evolving estimate of the cube root of asx
-              ! in arbitrary units that can grow or shrink with each iteration [C]
-  real :: num_prev ! The numerator of an expression for the previous iteration of the evolving estimate
-              ! of the cube root of asx in arbitrary units that can grow or shrink with each iteration [B D]
-  real :: den_prev ! The denominator of an expression for the previous iteration of the evolving estimate of
-              ! the cube root of asx in arbitrary units that can grow or shrink with each iteration [D]
-  real, parameter :: den_min = 2.**(minexponent(1.) / 4 + 4)  ! A value of den that triggers rescaling [C]
-  real, parameter :: den_max = 2.**(maxexponent(1.) / 4 - 2)  ! A value of den that triggers rescaling [C]
   integer :: ex_3 ! One third of the exponent part of x, used to rescale x to get a.
   integer :: itt
 
@@ -58,55 +49,15 @@ elemental function cuberoot(x) result(root)
     ! Here asx is in the range of 0.125 <= asx < 1.0
     asx = scale(abs(x), -3*ex_3)
 
-    ! This first estimate is one iteration of Newton's method with a starting guess of 1.  It is
-    ! always an over-estimate of the true solution, but it is a good approximation for asx near 1.
-    num = 2.0 + asx
-    den = 3.0
-    ! Iteratively determine Root = asx**1/3 using Newton's method, noting that in this case Newton's
-    ! method converges monotonically from above and needs no bounding.  For the range of asx from
-    ! 0.125 to 1.0 with the first guess used above, 6 iterations suffice to converge to roundoff.
-    do itt=1,9
-      ! Newton's method iterates estimates as Root = Root - (Root**3 - asx) / (3.0 * Root**2), or
-      ! equivalently as Root = (2.0*Root**2 + asx) / (3.0 * Root**2).
-      ! Keeping the estimates in a fractional form Root = num / den allows this calculation with
-      ! fewer (or no) real divisions during the iterations before doing a single real division
-      ! at the end, and it is therefore more computationally efficient.
-
-      num_prev = num ; den_prev = den
-      num = 2.0 * num_prev**3 + asx * den_prev**3
-      den = 3.0 * (den_prev * num_prev**2)
-
-      if ((num * den_prev == num_prev * den) .or. (itt == 9)) then
-        !   If successive estimates of root are identical, this is a converged solution.
-        root_asx = num / den
-        exit
-      elseif (num * den_prev > num_prev * den) then
-        !   If the estimates are increasing, this also indicates convergence, but for a more subtle
-        ! reason.  Because Newton's method converges monotonically from above (at least for infinite
-        ! precision math), the only reason why this estimate could increase is if the iterations
-        ! have converged to a roundoff-level limit cycle around an irrational or otherwise
-        ! unrepresentable solution, with values only changing in the last bit or two.  If so, we
-        ! should stop iterating and accept the one of the current or previous solutions, both of
-        ! which will be within numerical roundoff of the true solution.
-        root_asx = num / den
-        ! Pick the more accurate of the last two iterations.
-        ! Given that both of the two previous iterations are within roundoff of the true
-        ! solution, this next step might be overkill.
-        if ( abs(den_prev**3*root_asx**3 - den_prev**3*asx) > abs(num_prev**3 - den_prev**3*asx) ) then
-          ! The previous iteration was slightly more accurate, so use that for root_asx.
-          root_asx = num_prev / den_prev
-        endif
-        exit
-      endif
-
-      ! Because successive estimates of the numerator and denominator tend to be the cube of their
-      ! predecessors, the numerator and denominator need to be rescaled by division when they get
-      ! too large or small to avoid overflow or underflow in the convergence test below.
-      if ((den > den_max) .or. (den < den_min)) then
-        num = scale(num, -exponent(den))
-        den = scale(den, -exponent(den))
-      endif
+    ! Iteratively determine root_asx = asx**1/3 using Newton's method, noting that in the case of a
+    ! cube root solver, Newton's method converges monotonically from above and needs no bounding.
 
+    !   A first estimate of (3/8)**(1/3) gives the same fractional errors after one Newton's method
+    ! iteration when asx is 1. and 0.125 (and smaller errors in between) and with this first guess
+    ! Newton's method converges to 30 decimal places within 6 iterations for 0.125 <= asx <= 1.0.
+    root_asx = 0.721124785153704
+    do itt=1,6
+      root_asx = root_asx - (root_asx**3 - asx) / (3.0 * root_asx**2)
     enddo
 
     root = sign(scale(root_asx, ex_3), x)