[flang] IEEE_REM (#115936)

Implement the IEEE 60559:2020 remainder function.
llvm · Nov 13, 2024 · 92604cf · 92604cf
1 parent 1b8e0cf
commit 92604cf
Show file tree

Hide file tree

Showing 7 changed files with 182 additions and 6 deletions.
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -290,6 +290,7 @@ struct IntrinsicLibrary {
   mlir::Value genIeeeQuietCompare(mlir::Type resultType,
                                   llvm::ArrayRef<mlir::Value>);
   mlir::Value genIeeeReal(mlir::Type, llvm::ArrayRef<mlir::Value>);
+  mlir::Value genIeeeRem(mlir::Type, llvm::ArrayRef<mlir::Value>);
   mlir::Value genIeeeRint(mlir::Type, llvm::ArrayRef<mlir::Value>);
   template <bool isFlag>
   void genIeeeSetFlagOrHaltingMode(llvm::ArrayRef<fir::ExtendedValue>);

diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -97,7 +97,6 @@ static bool isStaticallyPresent(const fir::ExtendedValue &exv) {
 
 /// IEEE module procedure names not yet implemented for genModuleProcTODO.
 static constexpr char ieee_get_underflow_mode[] = "ieee_get_underflow_mode";
-static constexpr char ieee_rem[] = "ieee_rem";
 static constexpr char ieee_set_underflow_mode[] = "ieee_set_underflow_mode";
 
 using I = IntrinsicLibrary;
@@ -362,7 +361,7 @@ static constexpr IntrinsicHandler handlers[]{
     {"ieee_quiet_lt", &I::genIeeeQuietCompare<mlir::arith::CmpFPredicate::OLT>},
     {"ieee_quiet_ne", &I::genIeeeQuietCompare<mlir::arith::CmpFPredicate::UNE>},
     {"ieee_real", &I::genIeeeReal},
-    {"ieee_rem", &I::genModuleProcTODO<ieee_rem>},
+    {"ieee_rem", &I::genIeeeRem},
     {"ieee_rint", &I::genIeeeRint},
     {"ieee_round_eq", &I::genIeeeTypeCompare<mlir::arith::CmpIPredicate::eq>},
     {"ieee_round_ne", &I::genIeeeTypeCompare<mlir::arith::CmpIPredicate::ne>},
@@ -1298,6 +1297,14 @@ static constexpr MathOperation mathOperations[] = {
      genFuncType<Ty::Complex<8>, Ty::Complex<8>, Ty::Integer<8>>, genLibCall},
     {"pow", RTNAME_STRING(cqpowk), FuncTypeComplex16Complex16Integer8,
      genLibF128Call},
+    {"remainder", "remainderf",
+     genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>, genLibCall},
+    {"remainder", "remainder",
+     genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>, genLibCall},
+    {"remainder", "remainderl",
+     genFuncType<Ty::Real<10>, Ty::Real<10>, Ty::Real<10>>, genLibCall},
+    {"remainder", RTNAME_STRING(RemainderF128), FuncTypeReal16Real16Real16,
+     genLibF128Call},
     {"sign", "copysignf", genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>,
      genMathOp<mlir::math::CopySignOp>},
     {"sign", "copysign", genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>,
@@ -5030,6 +5037,32 @@ mlir::Value IntrinsicLibrary::genIeeeReal(mlir::Type resultType,
   return ifOp1.getResult(0);
 }
 
+// IEEE_REM
+mlir::Value IntrinsicLibrary::genIeeeRem(mlir::Type resultType,
+                                         llvm::ArrayRef<mlir::Value> args) {
+  // Return the remainder of X (args[0]) divided by Y (args[1]).
+  // Signal IEEE_UNDERFLOW if X is subnormal and Y is infinite.
+  // Signal IEEE_INVALID if X is infinite or Y is zero and neither is a NaN.
+  assert(args.size() == 2);
+  mlir::Value x = args[0];
+  mlir::Value y = args[1];
+  // "remainder" has no entry narrower than f32; compute such kinds in f32.
+  mlir::Type computeTy = resultType;
+  // resultType must be a FloatType; cast<> asserts this before it is used.
+  if (mlir::cast<mlir::FloatType>(resultType).getWidth() < 32)
+    computeTy = mlir::FloatType::getF32(builder.getContext());
+  x = builder.create<fir::ConvertOp>(loc, computeTy, x);
+  y = builder.create<fir::ConvertOp>(loc, computeTy, y);
+  // remainder calls do not signal IEEE_UNDERFLOW, so detect it explicitly.
+  // NOTE(review): X is tested after widening; an f16 subnormal may be missed.
+  mlir::Value underflow = builder.create<mlir::arith::AndIOp>(
+      loc, genIsFPClass(builder.getI1Type(), x, subnormalTest),
+      genIsFPClass(builder.getI1Type(), y, infiniteTest));
+  mlir::Value result = genRuntimeCall("remainder", x.getType(), {x, y});
+  genRaiseExcept(_FORTRAN_RUNTIME_IEEE_UNDERFLOW, underflow);
+  return builder.create<fir::ConvertOp>(loc, resultType, result);
+}
+
 // IEEE_RINT
 mlir::Value IntrinsicLibrary::genIeeeRint(mlir::Type resultType,
                                           llvm::ArrayRef<mlir::Value> args) {

diff --git a/flang/module/ieee_arithmetic.f90 b/flang/module/ieee_arithmetic.f90
@@ -161,6 +161,11 @@ end function ieee_round_ne
 
 ! Define specifics with 1 or 2 INTEGER, LOGICAL, or REAL arguments for
 ! generic G.
+!
+! The result type of most function specifics is either a fixed type or
+! the type of the first argument. The result type of a SPECIFICS_rRR
+! function call is the type of the higher-precision argument.
+
 #define SPECIFICS_I(G) \
   G(1) G(2) G(4) G(8) G(16)
 #define SPECIFICS_L(G) \
@@ -234,13 +239,26 @@ end function ieee_round_ne
   G(8,2) G(8,3) G(8,4) G(8,8) G(8,10) G(8,16) \
   G(10,2) G(10,3) G(10,4) G(10,8) G(10,10) G(10,16) \
   G(16,2) G(16,3) G(16,4) G(16,8) G(16,10) G(16,16)
+#define SPECIFICS_rRR(G) \
+  G(2,2,2) G(2,2,3) G(4,2,4) G(8,2,8) G(10,2,10) G(16,2,16) \
+  G(2,3,2) G(3,3,3) G(4,3,4) G(8,3,8) G(10,3,10) G(16,3,16) \
+  G(4,4,2) G(4,4,3) G(4,4,4) G(8,4,8) G(10,4,10) G(16,4,16) \
+  G(8,8,2) G(8,8,3) G(8,8,4) G(8,8,8) G(10,8,10) G(16,8,16) \
+  G(10,10,2) G(10,10,3) G(10,10,4) G(10,10,8) G(10,10,10) G(16,10,16) \
+  G(16,16,2) G(16,16,3) G(16,16,4) G(16,16,8) G(16,16,10) G(16,16,16)
 #else
 #define SPECIFICS_RR(G) \
   G(2,2) G(2,3) G(2,4) G(2,8) G(2,16) \
   G(3,2) G(3,3) G(3,4) G(3,8) G(3,16) \
   G(4,2) G(4,3) G(4,4) G(4,8) G(4,16) \
   G(8,2) G(8,3) G(8,4) G(8,8) G(8,16) \
   G(16,2) G(16,3) G(16,4) G(16,8) G(16,16)
+#define SPECIFICS_rRR(G) \
+  G(2,2,2) G(2,2,3) G(4,2,4) G(8,2,8) G(16,2,16) \
+  G(2,3,2) G(3,3,3) G(4,3,4) G(8,3,8) G(16,3,16) \
+  G(4,4,2) G(4,4,3) G(4,4,4) G(8,4,8) G(16,4,16) \
+  G(8,8,2) G(8,8,3) G(8,8,4) G(8,8,8) G(16,8,16) \
+  G(16,16,2) G(16,16,3) G(16,16,4) G(16,16,8) G(16,16,16)
 #endif
 #else
 #if __x86_64__
@@ -250,12 +268,23 @@ end function ieee_round_ne
   G(4,2) G(4,3) G(4,4) G(4,8) G(4,10) \
   G(8,2) G(8,3) G(8,4) G(8,8) G(8,10) \
   G(10,2) G(10,3) G(10,4) G(10,8) G(10,10)
+#define SPECIFICS_rRR(G) \
+  G(2,2,2) G(2,2,3) G(4,2,4) G(8,2,8) G(10,2,10) \
+  G(2,3,2) G(3,3,3) G(4,3,4) G(8,3,8) G(10,3,10) \
+  G(4,4,2) G(4,4,3) G(4,4,4) G(8,4,8) G(10,4,10) \
+  G(8,8,2) G(8,8,3) G(8,8,4) G(8,8,8) G(10,8,10) \
+  G(10,10,2) G(10,10,3) G(10,10,4) G(10,10,8) G(10,10,10)
 #else
 #define SPECIFICS_RR(G) \
   G(2,2) G(2,3) G(2,4) G(2,8) \
   G(3,2) G(3,3) G(3,4) G(3,8) \
   G(4,2) G(4,3) G(4,4) G(4,8) \
   G(8,2) G(8,3) G(8,4) G(8,8)
+#define SPECIFICS_rRR(G) \
+  G(2,2,2) G(2,2,3) G(4,2,4) G(8,2,8) \
+  G(2,3,2) G(3,3,3) G(4,3,4) G(8,3,8) \
+  G(4,4,2) G(4,4,3) G(4,4,4) G(8,4,8) \
+  G(8,8,2) G(8,8,3) G(8,8,4) G(8,8,8)
 #endif
 #endif
 
@@ -467,16 +496,16 @@ end function ieee_quiet_ne_a##AKIND;
   public :: ieee_quiet_ne
 #undef IEEE_QUIET_NE_R
 
-#define IEEE_REM_RR(XKIND, YKIND) \
-  elemental real(XKIND) function ieee_rem_a##XKIND##_a##YKIND(x, y); \
+#define IEEE_REM_rRR(RKIND, XKIND, YKIND) \
+  elemental real(RKIND) function ieee_rem_a##XKIND##_a##YKIND(x, y); \
     real(XKIND), intent(in) :: x; \
     real(YKIND), intent(in) :: y; \
   end function ieee_rem_a##XKIND##_a##YKIND;
   interface ieee_rem
-    SPECIFICS_RR(IEEE_REM_RR)
+    SPECIFICS_rRR(IEEE_REM_rRR)
   end interface ieee_rem
   public :: ieee_rem
-#undef IEEE_REM_RR
+#undef IEEE_REM_rRR
 
 #define IEEE_RINT_R(XKIND) \
   elemental real(XKIND) function ieee_rint_a##XKIND(x, round); \

diff --git a/flang/runtime/Float128Math/CMakeLists.txt b/flang/runtime/Float128Math/CMakeLists.txt
@@ -51,6 +51,7 @@ set(sources
   norm2.cpp
   pow.cpp
   random.cpp
+  remainder.cpp
   round.cpp
   rrspacing.cpp
   scale.cpp

diff --git a/flang/runtime/Float128Math/math-entries.h b/flang/runtime/Float128Math/math-entries.h
@@ -96,6 +96,7 @@ DEFINE_FALLBACK_F128(Nearbyint)
 DEFINE_FALLBACK_F128(Nextafter)
 DEFINE_FALLBACK_F128(Pow)
 DEFINE_FALLBACK_F128(Qnan)
+DEFINE_FALLBACK_F128(Remainder)
 DEFINE_FALLBACK_F128(Round)
 DEFINE_FALLBACK_F128(Sin)
 DEFINE_FALLBACK_F128(Sinh)
@@ -144,6 +145,7 @@ DEFINE_SIMPLE_ALIAS(Lround, lroundq)
 DEFINE_SIMPLE_ALIAS(Nearbyint, nearbyintq)
 DEFINE_SIMPLE_ALIAS(Nextafter, nextafterq)
 DEFINE_SIMPLE_ALIAS(Pow, powq)
+DEFINE_SIMPLE_ALIAS(Remainder, remainderq)
 DEFINE_SIMPLE_ALIAS(Round, roundq)
 DEFINE_SIMPLE_ALIAS(Sin, sinq)
 DEFINE_SIMPLE_ALIAS(Sinh, sinhq)
@@ -196,6 +198,7 @@ DEFINE_SIMPLE_ALIAS(Lround, std::lround)
 DEFINE_SIMPLE_ALIAS(Nearbyint, std::nearbyint)
 DEFINE_SIMPLE_ALIAS(Nextafter, std::nextafter)
 DEFINE_SIMPLE_ALIAS(Pow, std::pow)
+DEFINE_SIMPLE_ALIAS(Remainder, std::remainder)
 DEFINE_SIMPLE_ALIAS(Round, std::round)
 DEFINE_SIMPLE_ALIAS(Sin, std::sin)
 DEFINE_SIMPLE_ALIAS(Sinh, std::sinh)

diff --git a/flang/runtime/Float128Math/remainder.cpp b/flang/runtime/Float128Math/remainder.cpp
@@ -0,0 +1,23 @@
+//===-- runtime/Float128Math/remainder.cpp --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "math-entries.h"
+
+namespace Fortran::runtime {
+extern "C" {
+// REAL(16) remainder entry point; forwards x and y to Remainder<true>::invoke.
+#if HAS_LDBL128 || HAS_FLOAT128
+CppTypeFor<TypeCategory::Real, 16> RTDEF(RemainderF128)(
+    CppTypeFor<TypeCategory::Real, 16> x,
+    CppTypeFor<TypeCategory::Real, 16> y) {
+  return Remainder<true>::invoke(x, y);
+}
+#endif
+
+} // extern "C"
+} // namespace Fortran::runtime
diff --git a/flang/test/Lower/Intrinsics/ieee_rem.f90 b/flang/test/Lower/Intrinsics/ieee_rem.f90
@@ -0,0 +1,86 @@
+! RUN: bbc -emit-hlfir -o - %s | FileCheck %s
+! Verifies the HLFIR lowering of IEEE_REM for same-kind and mixed-kind operands.
+! CHECK-LABEL: c.func @_QQmain
+  use ieee_arithmetic, only: ieee_rem
+
+  ! CHECK:     %[[V_0:[0-9]+]] = fir.alloca f16 {bindc_name = "x2", uniq_name = "_QFEx2"}
+  ! CHECK:     %[[V_1:[0-9]+]]:2 = hlfir.declare %[[V_0]] {uniq_name = "_QFEx2"} : (!fir.ref<f16>) -> (!fir.ref<f16>, !fir.ref<f16>)
+  ! CHECK:     %[[V_2:[0-9]+]] = fir.alloca f32 {bindc_name = "x4", uniq_name = "_QFEx4"}
+  ! CHECK:     %[[V_3:[0-9]+]]:2 = hlfir.declare %[[V_2]] {uniq_name = "_QFEx4"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  ! CHECK:     %[[V_4:[0-9]+]] = fir.alloca f64 {bindc_name = "x8", uniq_name = "_QFEx8"}
+  ! CHECK:     %[[V_5:[0-9]+]]:2 = hlfir.declare %[[V_4]] {uniq_name = "_QFEx8"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
+  ! CHECK:     %[[V_6:[0-9]+]] = fir.alloca f16 {bindc_name = "y2", uniq_name = "_QFEy2"}
+  ! CHECK:     %[[V_7:[0-9]+]]:2 = hlfir.declare %[[V_6]] {uniq_name = "_QFEy2"} : (!fir.ref<f16>) -> (!fir.ref<f16>, !fir.ref<f16>)
+  ! CHECK:     %[[V_8:[0-9]+]] = fir.alloca f32 {bindc_name = "y4", uniq_name = "_QFEy4"}
+  ! CHECK:     %[[V_9:[0-9]+]]:2 = hlfir.declare %[[V_8]] {uniq_name = "_QFEy4"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  ! CHECK:     %[[V_10:[0-9]+]] = fir.alloca f64 {bindc_name = "y8", uniq_name = "_QFEy8"}
+  ! CHECK:     %[[V_11:[0-9]+]]:2 = hlfir.declare %[[V_10]] {uniq_name = "_QFEy8"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
+  real(2) :: x2, y2
+  real(4) :: x4, y4
+  real(8) :: x8, y8
+  ! real(4) X and Y: expect a remainderf call on f32 values.
+  ! CHECK:     hlfir.assign %cst{{[_0-9]*}} to %[[V_3]]#0 : f32, !fir.ref<f32>
+  x4 = 3.3_4
+  ! CHECK:     hlfir.assign %cst{{[_0-9]*}} to %[[V_9]]#0 : f32, !fir.ref<f32>
+  y4 = -0.0_4
+  ! CHECK-DAG: %[[V_12:[0-9]+]] = fir.load %[[V_3]]#0 : !fir.ref<f32>
+  ! CHECK-DAG: %[[V_13:[0-9]+]] = fir.load %[[V_9]]#0 : !fir.ref<f32>
+  ! CHECK-DAG: %[[V_14:[0-9]+]] = fir.convert %[[V_12]] : (f32) -> f32
+  ! CHECK-DAG: %[[V_15:[0-9]+]] = fir.convert %[[V_13]] : (f32) -> f32
+  ! CHECK-DAG: %[[V_16:[0-9]+]] = "llvm.intr.is.fpclass"(%[[V_15]]) <{bit = 516 : i32}> : (f32) -> i1
+  ! CHECK-DAG: %[[V_17:[0-9]+]] = "llvm.intr.is.fpclass"(%[[V_14]]) <{bit = 144 : i32}> : (f32) -> i1
+  ! CHECK-DAG: %[[V_18:[0-9]+]] = arith.andi %[[V_17]], %[[V_16]] : i1
+  ! CHECK-DAG: %[[V_19:[0-9]+]] = fir.call @remainderf(%[[V_14]], %[[V_15]]) fastmath<contract> : (f32, f32) -> f32
+  ! CHECK:     fir.if %[[V_18]] {
+  ! CHECK:       %[[V_40:[0-9]+]] = fir.call @_FortranAMapException(%c16{{.*}}) fastmath<contract> : (i32) -> i32
+  ! CHECK:       %[[V_41:[0-9]+]] = fir.call @feraiseexcept(%[[V_40]]) fastmath<contract> : (i32) -> i32
+  ! CHECK:     }
+  ! CHECK:     %[[V_20:[0-9]+]] = fir.convert %[[V_19]] : (f32) -> f32
+  ! CHECK:     hlfir.assign %[[V_20]] to %[[V_3]]#0 : f32, !fir.ref<f32>
+  x4 = ieee_rem(x4, y4)
+! print*, x4
+  ! real(2) X, real(8) Y: expect remainder on f64, then a convert to f16.
+  ! CHECK:     hlfir.assign %cst{{[_0-9]*}} to %[[V_1]]#0 : f16, !fir.ref<f16>
+  x2 = 3.0_2
+  ! CHECK:     hlfir.assign %cst{{[_0-9]*}} to %[[V_11]]#0 : f64, !fir.ref<f64>
+  y8 = 2.0_8
+  ! CHECK-DAG: %[[V_21:[0-9]+]] = fir.load %[[V_1]]#0 : !fir.ref<f16>
+  ! CHECK-DAG: %[[V_22:[0-9]+]] = fir.load %[[V_11]]#0 : !fir.ref<f64>
+  ! CHECK-DAG: %[[V_23:[0-9]+]] = fir.convert %[[V_21]] : (f16) -> f64
+  ! CHECK-DAG: %[[V_24:[0-9]+]] = fir.convert %[[V_22]] : (f64) -> f64
+  ! CHECK-DAG: %[[V_25:[0-9]+]] = "llvm.intr.is.fpclass"(%[[V_24]]) <{bit = 516 : i32}> : (f64) -> i1
+  ! CHECK-DAG: %[[V_26:[0-9]+]] = "llvm.intr.is.fpclass"(%[[V_23]]) <{bit = 144 : i32}> : (f64) -> i1
+  ! CHECK-DAG: %[[V_27:[0-9]+]] = arith.andi %[[V_26]], %[[V_25]] : i1
+  ! CHECK-DAG: %[[V_28:[0-9]+]] = fir.call @remainder(%[[V_23]], %[[V_24]]) fastmath<contract> : (f64, f64) -> f64
+  ! CHECK:     fir.if %[[V_27]] {
+  ! CHECK:       %[[V_40:[0-9]+]] = fir.call @_FortranAMapException(%c16{{.*}}) fastmath<contract> : (i32) -> i32
+  ! CHECK:       %[[V_41:[0-9]+]] = fir.call @feraiseexcept(%[[V_40]]) fastmath<contract> : (i32) -> i32
+  ! CHECK:     }
+  ! CHECK:     %[[V_29:[0-9]+]] = fir.convert %[[V_28]] : (f64) -> f64
+  ! CHECK:     %[[V_30:[0-9]+]] = fir.convert %[[V_29]] : (f64) -> f16
+  ! CHECK:     hlfir.assign %[[V_30]] to %[[V_1]]#0 : f16, !fir.ref<f16>
+  x2 = ieee_rem(x2, y8)
+! print*, x2
+  ! real(8) X, real(2) Y: expect remainder on f64 with Y converted from f16.
+  ! CHECK:     hlfir.assign %cst{{[_0-9]*}} to %[[V_5]]#0 : f64, !fir.ref<f64>
+  x8 = huge(x8)
+  ! CHECK:     hlfir.assign %cst{{[_0-9]*}} to %[[V_7]]#0 : f16, !fir.ref<f16>
+  y2 = tiny(y2)
+  ! CHECK-DAG: %[[V_31:[0-9]+]] = fir.load %[[V_5]]#0 : !fir.ref<f64>
+  ! CHECK-DAG: %[[V_32:[0-9]+]] = fir.load %[[V_7]]#0 : !fir.ref<f16>
+  ! CHECK-DAG: %[[V_33:[0-9]+]] = fir.convert %[[V_31]] : (f64) -> f64
+  ! CHECK-DAG: %[[V_34:[0-9]+]] = fir.convert %[[V_32]] : (f16) -> f64
+  ! CHECK-DAG: %[[V_35:[0-9]+]] = "llvm.intr.is.fpclass"(%[[V_34]]) <{bit = 516 : i32}> : (f64) -> i1
+  ! CHECK-DAG: %[[V_36:[0-9]+]] = "llvm.intr.is.fpclass"(%[[V_33]]) <{bit = 144 : i32}> : (f64) -> i1
+  ! CHECK-DAG: %[[V_37:[0-9]+]] = arith.andi %[[V_36]], %[[V_35]] : i1
+  ! CHECK-DAG: %[[V_38:[0-9]+]] = fir.call @remainder(%[[V_33]], %[[V_34]]) fastmath<contract> : (f64, f64) -> f64
+  ! CHECK:     fir.if %[[V_37]] {
+  ! CHECK:       %[[V_40:[0-9]+]] = fir.call @_FortranAMapException(%c16{{.*}}) fastmath<contract> : (i32) -> i32
+  ! CHECK:       %[[V_41:[0-9]+]] = fir.call @feraiseexcept(%[[V_40]]) fastmath<contract> : (i32) -> i32
+  ! CHECK:     }
+  ! CHECK:     %[[V_39:[0-9]+]] = fir.convert %[[V_38]] : (f64) -> f64
+  ! CHECK:     hlfir.assign %[[V_39]] to %[[V_5]]#0 : f64, !fir.ref<f64>
+  x8 = ieee_rem(x8, y2)
+! print*, x8
+
+end