diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index deb7b97943dc81..f43f0616f35c9a 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -1594,7 +1594,7 @@ enum class ProfileChecks : unsigned int
 {
     CHECK_NONE          = 0,
     CHECK_HASLIKELIHOOD = 1 << 0, // check all FlowEdges for hasLikelihood
-    CHECK_LIKELIHOODSUM = 1 << 1, // check block succesor likelihoods sum to 1
+    CHECK_LIKELIHOODSUM = 1 << 1, // check block successor likelihoods sum to 1
     CHECK_LIKELY        = 1 << 2, // fully check likelihood based weights
     RAISE_ASSERT        = 1 << 3, // assert on check failure
     CHECK_ALL_BLOCKS    = 1 << 4, // check blocks even if bbHasProfileWeight is false
@@ -4525,6 +4525,11 @@ class Compiler
                               CORINFO_THIS_TRANSFORM  constraintCallThisTransform,
                               NamedIntrinsic*         pIntrinsicName,
                               bool*                   isSpecialIntrinsic = nullptr);
+    GenTree* impEstimateIntrinsic(CORINFO_METHOD_HANDLE method,
+                                  CORINFO_SIG_INFO*     sig,
+                                  CorInfoType           callJitType,
+                                  NamedIntrinsic        intrinsicName,
+                                  bool                  tailCall);
     GenTree* impMathIntrinsic(CORINFO_METHOD_HANDLE method,
                               CORINFO_SIG_INFO*     sig,
                               var_types             callType,
diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp
index 4dc64523217230..9d1f8b4e899227 100644
--- a/src/coreclr/jit/importercalls.cpp
+++ b/src/coreclr/jit/importercalls.cpp
@@ -3125,7 +3125,15 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
        // To be fixed in https://github.com/dotnet/runtime/pull/77465
        const bool tier0opts = !opts.compDbgCode && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT);

-       if (!mustExpand && tier0opts)
+       if (tier0opts)
+       {
+           // The *Estimate APIs are allowed to differ in behavior across hardware
+           // so ensure we treat them as "betterToExpand" to get deterministic behavior
+
+           betterToExpand |= (ni == NI_System_Math_ReciprocalEstimate);
+           betterToExpand |= (ni == NI_System_Math_ReciprocalSqrtEstimate);
+       }
+       else if (!mustExpand)
        {
            switch (ni)
            {
@@ -3189,9 +3197,9 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
                    break;

                default:
-                   // Unsafe.* are all small enough to prefer expansions.
+                   // Various intrinsics are all small enough to prefer expansions.
+                   betterToExpand |= ni >= NI_SYSTEM_MATH_START && ni <= NI_SYSTEM_MATH_END;
                    betterToExpand |= ni >= NI_SRCS_UNSAFE_START && ni <= NI_SRCS_UNSAFE_END;
-                   // Same for these
                    betterToExpand |= ni >= NI_PRIMITIVE_START && ni <= NI_PRIMITIVE_END;
                    break;
            }
@@ -4146,6 +4154,13 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
                break;
            }

+           case NI_System_Math_ReciprocalEstimate:
+           case NI_System_Math_ReciprocalSqrtEstimate:
+           {
+               retNode = impEstimateIntrinsic(method, sig, callJitType, ni, tailCall);
+               break;
+           }
+
            case NI_System_Array_Clone:
            case NI_System_Collections_Generic_Comparer_get_Default:
            case NI_System_Collections_Generic_EqualityComparer_get_Default:
@@ -7413,13 +7428,15 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName)
            // instructions to directly compute round/ceiling/floor/truncate.
            case NI_System_Math_Abs:
+           case NI_System_Math_ReciprocalEstimate:
+           case NI_System_Math_ReciprocalSqrtEstimate:
            case NI_System_Math_Sqrt:
                return true;

            case NI_System_Math_Ceiling:
            case NI_System_Math_Floor:
-           case NI_System_Math_Truncate:
            case NI_System_Math_Round:
+           case NI_System_Math_Truncate:
                return compOpportunisticallyDependsOn(InstructionSet_SSE41);

            case NI_System_Math_FusedMultiplyAdd:
@@ -7434,11 +7451,13 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName)
        case NI_System_Math_Abs:
        case NI_System_Math_Ceiling:
        case NI_System_Math_Floor:
-       case NI_System_Math_Truncate:
-       case NI_System_Math_Round:
-       case NI_System_Math_Sqrt:
        case NI_System_Math_Max:
        case NI_System_Math_Min:
+       case NI_System_Math_ReciprocalEstimate:
+       case NI_System_Math_ReciprocalSqrtEstimate:
+       case NI_System_Math_Round:
+       case NI_System_Math_Sqrt:
+       case NI_System_Math_Truncate:
            return true;

        case NI_System_Math_FusedMultiplyAdd:
@@ -7513,6 +7532,8 @@ bool Compiler::IsMathIntrinsic(NamedIntrinsic intrinsicName)
        case NI_System_Math_MinMagnitudeNumber:
        case NI_System_Math_MinNumber:
        case NI_System_Math_Pow:
+       case NI_System_Math_ReciprocalEstimate:
+       case NI_System_Math_ReciprocalSqrtEstimate:
        case NI_System_Math_Round:
        case NI_System_Math_Sin:
        case NI_System_Math_Sinh:
@@ -8730,6 +8751,119 @@ void Compiler::impCheckCanInline(GenTreeCall* call,
    }
}

+//------------------------------------------------------------------------
+// impEstimateIntrinsic: Imports one of the *Estimate intrinsics, whose results
+// are explicitly allowed to differ based on the hardware they run on
+//
+// Arguments:
+//    method        - The handle of the method being imported
+//    sig           - The signature of the method being imported
+//    callJitType   - The underlying JIT type for the call
+//    intrinsicName - The intrinsic being imported
+//    tailCall      - true if the method is a tail call; otherwise false
+//
+GenTree* Compiler::impEstimateIntrinsic(CORINFO_METHOD_HANDLE method,
+                                        CORINFO_SIG_INFO*     sig,
+                                        CorInfoType           callJitType,
+                                        NamedIntrinsic        intrinsicName,
+                                        bool                  tailCall)
+{
+    var_types callType = JITtype2varType(callJitType);
+
+    assert(varTypeIsFloating(callType));
+    assert(sig->numArgs == 1);
+
+#if defined(FEATURE_HW_INTRINSICS)
+    // We use compExactlyDependsOn since these are estimate APIs where
+    // the behavior is explicitly allowed to differ across machines and
+    // we want to ensure that it gets marked as such in R2R.
+
+    var_types      simdType    = TYP_UNKNOWN;
+    NamedIntrinsic intrinsicId = NI_Illegal;
+
+    switch (intrinsicName)
+    {
+        case NI_System_Math_ReciprocalEstimate:
+        {
+#if defined(TARGET_XARCH)
+            if (compExactlyDependsOn(InstructionSet_AVX512F))
+            {
+                simdType    = TYP_SIMD16;
+                intrinsicId = NI_AVX512F_Reciprocal14Scalar;
+            }
+            else if ((callType == TYP_FLOAT) && compExactlyDependsOn(InstructionSet_SSE))
+            {
+                simdType    = TYP_SIMD16;
+                intrinsicId = NI_SSE_ReciprocalScalar;
+            }
+#elif defined(TARGET_ARM64)
+            if (compExactlyDependsOn(InstructionSet_AdvSimd_Arm64))
+            {
+                simdType    = TYP_SIMD8;
+                intrinsicId = NI_AdvSimd_Arm64_ReciprocalEstimateScalar;
+            }
+#endif // TARGET_ARM64
+            break;
+        }
+
+        case NI_System_Math_ReciprocalSqrtEstimate:
+        {
+#if defined(TARGET_XARCH)
+            if (compExactlyDependsOn(InstructionSet_AVX512F))
+            {
+                simdType    = TYP_SIMD16;
+                intrinsicId = NI_AVX512F_ReciprocalSqrt14Scalar;
+            }
+            else if ((callType == TYP_FLOAT) && compExactlyDependsOn(InstructionSet_SSE))
+            {
+                simdType    = TYP_SIMD16;
+                intrinsicId = NI_SSE_ReciprocalSqrtScalar;
+            }
+#elif defined(TARGET_ARM64)
+            if (compExactlyDependsOn(InstructionSet_AdvSimd_Arm64))
+            {
+                simdType    = TYP_SIMD8;
+                intrinsicId = NI_AdvSimd_Arm64_ReciprocalSquareRootEstimateScalar;
+            }
+#endif // TARGET_ARM64
+            break;
+        }
+
+        default:
+        {
+            unreached();
+        }
+    }
+
+    if (intrinsicId != NI_Illegal)
+    {
+        unsigned simdSize = 0;
+
+        if (simdType == TYP_SIMD8)
+        {
+            simdSize = 8;
+        }
+        else
+        {
+            assert(simdType == TYP_SIMD16);
+            simdSize = 16;
+        }
+
+        GenTree* op1 = impPopStack().val;
+
+        op1 = gtNewSimdCreateScalarUnsafeNode(simdType, op1, callJitType, simdSize);
+        op1 = gtNewSimdHWIntrinsicNode(simdType, op1, intrinsicId, callJitType, simdSize);
+
+        return gtNewSimdToScalarNode(callType, op1, callJitType, simdSize);
+    }
+#endif // FEATURE_HW_INTRINSICS
+
+    // TODO-CQ: Returning this as an intrinsic blocks inlining and is undesirable
+    // return impMathIntrinsic(method, sig, callType, intrinsicName, tailCall);
+
+    return nullptr;
+}
+
 GenTree* Compiler::impMathIntrinsic(CORINFO_METHOD_HANDLE method,
                                     CORINFO_SIG_INFO*     sig,
                                     var_types             callType,
@@ -10339,7 +10473,20 @@ NamedIntrinsic Compiler::lookupPrimitiveFloatNamedIntrinsic(CORINFO_METHOD_HANDL

        case 'R':
        {
-           if (strcmp(methodName, "Round") == 0)
+           if (strncmp(methodName, "Reciprocal", 10) == 0)
+           {
+               methodName += 10;
+
+               if (strcmp(methodName, "Estimate") == 0)
+               {
+                   result = NI_System_Math_ReciprocalEstimate;
+               }
+               else if (strcmp(methodName, "SqrtEstimate") == 0)
+               {
+                   result = NI_System_Math_ReciprocalSqrtEstimate;
+               }
+           }
+           else if (strcmp(methodName, "Round") == 0)
            {
                result = NI_System_Math_Round;
            }
diff --git a/src/coreclr/jit/namedintrinsiclist.h b/src/coreclr/jit/namedintrinsiclist.h
index 67eec1059e82e8..25c0521461d854 100644
--- a/src/coreclr/jit/namedintrinsiclist.h
+++ b/src/coreclr/jit/namedintrinsiclist.h
@@ -51,6 +51,8 @@ enum NamedIntrinsic : unsigned short
    NI_System_Math_MinMagnitudeNumber,
    NI_System_Math_MinNumber,
    NI_System_Math_Pow,
+   NI_System_Math_ReciprocalEstimate,
+   NI_System_Math_ReciprocalSqrtEstimate,
    NI_System_Math_Round,
    NI_System_Math_Sin,
    NI_System_Math_Sinh,
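Note (illustrative, not part of the patch): in managed terms, the scalar expansion that impEstimateIntrinsic builds for the *Estimate calls is roughly equivalent to the sketch below. The helper name is hypothetical; the JIT constructs the corresponding HWIntrinsic IR directly rather than calling these APIs, and this only names the instruction that backs the estimate on each ISA, mirroring the checks above.

// Illustrative only: roughly what the expansion above produces for a float argument.
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;

static class ReciprocalEstimateSketch
{
    public static float Invoke(float x)
    {
        if (Avx512F.IsSupported)
        {
            // x64 with AVX-512: VRCP14SS, an approximately 14-bit estimate (a double overload exists too).
            return Avx512F.Reciprocal14Scalar(Vector128.CreateScalarUnsafe(x)).ToScalar();
        }

        if (Sse.IsSupported)
        {
            // x64 baseline: RCPSS, float only - which is why the JIT also checks callType == TYP_FLOAT.
            return Sse.ReciprocalScalar(Vector128.CreateScalarUnsafe(x)).ToScalar();
        }

        if (AdvSimd.Arm64.IsSupported)
        {
            // Arm64: FRECPE.
            return AdvSimd.Arm64.ReciprocalEstimateScalar(Vector64.CreateScalarUnsafe(x)).ToScalar();
        }

        // No estimate instruction: impEstimateIntrinsic returns nullptr and the call
        // falls back to the managed implementation, which now just computes 1.0f / x.
        return 1.0f / x;
    }
}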
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Reciprocal.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Reciprocal.cs
index 50ef635a21addc..16d0df26157cb2 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Reciprocal.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Reciprocal.cs
@@ -95,6 +95,14 @@ public static void ReciprocalSqrtEstimate<T>(ReadOnlySpan<T> x, Span<T> destinat

            public static Vector128<T> Invoke(Vector128<T> x)
            {
+#if NET9_0_OR_GREATER
+                if (Avx512F.VL.IsSupported)
+                {
+                    if (typeof(T) == typeof(float)) return Avx512F.VL.Reciprocal14(x.AsSingle()).As<float, T>();
+                    if (typeof(T) == typeof(double)) return Avx512F.VL.Reciprocal14(x.AsDouble()).As<double, T>();
+                }
+#endif
+
                if (Sse.IsSupported)
                {
                    if (typeof(T) == typeof(float)) return Sse.Reciprocal(x.AsSingle()).As<float, T>();
@@ -115,6 +123,14 @@ public static Vector128<T> Invoke(Vector128<T> x)

            public static Vector256<T> Invoke(Vector256<T> x)
            {
+#if NET9_0_OR_GREATER
+                if (Avx512F.VL.IsSupported)
+                {
+                    if (typeof(T) == typeof(float)) return Avx512F.VL.Reciprocal14(x.AsSingle()).As<float, T>();
+                    if (typeof(T) == typeof(double)) return Avx512F.VL.Reciprocal14(x.AsDouble()).As<double, T>();
+                }
+#endif
+
                if (Avx.IsSupported)
                {
                    if (typeof(T) == typeof(float)) return Avx.Reciprocal(x.AsSingle()).As<float, T>();
@@ -125,11 +141,13 @@ public static Vector256<T> Invoke(Vector256<T> x)

            public static Vector512<T> Invoke(Vector512<T> x)
            {
+#if NET9_0_OR_GREATER
                if (Avx512F.IsSupported)
                {
                    if (typeof(T) == typeof(float)) return Avx512F.Reciprocal14(x.AsSingle()).As<float, T>();
                    if (typeof(T) == typeof(double)) return Avx512F.Reciprocal14(x.AsDouble()).As<double, T>();
                }
+#endif

                return Vector512<T>.One / x;
            }
@@ -143,6 +161,14 @@ public static Vector512<T> Invoke(Vector512<T> x)

            public static Vector128<T> Invoke(Vector128<T> x)
            {
+#if NET9_0_OR_GREATER
+                if (Avx512F.VL.IsSupported)
+                {
+                    if (typeof(T) == typeof(float)) return Avx512F.VL.ReciprocalSqrt14(x.AsSingle()).As<float, T>();
+                    if (typeof(T) == typeof(double)) return Avx512F.VL.ReciprocalSqrt14(x.AsDouble()).As<double, T>();
+                }
+#endif
+
                if (Sse.IsSupported)
                {
                    if (typeof(T) == typeof(float)) return Sse.ReciprocalSqrt(x.AsSingle()).As<float, T>();
@@ -163,6 +189,14 @@ public static Vector128<T> Invoke(Vector128<T> x)

            public static Vector256<T> Invoke(Vector256<T> x)
            {
+#if NET9_0_OR_GREATER
+                if (Avx512F.VL.IsSupported)
+                {
+                    if (typeof(T) == typeof(float)) return Avx512F.VL.ReciprocalSqrt14(x.AsSingle()).As<float, T>();
+                    if (typeof(T) == typeof(double)) return Avx512F.VL.ReciprocalSqrt14(x.AsDouble()).As<double, T>();
+                }
+#endif
+
                if (Avx.IsSupported)
                {
                    if (typeof(T) == typeof(float)) return Avx.ReciprocalSqrt(x.AsSingle()).As<float, T>();
@@ -173,11 +207,13 @@ public static Vector256<T> Invoke(Vector256<T> x)

            public static Vector512<T> Invoke(Vector512<T> x)
            {
+#if NET9_0_OR_GREATER
                if (Avx512F.IsSupported)
                {
                    if (typeof(T) == typeof(float)) return Avx512F.ReciprocalSqrt14(x.AsSingle()).As<float, T>();
                    if (typeof(T) == typeof(double)) return Avx512F.ReciprocalSqrt14(x.AsDouble()).As<double, T>();
                }
+#endif

                return Vector512<T>.One / Vector512.Sqrt(x);
            }
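Note (illustrative, not part of the patch): a small usage sketch of the TensorPrimitives entry points whose vectorized cores are updated above. On hardware with AVX512F+VL the 128-bit and 256-bit paths now use Reciprocal14/ReciprocalSqrt14, including for double; the values below are arbitrary and the results are estimates.

// Illustrative usage; results are approximate and may differ slightly across hardware.
using System;
using System.Numerics.Tensors;

float[] x = { 1f, 2f, 4f, 8f };
float[] destination = new float[x.Length];

TensorPrimitives.ReciprocalEstimate<float>(x, destination);
// destination is approximately { 1f, 0.5f, 0.25f, 0.125f }

TensorPrimitives.ReciprocalSqrtEstimate<float>(x, destination);
// destination is approximately { 1f, 0.707f, 0.5f, 0.354f }
Console.WriteLine(string.Join(", ", destination));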
diff --git a/src/libraries/System.Numerics.Tensors/tests/Net8Tests/System.Numerics.Tensors.Net8.Tests.csproj b/src/libraries/System.Numerics.Tensors/tests/Net8Tests/System.Numerics.Tensors.Net8.Tests.csproj
index eb08d0e5974a6f..3b8f867b355c0a 100644
--- a/src/libraries/System.Numerics.Tensors/tests/Net8Tests/System.Numerics.Tensors.Net8.Tests.csproj
+++ b/src/libraries/System.Numerics.Tensors/tests/Net8Tests/System.Numerics.Tensors.Net8.Tests.csproj
@@ -10,6 +10,7 @@
    $(NetCoreAppCurrent)
    true
+   <DefineConstants>$(DefineConstants);SNT_NET8_TESTS</DefineConstants>
diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs
index 1875b6059b7fa9..2768a03a070475 100644
--- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs
+++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs
@@ -386,7 +386,12 @@ public static IEnumerable<object[]> SpanDestinationFunctionsToTest()
            yield return Create(TensorPrimitives.Reciprocal, f => T.One / f);
            yield return Create(TensorPrimitives.ReciprocalEstimate, T.ReciprocalEstimate, T.CreateTruncating(Helpers.DefaultToleranceForEstimates));
            yield return Create(TensorPrimitives.ReciprocalSqrt, f => T.One / T.Sqrt(f));
+
+#if !SNT_NET8_TESTS
+           // Avoid running with the net8 tests due to: https://github.com/dotnet/runtime/issues/101846
            yield return Create(TensorPrimitives.ReciprocalSqrtEstimate, T.ReciprocalSqrtEstimate, T.CreateTruncating(Helpers.DefaultToleranceForEstimates));
+#endif
+
            yield return Create(TensorPrimitives.Round, T.Round);
            yield return Create(TensorPrimitives.Sin, T.Sin, trigTolerance);
            yield return Create(TensorPrimitives.Sinh, T.Sinh, Helpers.DetermineTolerance(doubleTolerance: 1e-14));
diff --git a/src/libraries/System.Private.CoreLib/src/System/Double.cs b/src/libraries/System.Private.CoreLib/src/System/Double.cs
index 04e8269c78f464..81a6792b599cd5 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Double.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Double.cs
@@ -865,9 +865,11 @@ bool IFloatingPoint<double>.TryWriteSignificandLittleEndian(Span<byte> destinati
        public static double Lerp(double value1, double value2, double amount) => (value1 * (1.0 - amount)) + (value2 * amount);

        ///
+       [Intrinsic]
        public static double ReciprocalEstimate(double x) => Math.ReciprocalEstimate(x);

        ///
+       [Intrinsic]
        public static double ReciprocalSqrtEstimate(double x) => Math.ReciprocalSqrtEstimate(x);

        ///
diff --git a/src/libraries/System.Private.CoreLib/src/System/Math.cs b/src/libraries/System.Private.CoreLib/src/System/Math.cs
index e2fe2051ce5c9e..521abb22465e7b 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Math.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Math.cs
@@ -1195,19 +1195,11 @@ public static double MinMagnitude(double x, double y)
        /// On ARM64 hardware this may use the FRECPE instruction which performs a single Newton-Raphson iteration.
        /// On hardware without specialized support, this may just return 1.0 / d.
        ///
+       [Intrinsic]
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static double ReciprocalEstimate(double d)
        {
-           // x86 doesn't provide an estimate instruction for double-precision reciprocal
-
-           if (AdvSimd.Arm64.IsSupported)
-           {
-               return AdvSimd.Arm64.ReciprocalEstimateScalar(Vector64.CreateScalar(d)).ToScalar();
-           }
-           else
-           {
-               return 1.0 / d;
-           }
+           return 1.0 / d;
        }

        /// Returns an estimate of the reciprocal square root of a specified number.
@@ -1217,19 +1209,11 @@ public static double ReciprocalEstimate(double d)
        /// On ARM64 hardware this may use the FRSQRTE instruction which performs a single Newton-Raphson iteration.
        /// On hardware without specialized support, this may just return 1.0 / Sqrt(d).
        ///
+       [Intrinsic]
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static double ReciprocalSqrtEstimate(double d)
        {
-           // x86 doesn't provide an estimate instruction for double-precision reciprocal square root
-
-           if (AdvSimd.Arm64.IsSupported)
-           {
-               return AdvSimd.Arm64.ReciprocalSquareRootEstimateScalar(Vector64.CreateScalar(d)).ToScalar();
-           }
-           else
-           {
-               return 1.0 / Sqrt(d);
-           }
+           return 1.0 / Sqrt(d);
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
diff --git a/src/libraries/System.Private.CoreLib/src/System/MathF.cs b/src/libraries/System.Private.CoreLib/src/System/MathF.cs
index cc0795255d0c89..05b404abccc51a 100644
--- a/src/libraries/System.Private.CoreLib/src/System/MathF.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/MathF.cs
@@ -313,21 +313,11 @@ public static float MinMagnitude(float x, float y)
        /// On ARM64 hardware this may use the FRECPE instruction which performs a single Newton-Raphson iteration.
        /// On hardware without specialized support, this may just return 1.0 / x.
        ///
+       [Intrinsic]
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static float ReciprocalEstimate(float x)
        {
-           if (Sse.IsSupported)
-           {
-               return Sse.ReciprocalScalar(Vector128.CreateScalarUnsafe(x)).ToScalar();
-           }
-           else if (AdvSimd.Arm64.IsSupported)
-           {
-               return AdvSimd.Arm64.ReciprocalEstimateScalar(Vector64.CreateScalarUnsafe(x)).ToScalar();
-           }
-           else
-           {
-               return 1.0f / x;
-           }
+           return 1.0f / x;
        }

        /// Returns an estimate of the reciprocal square root of a specified number.
@@ -338,21 +328,11 @@ public static float ReciprocalEstimate(float x)
        /// On ARM64 hardware this may use the FRSQRTE instruction which performs a single Newton-Raphson iteration.
        /// On hardware without specialized support, this may just return 1.0 / Sqrt(x).
        ///
+       [Intrinsic]
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static float ReciprocalSqrtEstimate(float x)
        {
-           if (Sse.IsSupported)
-           {
-               return Sse.ReciprocalSqrtScalar(Vector128.CreateScalarUnsafe(x)).ToScalar();
-           }
-           else if (AdvSimd.Arm64.IsSupported)
-           {
-               return AdvSimd.Arm64.ReciprocalSquareRootEstimateScalar(Vector64.CreateScalarUnsafe(x)).ToScalar();
-           }
-           else
-           {
-               return 1.0f / Sqrt(x);
-           }
+           return 1.0f / Sqrt(x);
        }

        [Intrinsic]
diff --git a/src/libraries/System.Private.CoreLib/src/System/Single.cs b/src/libraries/System.Private.CoreLib/src/System/Single.cs
index 8a8a38aa2c0b48..fe96bb1b419f7a 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Single.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Single.cs
@@ -848,9 +848,11 @@ bool IFloatingPoint<float>.TryWriteSignificandLittleEndian(Span<byte> destinatio
        public static float Lerp(float value1, float value2, float amount) => (value1 * (1.0f - amount)) + (value2 * amount);

        ///
+       [Intrinsic]
        public static float ReciprocalEstimate(float x) => MathF.ReciprocalEstimate(x);

        ///
+       [Intrinsic]
        public static float ReciprocalSqrtEstimate(float x) => MathF.ReciprocalSqrtEstimate(x);

        ///
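Note (illustrative, not part of the patch): with the managed fallbacks reduced to the exact forms and the [Intrinsic] expansion supplying the hardware estimate, callers should treat the results as approximate and hardware-dependent, which is how the tests above compare them - against a tolerance rather than exact equality. A minimal sketch of that contract:

// Illustrative only: the estimate is hardware-dependent, so compare with a tolerance.
using System;

double x = 3.0;

double recip     = Math.ReciprocalEstimate(x);     // approximates 1.0 / 3.0
double recipSqrt = Math.ReciprocalSqrtEstimate(x); // approximates 1.0 / Math.Sqrt(3.0)

// Depending on the hardware these may be computed via VRCP14SD/VRSQRT14SD (x64 with
// AVX-512), FRECPE/FRSQRTE (Arm64), or the exact fallbacks 1.0 / x and 1.0 / Sqrt(x).
Console.WriteLine(Math.Abs(recip - (1.0 / x)) < 1e-2);
Console.WriteLine(Math.Abs(recipSqrt - (1.0 / Math.Sqrt(x))) < 1e-2);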