From 5969b40f2798a91c551343d57bc9364c37890a14 Mon Sep 17 00:00:00 2001 From: James Baldwin-Brown Date: Wed, 10 Apr 2024 17:54:59 -0600 Subject: [PATCH 1/3] integrated changes for float64 handling to softfloat.c --- libsrc/float/softfloat/softfloat.c | 404 +++++++++++++++++++---------- 1 file changed, 263 insertions(+), 141 deletions(-) diff --git a/libsrc/float/softfloat/softfloat.c b/libsrc/float/softfloat/softfloat.c index 4adb4beb76..024645f5f4 100644 --- a/libsrc/float/softfloat/softfloat.c +++ b/libsrc/float/softfloat/softfloat.c @@ -186,7 +186,8 @@ static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) } if ( roundBits ) float_exception_flags |= float_flag_inexact; zSig = ( zSig + roundIncrement )>>7; - zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + /* cast to bits32 to avoid truncation problems with small integer sizes */ + zSig &= ~ ( ( ( ((bits32) roundBits) ^ 0x40ul ) == 0ul ) & ((bits32) roundNearestEven) ); if ( zSig == 0 ) zExp = 0; return packFloat32( zSign, zExp, zSig ); @@ -217,10 +218,10 @@ static float32 | floating-point value `a'. *----------------------------------------------------------------------------*/ -INLINE bits32 extractFloat64Frac1( float64 a ) +INLINE bits32 extractFloat64Frac1( float64 *a ) { - return a.low; + return a->low; } @@ -229,10 +230,10 @@ INLINE bits32 extractFloat64Frac1( float64 a ) | floating-point value `a'. *----------------------------------------------------------------------------*/ -INLINE bits32 extractFloat64Frac0( float64 a ) +INLINE bits32 extractFloat64Frac0( float64 *a ) { - return a.high & 0x000FFFFF; + return a->high & 0x000FFFFF; } @@ -240,10 +241,10 @@ INLINE bits32 extractFloat64Frac0( float64 a ) | Returns the exponent bits of the double-precision floating-point value `a'. *----------------------------------------------------------------------------*/ -INLINE int16 extractFloat64Exp( float64 a ) +INLINE int16 extractFloat64Exp( float64 *a ) { - return ( a.high>>20 ) & 0x7FF; + return ( a->high>>20 ) & 0x7FF; } @@ -251,10 +252,10 @@ INLINE int16 extractFloat64Exp( float64 a ) | Returns the sign bit of the double-precision floating-point value `a'. *----------------------------------------------------------------------------*/ -INLINE flag extractFloat64Sign( float64 a ) +INLINE flag extractFloat64Sign( float64 *a ) { - return a.high>>31; + return a->high>>31; } #endif @@ -313,14 +314,12 @@ static void | `zSig0' and `zSig1' concatenated form a complete, normalized significand. *----------------------------------------------------------------------------*/ -INLINE float64 - packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 ) +INLINE void +packFloat64( float64 *dst, flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 ) { - float64 z; - z.low = zSig1; - z.high = ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<20 ) + zSig0; - return z; + dst->low = zSig1; + dst->high = ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<20 ) + zSig0; } @@ -345,9 +344,9 @@ INLINE float64 | overflow follows the IEEE Standard for Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -static float64 +static void roundAndPackFloat64( - flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 ) + float64 *dst, flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 ) { int8 roundingMode; flag roundNearestEven, increment, isTiny; @@ -380,9 +379,11 @@ static float64 || ( zSign && ( roundingMode == float_round_up ) ) || ( ! zSign && ( roundingMode == float_round_down ) ) ) { - return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF ); + packFloat64( dst, zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF ); + return; } - return packFloat64( zSign, 0x7FF, 0, 0 ); + packFloat64( dst, zSign, 0x7FF, 0, 0 ); + return; } if ( zExp < 0 ) { isTiny = @@ -415,7 +416,8 @@ static float64 else { if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; } - return packFloat64( zSign, zExp, zSig0, zSig1 ); + packFloat64( dst, zSign, zExp, zSig0, zSig1 ); + return; } @@ -429,9 +431,9 @@ static float64 | point exponent. *----------------------------------------------------------------------------*/ -static float64 +static void normalizeRoundAndPackFloat64( - flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 ) + float64 *dst, flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 ) { int8 shiftCount; bits32 zSig2; @@ -451,7 +453,7 @@ static float64 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); } zExp -= shiftCount; - return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); + roundAndPackFloat64( dst, zSign, zExp, zSig0, zSig1, zSig2 ); } #endif @@ -480,14 +482,17 @@ float32 int32_to_float32( int32 a ) | according to the IEEE Standard for Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float64 int32_to_float64( int32 a ) +void int32_to_float64( float64 *dst, int32 a ) { flag zSign; bits32 absA; int8 shiftCount; bits32 zSig0, zSig1; - if ( a == 0 ) return packFloat64( 0, 0, 0, 0 ); + if ( a == 0 ) { + packFloat64( dst, 0, 0, 0, 0 ); + return; + } zSign = ( a < 0 ); absA = zSign ? - a : a; shiftCount = countLeadingZeros32( absA ) - 11; @@ -498,7 +503,7 @@ float64 int32_to_float64( int32 a ) else { shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 ); } - return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 ); + packFloat64( dst, zSign, 0x412 - shiftCount, zSig0, zSig1 ); } #endif @@ -620,8 +625,9 @@ int32 float32_to_int32_round_to_zero( float32 a ) | performed according to the IEEE Standard for Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float64 float32_to_float64( float32 a ) +void float32_to_float64( float64 *dst, float32 a ) { + commonNaNT cnan; flag aSign; int16 aExp; bits32 aSig, zSig0, zSig1; @@ -630,16 +636,24 @@ float64 float32_to_float64( float32 a ) aExp = extractFloat32Exp( a ); aSign = extractFloat32Sign( a ); if ( aExp == 0xFF ) { - if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) ); - return packFloat64( aSign, 0x7FF, 0, 0 ); + if ( aSig ) { + float32ToCommonNaN( &cnan, a ); + commonNaNToFloat64( dst, &cnan ); + return; + } + packFloat64( dst, aSign, 0x7FF, 0, 0 ); + return; } if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 ); + if ( aSig == 0 ) { + packFloat64( dst, aSign, 0, 0, 0 ); + return; + } normalizeFloat32Subnormal( aSig, &aExp, &aSig ); --aExp; } shift64Right( aSig, 0, 3, &zSig0, &zSig1 ); - return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 ); + packFloat64( dst, aSign, aExp + 0x380, zSig0, zSig1 ); } #endif @@ -1336,7 +1350,7 @@ flag __fastcall__ float32_lt_quiet( float32 a, float32 b ) | the same sign as `a' is returned. *----------------------------------------------------------------------------*/ -int32 float64_to_int32( float64 a ) +int32 float64_to_int32( float64 *a ) { flag aSign; int16 aExp, shiftCount; @@ -1408,7 +1422,7 @@ int32 float64_to_int32( float64 a ) | overflows, the largest integer with the same sign as `a' is returned. *----------------------------------------------------------------------------*/ -int32 float64_to_int32_round_to_zero( float64 a ) +int32 float64_to_int32_round_to_zero( float64 *a ) { flag aSign; int16 aExp, shiftCount; @@ -1457,7 +1471,7 @@ int32 float64_to_int32_round_to_zero( float64 a ) | performed according to the IEEE Standard for Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float32 float64_to_float32( float64 a ) +float32 float64_to_float32( float64 *a ) { flag aSign; int16 aExp; @@ -1470,7 +1484,8 @@ float32 float64_to_float32( float64 a ) aSign = extractFloat64Sign( a ); if ( aExp == 0x7FF ) { if ( aSig0 | aSig1 ) { - return commonNaNToFloat32( float64ToCommonNaN( a ) ); + float64ToCommonNaN( &cnan, a ); + return commonNaNToFloat32( &cnan ); } return packFloat32( aSign, 0xFF, 0 ); } @@ -1487,7 +1502,7 @@ float32 float64_to_float32( float64 a ) | Arithmetic. *----------------------------------------------------------------------------*/ -float64 float64_round_to_int( float64 a ) +void float64_round_to_int( float64 *dst, float64 *a ) { flag aSign; int16 aExp; @@ -1500,14 +1515,16 @@ float64 float64_round_to_int( float64 a ) if ( 0x433 <= aExp ) { if ( ( aExp == 0x7FF ) && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) { - return propagateFloat64NaN( a, a ); + propagateFloat64NaN( dst, a, a ); + return; } - return a; + *dst = *a; + return; } lastBitMask = 1; lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1; roundBitsMask = lastBitMask - 1; - z = a; + z = *a; roundingMode = float_rounding_mode; if ( roundingMode == float_round_nearest_even ) { if ( lastBitMask ) { @@ -1522,7 +1539,7 @@ float64 float64_round_to_int( float64 a ) } } else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat64Sign( z ) + if ( extractFloat64Sign( &z ) ^ ( roundingMode == float_round_up ) ) { add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low ); } @@ -1531,7 +1548,10 @@ float64 float64_round_to_int( float64 a ) } else { if ( aExp <= 0x3FE ) { - if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a; + if ( ( ( (bits32) ( a->high<<1 ) ) | a->low ) == 0 ) { + *dst = *a; + return; + } float_exception_flags |= float_flag_inexact; aSign = extractFloat64Sign( a ); switch ( float_rounding_mode ) { @@ -1539,45 +1559,57 @@ float64 float64_round_to_int( float64 a ) if ( ( aExp == 0x3FE ) && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) { - return packFloat64( aSign, 0x3FF, 0, 0 ); + packFloat64( dst, aSign, 0x3FF, 0, 0 ); + return; } break; case float_round_down: - return - aSign ? packFloat64( 1, 0x3FF, 0, 0 ) - : packFloat64( 0, 0, 0, 0 ); + { + if (aSign) { + packFloat64( dst, 1, 0x3FF, 0, 0 ); + } else { + packFloat64( dst, 0, 0, 0, 0 ); + } + return; + } case float_round_up: - return - aSign ? packFloat64( 1, 0, 0, 0 ) - : packFloat64( 0, 0x3FF, 0, 0 ); + { + if (aSign) { + packFloat64( dst, 1, 0, 0, 0 ); + } else { + packFloat64( dst, 0, 0x3FF, 0, 0 ); + } + return; + } } - return packFloat64( aSign, 0, 0, 0 ); + packFloat64( dst, aSign, 0, 0, 0 ); + return; } lastBitMask = 1; lastBitMask <<= 0x413 - aExp; roundBitsMask = lastBitMask - 1; z.low = 0; - z.high = a.high; + z.high = a->high; roundingMode = float_rounding_mode; if ( roundingMode == float_round_nearest_even ) { z.high += lastBitMask>>1; - if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { + if ( ( ( z.high & roundBitsMask ) | a->low ) == 0 ) { z.high &= ~ lastBitMask; } } else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat64Sign( z ) + if ( extractFloat64Sign( &z ) ^ ( roundingMode == float_round_up ) ) { - z.high |= ( a.low != 0 ); + z.high |= ( a->low != 0 ); z.high += roundBitsMask; } } z.high &= ~ roundBitsMask; } - if ( ( z.low != a.low ) || ( z.high != a.high ) ) { + if ( ( z.low != a->low ) || ( z.high != a->high ) ) { float_exception_flags |= float_flag_inexact; } - return z; + *dst = z; } @@ -1589,7 +1621,7 @@ float64 float64_round_to_int( float64 a ) | Arithmetic. *----------------------------------------------------------------------------*/ -static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) +static void addFloat64Sigs( float64 *dst, float64 *a, float64 *b, flag zSign ) { int16 aExp, bExp, zExp; bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; @@ -1604,8 +1636,12 @@ static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) expDiff = aExp - bExp; if ( 0 < expDiff ) { if ( aExp == 0x7FF ) { - if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); - return a; + if ( aSig0 | aSig1 ) { + propagateFloat64NaN( dst, a, b ); + return; + } + *dst = *a; + return; } if ( bExp == 0 ) { --expDiff; @@ -1619,8 +1655,12 @@ static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) } else if ( expDiff < 0 ) { if ( bExp == 0x7FF ) { - if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); - return packFloat64( zSign, 0x7FF, 0, 0 ); + if ( bSig0 | bSig1 ) { + propagateFloat64NaN( dst, a, b ); + return; + } + packFloat64( dst, zSign, 0x7FF, 0, 0 ); + return; } if ( aExp == 0 ) { ++expDiff; @@ -1635,12 +1675,17 @@ static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) else { if ( aExp == 0x7FF ) { if ( aSig0 | aSig1 | bSig0 | bSig1 ) { - return propagateFloat64NaN( a, b ); + propagateFloat64NaN( dst, a, b ); + return; } - return a; + *dst = *a; + return; } add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); - if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 ); + if ( aExp == 0 ) { + packFloat64( dst, zSign, 0, zSig0, zSig1 ); + return; + } zSig2 = 0; zSig0 |= 0x00200000; zExp = aExp; @@ -1654,7 +1699,8 @@ static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) shiftRight1: shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); roundAndPack: - return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); + roundAndPackFloat64( dst, zSign, zExp, zSig0, zSig1, zSig2 ); + return; } @@ -1666,7 +1712,7 @@ static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) | Standard for Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) +static void subFloat64Sigs( float64 *dst, float64 *a, float64 *b, flag zSign ) { int16 aExp, bExp, zExp; bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; @@ -1686,12 +1732,14 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) if ( expDiff < 0 ) goto bExpBigger; if ( aExp == 0x7FF ) { if ( aSig0 | aSig1 | bSig0 | bSig1 ) { - return propagateFloat64NaN( a, b ); + propagateFloat64NaN( dst, a, b ); + return; } float_raise( float_flag_invalid ); z.low = float64_default_nan_low; z.high = float64_default_nan_high; - return z; + *dst = z; + return; } if ( aExp == 0 ) { aExp = 1; @@ -1701,11 +1749,16 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) if ( aSig0 < bSig0 ) goto bBigger; if ( bSig1 < aSig1 ) goto aBigger; if ( aSig1 < bSig1 ) goto bBigger; - return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 ); + packFloat64( dst, float_rounding_mode == float_round_down, 0, 0, 0 ); + return; bExpBigger: if ( bExp == 0x7FF ) { - if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); - return packFloat64( zSign ^ 1, 0x7FF, 0, 0 ); + if ( bSig0 | bSig1 ) { + propagateFloat64NaN( dst, a, b ); + return; + } + packFloat64( dst, zSign ^ 1, 0x7FF, 0, 0 ); + return; } if ( aExp == 0 ) { ++expDiff; @@ -1722,8 +1775,12 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) goto normalizeRoundAndPack; aExpBigger: if ( aExp == 0x7FF ) { - if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); - return a; + if ( aSig0 | aSig1 ) { + propagateFloat64NaN( dst, a, b ); + return; + } + *dst = *a; + return; } if ( bExp == 0 ) { --expDiff; @@ -1738,7 +1795,8 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) zExp = aExp; normalizeRoundAndPack: --zExp; - return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 ); + normalizeRoundAndPackFloat64( dst, zSign, zExp - 10, zSig0, zSig1 ); + return; } @@ -1748,17 +1806,19 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) | Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float64 float64_add( float64 a, float64 b ) +void float64_add( float64 *dst, float64 *a, float64 *b ) { flag aSign, bSign; aSign = extractFloat64Sign( a ); bSign = extractFloat64Sign( b ); if ( aSign == bSign ) { - return addFloat64Sigs( a, b, aSign ); + addFloat64Sigs( dst, a, b, aSign ); + return; } else { - return subFloat64Sigs( a, b, aSign ); + subFloat64Sigs( dst, a, b, aSign ); + return; } } @@ -1769,17 +1829,19 @@ float64 float64_add( float64 a, float64 b ) | Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float64 float64_sub( float64 a, float64 b ) +void float64_sub( float64 *dst, float64 *a, float64 *b ) { flag aSign, bSign; aSign = extractFloat64Sign( a ); bSign = extractFloat64Sign( b ); if ( aSign == bSign ) { - return subFloat64Sigs( a, b, aSign ); + subFloat64Sigs( dst, a, b, aSign ); + return; } else { - return addFloat64Sigs( a, b, aSign ); + addFloat64Sigs( dst, a, b, aSign ); + return; } } @@ -1790,7 +1852,7 @@ float64 float64_sub( float64 a, float64 b ) | Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float64 float64_mul( float64 a, float64 b ) +void float64_mul( float64 *dst, float64 *a, float64 *b ) { flag aSign, bSign, zSign; int16 aExp, bExp, zExp; @@ -1809,28 +1871,41 @@ float64 float64_mul( float64 a, float64 b ) if ( aExp == 0x7FF ) { if ( ( aSig0 | aSig1 ) || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) { - return propagateFloat64NaN( a, b ); + propagateFloat64NaN( dst, a, b ); + return; } if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; - return packFloat64( zSign, 0x7FF, 0, 0 ); + packFloat64( dst, zSign, 0x7FF, 0, 0 ); + return; } if ( bExp == 0x7FF ) { - if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); + if ( bSig0 | bSig1 ) { + propagateFloat64NaN( dst, a, b ); + return; + } if ( ( aExp | aSig0 | aSig1 ) == 0 ) { invalid: float_raise( float_flag_invalid ); z.low = float64_default_nan_low; z.high = float64_default_nan_high; - return z; + *dst = z; + return; } - return packFloat64( zSign, 0x7FF, 0, 0 ); + packFloat64( dst, zSign, 0x7FF, 0, 0 ); + return; } if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); + if ( ( aSig0 | aSig1 ) == 0 ) { + packFloat64( dst, zSign, 0, 0, 0 ); + return; + } normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); } if ( bExp == 0 ) { - if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); + if ( ( bSig0 | bSig1 ) == 0 ) { + packFloat64( dst, zSign, 0, 0, 0 ); + return; + } normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); } zExp = aExp + bExp - 0x400; @@ -1844,7 +1919,8 @@ float64 float64_mul( float64 a, float64 b ) zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); ++zExp; } - return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); + roundAndPackFloat64( dst, zSign, zExp, zSig0, zSig1, zSig2 ); + return; } @@ -1854,7 +1930,7 @@ float64 float64_mul( float64 a, float64 b ) | IEEE Standard for Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float64 float64_div( float64 a, float64 b ) +void float64_div( float64 *dst, float64 *a, float64 *b ) { flag aSign, bSign, zSign; int16 aExp, bExp, zExp; @@ -1872,16 +1948,27 @@ float64 float64_div( float64 a, float64 b ) bSign = extractFloat64Sign( b ); zSign = aSign ^ bSign; if ( aExp == 0x7FF ) { - if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); + if ( aSig0 | aSig1 ) { + propagateFloat64NaN( dst, a, b ); + return; + } if ( bExp == 0x7FF ) { - if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); + if ( bSig0 | bSig1 ) { + propagateFloat64NaN( dst, a, b ); + return; + } goto invalid; } - return packFloat64( zSign, 0x7FF, 0, 0 ); + packFloat64( dst, zSign, 0x7FF, 0, 0 ); + return; } if ( bExp == 0x7FF ) { - if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); - return packFloat64( zSign, 0, 0, 0 ); + if ( bSig0 | bSig1 ) { + propagateFloat64NaN( dst, a, b ); + return; + } + packFloat64( dst, zSign, 0, 0, 0 ); + return; } if ( bExp == 0 ) { if ( ( bSig0 | bSig1 ) == 0 ) { @@ -1890,15 +1977,20 @@ float64 float64_div( float64 a, float64 b ) float_raise( float_flag_invalid ); z.low = float64_default_nan_low; z.high = float64_default_nan_high; - return z; + *dst = z; + return; } float_raise( float_flag_divbyzero ); - return packFloat64( zSign, 0x7FF, 0, 0 ); + packFloat64( dst, zSign, 0x7FF, 0, 0 ); + return; } normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); } if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); + if ( ( aSig0 | aSig1 ) == 0 ) { + packFloat64( dst, zSign, 0, 0, 0 ); + return; + } normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); } zExp = aExp - bExp + 0x3FD; @@ -1926,7 +2018,8 @@ float64 float64_div( float64 a, float64 b ) zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); } shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 ); - return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); + roundAndPackFloat64( dst, zSign, zExp, zSig0, zSig1, zSig2 ); + return; } @@ -1936,7 +2029,7 @@ float64 float64_div( float64 a, float64 b ) | according to the IEEE Standard for Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float64 float64_rem( float64 a, float64 b ) +void float64_rem( float64 *dst, float64 *a, float64 *b ) { flag aSign, bSign, zSign; int16 aExp, bExp, expDiff; @@ -1956,13 +2049,18 @@ float64 float64_rem( float64 a, float64 b ) if ( aExp == 0x7FF ) { if ( ( aSig0 | aSig1 ) || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) { - return propagateFloat64NaN( a, b ); + propagateFloat64NaN( dst, a, b ); + return; } goto invalid; } if ( bExp == 0x7FF ) { - if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); - return a; + if ( bSig0 | bSig1 ) { + propagateFloat64NaN( dst, a, b ); + return; + } + *dst = *a; + return; } if ( bExp == 0 ) { if ( ( bSig0 | bSig1 ) == 0 ) { @@ -1970,16 +2068,23 @@ float64 float64_rem( float64 a, float64 b ) float_raise( float_flag_invalid ); z.low = float64_default_nan_low; z.high = float64_default_nan_high; - return z; + *dst = z; + return; } normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); } if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return a; + if ( ( aSig0 | aSig1 ) == 0 ) { + *dst = *a; + return; + } normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); } expDiff = aExp - bExp; - if ( expDiff < -1 ) return a; + if ( expDiff < -1 ) { + *dst = *a; + return; + } shortShift64Left( aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 ); shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 ); @@ -2020,8 +2125,11 @@ float64 float64_rem( float64 a, float64 b ) ++q; sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); } while ( 0 <= (sbits32) aSig0 ); + + /* Casting to unsigned pointer here. Will this work? */ add64( - aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 ); + aSig0, aSig1, alternateASig0, alternateASig1, (bits32 *) &sigMean0, &sigMean1 ); + if ( ( sigMean0 < 0 ) || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) { aSig0 = alternateASig0; @@ -2029,8 +2137,8 @@ float64 float64_rem( float64 a, float64 b ) } zSign = ( (sbits32) aSig0 < 0 ); if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 ); - return - normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 ); + normalizeRoundAndPackFloat64( dst, aSign ^ zSign, bExp - 4, aSig0, aSig1 ); + return; } @@ -2040,7 +2148,7 @@ float64 float64_rem( float64 a, float64 b ) | Arithmetic. *----------------------------------------------------------------------------*/ -float64 float64_sqrt( float64 a ) +void float64_sqrt( float64 *dst, float64 *a ) { flag aSign; int16 aExp, zExp; @@ -2053,20 +2161,33 @@ float64 float64_sqrt( float64 a ) aExp = extractFloat64Exp( a ); aSign = extractFloat64Sign( a ); if ( aExp == 0x7FF ) { - if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a ); - if ( ! aSign ) return a; + if ( aSig0 | aSig1 ) { + propagateFloat64NaN( dst, a, a ); + return; + } + if ( ! aSign ) { + *dst = *a; + return; + } goto invalid; } if ( aSign ) { - if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a; + if ( ( aExp | aSig0 | aSig1 ) == 0 ) { + *dst = *a; + return; + } invalid: float_raise( float_flag_invalid ); z.low = float64_default_nan_low; z.high = float64_default_nan_high; - return z; + *dst = z; + return; } if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 ); + if ( ( aSig0 | aSig1 ) == 0 ) { + packFloat64( dst, 0, 0, 0, 0 ); + return; + } normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); } zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE; @@ -2100,7 +2221,8 @@ float64 float64_sqrt( float64 a ) zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); } shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 ); - return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 ); + roundAndPackFloat64( dst, 0, zExp, zSig0, zSig1, zSig2 ); + return; } @@ -2110,7 +2232,7 @@ float64 float64_sqrt( float64 a ) | according to the IEEE Standard for Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -flag float64_eq( float64 a, float64 b ) +flag float64_eq( float64 *a, float64 *b ) { if ( ( ( extractFloat64Exp( a ) == 0x7FF ) @@ -2124,10 +2246,10 @@ flag float64_eq( float64 a, float64 b ) return 0; } return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (bits32) ( ( a.high | b.high )<<1 ) == 0 ) ) + ( a->low == b->low ) + && ( ( a->high == b->high ) + || ( ( a->low == 0 ) + && ( (bits32) ( ( a->high | b->high )<<1 ) == 0 ) ) ); } @@ -2138,7 +2260,7 @@ flag float64_eq( float64 a, float64 b ) | performed according to the IEEE Standard for Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -flag float64_le( float64 a, float64 b ) +flag float64_le( float64 *a, float64 *b ) { flag aSign, bSign; @@ -2155,12 +2277,12 @@ flag float64_le( float64 a, float64 b ) if ( aSign != bSign ) { return aSign - || ( ( ( (bits32) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + || ( ( ( (bits32) ( ( a->high | b->high )<<1 ) ) | a->low | b->low ) == 0 ); } return - aSign ? le64( b.high, b.low, a.high, a.low ) - : le64( a.high, a.low, b.high, b.low ); + aSign ? le64( b->high, b->low, a->high, a->low ) + : le64( a->high, a->low, b->high, b->low ); } @@ -2170,7 +2292,7 @@ flag float64_le( float64 a, float64 b ) | according to the IEEE Standard for Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -flag float64_lt( float64 a, float64 b ) +flag float64_lt( float64 *a, float64 *b ) { flag aSign, bSign; @@ -2187,12 +2309,12 @@ flag float64_lt( float64 a, float64 b ) if ( aSign != bSign ) { return aSign - && ( ( ( (bits32) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + && ( ( ( (bits32) ( ( a->high | b->high )<<1 ) ) | a->low | b->low ) != 0 ); } return - aSign ? lt64( b.high, b.low, a.high, a.low ) - : lt64( a.high, a.low, b.high, b.low ); + aSign ? lt64( b->high, b->low, a->high, a->low ) + : lt64( a->high, a->low, b->high, b->low ); } @@ -2203,7 +2325,7 @@ flag float64_lt( float64 a, float64 b ) | according to the IEEE Standard for Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -flag float64_eq_signaling( float64 a, float64 b ) +flag float64_eq_signaling( float64 *a, float64 *b ) { if ( ( ( extractFloat64Exp( a ) == 0x7FF ) @@ -2215,10 +2337,10 @@ flag float64_eq_signaling( float64 a, float64 b ) return 0; } return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (bits32) ( ( a.high | b.high )<<1 ) == 0 ) ) + ( a->low == b->low ) + && ( ( a->high == b->high ) + || ( ( a->low == 0 ) + && ( (bits32) ( ( a->high | b->high )<<1 ) == 0 ) ) ); } @@ -2230,7 +2352,7 @@ flag float64_eq_signaling( float64 a, float64 b ) | IEEE Standard for Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -flag float64_le_quiet( float64 a, float64 b ) +flag float64_le_quiet( float64 *a, float64 *b ) { flag aSign, bSign; @@ -2249,12 +2371,12 @@ flag float64_le_quiet( float64 a, float64 b ) if ( aSign != bSign ) { return aSign - || ( ( ( (bits32) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + || ( ( ( (bits32) ( ( a->high | b->high )<<1 ) ) | a->low | b->low ) == 0 ); } return - aSign ? le64( b.high, b.low, a.high, a.low ) - : le64( a.high, a.low, b.high, b.low ); + aSign ? le64( b->high, b->low, a->high, a->low ) + : le64( a->high, a->low, b->high, b->low ); } @@ -2265,7 +2387,7 @@ flag float64_le_quiet( float64 a, float64 b ) | Standard for Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -flag float64_lt_quiet( float64 a, float64 b ) +flag float64_lt_quiet( float64 *a, float64 *b ) { flag aSign, bSign; @@ -2284,12 +2406,12 @@ flag float64_lt_quiet( float64 a, float64 b ) if ( aSign != bSign ) { return aSign - && ( ( ( (bits32) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + && ( ( ( (bits32) ( ( a->high | b->high )<<1 ) ) | a->low | b->low ) != 0 ); } return - aSign ? lt64( b.high, b.low, a.high, a.low ) - : lt64( a.high, a.low, b.high, b.low ); + aSign ? lt64( b->high, b->low, a->high, a->low ) + : lt64( a->high, a->low, b->high, b->low ); } #endif From f2eba0e0c3a81623fe492e666964bd551bbbbde1 Mon Sep 17 00:00:00 2001 From: James Baldwin-Brown Date: Thu, 11 Apr 2024 11:41:37 -0600 Subject: [PATCH 2/3] added float64 changes to specialize.h --- libsrc/float/softfloat/specialize.h | 81 +++++++++++++++++++---------- 1 file changed, 54 insertions(+), 27 deletions(-) diff --git a/libsrc/float/softfloat/specialize.h b/libsrc/float/softfloat/specialize.h index 37633796c4..6982309483 100644 --- a/libsrc/float/softfloat/specialize.h +++ b/libsrc/float/softfloat/specialize.h @@ -156,12 +156,12 @@ enum { | otherwise returns 0. *----------------------------------------------------------------------------*/ -flag float64_is_nan( float64 a ) +flag float64_is_nan( float64 *a ) { return - ( 0xFFE00000 <= (bits32) ( a.high<<1 ) ) - && ( a.low || ( a.high & 0x000FFFFF ) ); + ( 0xFFE00000 <= (bits32) ( a->high<<1 ) ) + && ( a->low || ( a->high & 0x000FFFFF ) ); } @@ -170,12 +170,12 @@ flag float64_is_nan( float64 a ) | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -flag float64_is_signaling_nan( float64 a ) +flag float64_is_signaling_nan( float64 *a ) { return - ( ( ( a.high>>19 ) & 0xFFF ) == 0xFFE ) - && ( a.low || ( a.high & 0x0007FFFF ) ); + ( ( ( a->high>>19 ) & 0xFFF ) == 0xFFE ) + && ( a->low || ( a->high & 0x0007FFFF ) ); } @@ -185,14 +185,12 @@ flag float64_is_signaling_nan( float64 a ) | exception is raised. *----------------------------------------------------------------------------*/ -static commonNaNT float64ToCommonNaN( float64 a ) +static void float64ToCommonNaN( commonNaNT *dst, float64 *a ) { - commonNaNT z; if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); - z.sign = a.high>>31; - shortShift64Left( a.high, a.low, 12, &z.high, &z.low ); - return z; + dst->sign = a->high>>31; + shortShift64Left( a->high, a->low, 12, &dst->high, &dst->low ); } @@ -201,13 +199,11 @@ static commonNaNT float64ToCommonNaN( float64 a ) | precision floating-point format. *----------------------------------------------------------------------------*/ -static float64 commonNaNToFloat64( commonNaNT a ) +static void commonNaNToFloat64( float64 *dst, commonNaNT *a ) { - float64 z; - shift64Right( a.high, a.low, 12, &z.high, &z.low ); - z.high |= ( ( (bits32) a.sign )<<31 ) | 0x7FF80000; - return z; + shift64Right( a->high, a->low, 12, &dst->high, &dst->low ); + dst->high |= ( ( (bits32) a->sign )<<31 ) | 0x7FF80000; } @@ -217,22 +213,53 @@ static float64 commonNaNToFloat64( commonNaNT a ) | signaling NaN, the invalid exception is raised. *----------------------------------------------------------------------------*/ -static float64 propagateFloat64NaN( float64 a, float64 b ) +static void propagateFloat64NaN( float64 *dst, float64 *a, float64 *b ) { flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; - - aIsNaN = float64_is_nan( a ); - aIsSignalingNaN = float64_is_signaling_nan( a ); - bIsNaN = float64_is_nan( b ); - bIsSignalingNaN = float64_is_signaling_nan( b ); - a.high |= 0x00080000; - b.high |= 0x00080000; + float64 a2, b2; + a2 = *a; + b2 = *b; + + aIsNaN = float64_is_nan( &a2 ); + aIsSignalingNaN = float64_is_signaling_nan( &a2 ); + bIsNaN = float64_is_nan( &b2 ); + bIsSignalingNaN = float64_is_signaling_nan( &b2 ); + a2.high |= 0x00080000; + b2.high |= 0x00080000; if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); - if ( aIsNaN ) { - return ( aIsSignalingNaN & bIsNaN ) ? b : a; + if ( aIsSignalingNaN ) { + if ( bIsSignalingNaN ) goto returnLargerSignificand; + if ( bIsNaN ) { + *dst = b2; + } else { + *dst = a2; + } + return; + } + else if ( aIsNaN ) { + if ( bIsSignalingNaN | ! bIsNaN ) { + *dst = a2; + return; + } + returnLargerSignificand: + if ( lt64( a2.high<<1, a2.low, b2.high<<1, b2.low ) ) { + *dst = b2; + return; + } + if ( lt64( b2.high<<1, b2.low, a2.high<<1, a2.low ) ) { + *dst = a2; + return; + } + + if ( a2.high < b2.high ) { + *dst = a2; + } else { + *dst = b2; + } + return; } else { - return b; + *dst = b2; } } From 520301f79578984ae18c7e27d2c9f98578fcf6a7 Mon Sep 17 00:00:00 2001 From: James Baldwin-Brown Date: Mon, 29 Apr 2024 15:33:14 -0600 Subject: [PATCH 3/3] fixed dangling space in specialize.h --- libsrc/float/softfloat/specialize.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libsrc/float/softfloat/specialize.h b/libsrc/float/softfloat/specialize.h index 6982309483..68e6ad1464 100644 --- a/libsrc/float/softfloat/specialize.h +++ b/libsrc/float/softfloat/specialize.h @@ -253,7 +253,7 @@ static void propagateFloat64NaN( float64 *dst, float64 *a, float64 *b ) if ( a2.high < b2.high ) { *dst = a2; - } else { + } else { *dst = b2; } return;