From c51c1ce2ed731188a977b0654a66e929982d56d4 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Thu, 10 Oct 2024 17:53:07 -0500 Subject: [PATCH 01/58] first pass at integrating generic float --- src/include/migraphx/half.hpp | 263 ++++++++++++++++++++++++++++++++-- 1 file changed, 252 insertions(+), 11 deletions(-) diff --git a/src/include/migraphx/half.hpp b/src/include/migraphx/half.hpp index 3296e8c328d..3f63f0b52a5 100644 --- a/src/include/migraphx/half.hpp +++ b/src/include/migraphx/half.hpp @@ -31,8 +31,248 @@ namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { +namespace half { -using half = half_float::half; +template +constexpr unsigned int all_ones() noexcept +{ + return (1 << N) - 1; +} + +struct float16_parts +{ + unsigned int mantissa : 10; + unsigned int exponent : 5; + unsigned int sign : 1; + + static constexpr unsigned int mantissa_width() + { + return 23; + } + + static constexpr unsigned int max_exponent() + { + return all_ones<5>(); + } + + static constexpr int exponent_bias() + { + return all_ones<4>(); + } + + constexpr float to_float() const noexcept + { + return migraphx::bit_cast(*this); + } +}; + +constexpr float16_parts get_parts(float f) +{ + return migraphx::bit_cast(f); +} + +template +struct generic_float +{ + unsigned int mantissa : MantissaSize; + unsigned int exponent : ExponentSize; + unsigned int sign : 1; + + static constexpr int exponent_bias() + { + return all_ones(); + } + + explicit generic_float(float f = 0.0) noexcept + { + from_float(get_parts(f)); + } + + constexpr float to_float() const noexcept + { + float16_parts f{}; + f.sign = sign; + f.mantissa = mantissa << (float16_parts::mantissa_width() - MantissaSize); + if(exponent == all_ones()) + { + f.exponent = float16_parts::max_exponent(); + } + else + { + constexpr const auto diff = float16_parts::exponent_bias() - exponent_bias(); + f.exponent = exponent + diff; + } + return f.to_float(); + } + + constexpr void from_float(float16_parts f) noexcept + { + sign = f.sign; + mantissa = f.mantissa >> (float16_parts::mantissa_width() - MantissaSize); + + if(f.exponent == 0) + { + exponent = 0; + } + else if(f.exponent == float16_parts::max_exponent()) + { + exponent = all_ones(); + } + else + { + constexpr const int diff = float16_parts::exponent_bias() - exponent_bias(); + auto e = int(f.exponent) - diff; + if(e >= all_ones()) + { + exponent = all_ones(); + mantissa = 0; + } + else if(e < 0) + { + exponent = 0; + mantissa = 0; + } + else + { + exponent = f.exponent - diff; + } + } + + exponent = std::min(f.exponent, all_ones()); + } + + constexpr bool is_normal() const noexcept + { + return exponent != all_ones() and exponent != 0; + } + + constexpr bool is_inf() const noexcept + { + return exponent == all_ones() and mantissa == 0; + } + + constexpr bool is_nan() const noexcept + { + return exponent == all_ones() and mantissa != 0; + } + + constexpr bool is_finite() const noexcept + { + return exponent != all_ones(); + } + + constexpr operator float() const noexcept + { + return this->to_float(); + } + + static constexpr generic_float infinity() + { + generic_float x{}; + x.exponent = all_ones(); + return x; + } + + static constexpr generic_float snan() + { + generic_float x{}; + x.exponent = all_ones(); + x.mantissa = 1 << (MantissaSize - 2); + return x; + } + + static constexpr generic_float qnan() + { + generic_float x{}; + x.exponent = all_ones(); + x.mantissa = 1 << (MantissaSize - 1); + return x; + } + + static constexpr generic_float min() + { + generic_float x{}; + x.exponent = 1; + x.mantissa = 0; + return x; + } + + static constexpr generic_float denorm_min() + { + generic_float x{}; + x.exponent = 0; + x.mantissa = 1; + x.sign = 0; + return x; + } + + static constexpr generic_float lowest() + { + generic_float x{}; + x.exponent = all_ones() - 1; + x.mantissa = all_ones(); + x.sign = 1; + return x; + } + + static constexpr generic_float max() + { + generic_float x{}; + x.exponent = all_ones() - 1; + x.mantissa = all_ones(); + x.sign = 0; + return x; + } + + static constexpr generic_float epsilon() + { + generic_float x{1.0}; + x.mantissa++; + return generic_float{x.to_float() - 1.0f}; + } +// NOLINTNEXTLINE +#define MIGRAPHX_GENERIC_FLOAT_ASSIGN_OP(op) \ + constexpr generic_float& operator op(const generic_float& rhs) \ + { \ + float self = *this; \ + float frhs = rhs; \ + self op frhs; \ + *this = generic_float(self); \ + return *this; \ + } + MIGRAPHX_GENERIC_FLOAT_ASSIGN_OP(*=) + MIGRAPHX_GENERIC_FLOAT_ASSIGN_OP(-=) + MIGRAPHX_GENERIC_FLOAT_ASSIGN_OP(+=) + MIGRAPHX_GENERIC_FLOAT_ASSIGN_OP(/=) +// NOLINTNEXTLINE +#define MIGRAPHX_GENERIC_FLOAT_BINARY_OP(op) \ + friend constexpr generic_float operator op(const generic_float& x, const generic_float& y) \ + { \ + return generic_float(float(x) op float(y)); \ + } + MIGRAPHX_GENERIC_FLOAT_BINARY_OP(*) + MIGRAPHX_GENERIC_FLOAT_BINARY_OP(-) + MIGRAPHX_GENERIC_FLOAT_BINARY_OP(+) + MIGRAPHX_GENERIC_FLOAT_BINARY_OP(/) + MIGRAPHX_GENERIC_FLOAT_BINARY_OP(<) + MIGRAPHX_GENERIC_FLOAT_BINARY_OP(<=) + MIGRAPHX_GENERIC_FLOAT_BINARY_OP(>) + MIGRAPHX_GENERIC_FLOAT_BINARY_OP(>=) + + friend constexpr generic_float operator==(const generic_float& x, const generic_float& y) + { + if (not x.is_finite() or not y.is_finite()) + return false; + return std::tie(x.mantissa, x.exponent, x.sign) == std::tie(y.mantissa, y.exponent, y.sign); + } + + friend constexpr generic_float operator!=(const generic_float& x, const generic_float& y) + { + return not(x == y); + } +}; + +using half = migraphx::half::generic_float<10, 5>; namespace detail { template @@ -53,61 +293,62 @@ struct deduce template using deduce = typename detail::deduce::type; +} // namespace half } // namespace MIGRAPHX_INLINE_NS } // namespace migraphx namespace std { template -struct common_type : std::common_type // NOLINT +struct common_type : std::common_type // NOLINT { }; template -struct common_type : std::common_type // NOLINT +struct common_type : std::common_type // NOLINT { }; template <> -struct common_type +struct common_type { using type = float; }; template <> -struct common_type +struct common_type { using type = float; }; template <> -struct common_type +struct common_type { using type = float; }; template <> -struct common_type +struct common_type { using type = float; }; template <> -struct common_type +struct common_type { using type = float; }; template <> -struct common_type +struct common_type { using type = float; }; template <> -struct common_type +struct common_type { - using type = migraphx::half; + using type = migraphx::half::half; }; } // namespace std From 134b408b7f0940ec7972809884af3c17ab20f905 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Thu, 10 Oct 2024 18:03:01 -0500 Subject: [PATCH 02/58] fix namespaces --- src/include/migraphx/half.hpp | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/include/migraphx/half.hpp b/src/include/migraphx/half.hpp index 3f63f0b52a5..6f37f1814d7 100644 --- a/src/include/migraphx/half.hpp +++ b/src/include/migraphx/half.hpp @@ -31,7 +31,6 @@ namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { -namespace half { template constexpr unsigned int all_ones() noexcept @@ -272,7 +271,7 @@ struct generic_float } }; -using half = migraphx::half::generic_float<10, 5>; +using half = migraphx::generic_float<10, 5>; namespace detail { template @@ -293,62 +292,61 @@ struct deduce template using deduce = typename detail::deduce::type; -} // namespace half } // namespace MIGRAPHX_INLINE_NS } // namespace migraphx namespace std { template -struct common_type : std::common_type // NOLINT +struct common_type : std::common_type // NOLINT { }; template -struct common_type : std::common_type // NOLINT +struct common_type : std::common_type // NOLINT { }; template <> -struct common_type +struct common_type { using type = float; }; template <> -struct common_type +struct common_type { using type = float; }; template <> -struct common_type +struct common_type { using type = float; }; template <> -struct common_type +struct common_type { using type = float; }; template <> -struct common_type +struct common_type { using type = float; }; template <> -struct common_type +struct common_type { using type = float; }; template <> -struct common_type +struct common_type { - using type = migraphx::half::half; + using type = migraphx::half; }; } // namespace std From d4fa6eb210179b2dd58495710d62cc870189bb9e Mon Sep 17 00:00:00 2001 From: richagadgil Date: Thu, 10 Oct 2024 18:37:51 -0500 Subject: [PATCH 03/58] fix mantissa --- src/include/migraphx/half.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/migraphx/half.hpp b/src/include/migraphx/half.hpp index 6f37f1814d7..582550ecf22 100644 --- a/src/include/migraphx/half.hpp +++ b/src/include/migraphx/half.hpp @@ -46,7 +46,7 @@ struct float16_parts static constexpr unsigned int mantissa_width() { - return 23; + return 10; } static constexpr unsigned int max_exponent() From 0b60841f2bfeba29d3533faf67fc02ac03536995 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Thu, 10 Oct 2024 19:04:17 -0500 Subject: [PATCH 04/58] refactor --- src/include/migraphx/generic_float.hpp | 262 +++++++++++++++++++++++++ src/include/migraphx/half.hpp | 242 +---------------------- 2 files changed, 264 insertions(+), 240 deletions(-) create mode 100644 src/include/migraphx/generic_float.hpp diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp new file mode 100644 index 00000000000..baad2c900a9 --- /dev/null +++ b/src/include/migraphx/generic_float.hpp @@ -0,0 +1,262 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +template +constexpr unsigned int all_ones() noexcept +{ + return (1 << N) - 1; +} + +struct float32_parts +{ + unsigned int mantissa : 23; + unsigned int exponent : 8; + unsigned int sign : 1; + + static constexpr unsigned int mantissa_width() + { + return 23; + } + + static constexpr unsigned int max_exponent() + { + return all_ones<8>(); + } + + static constexpr int exponent_bias() + { + return all_ones<7>(); + } + + constexpr float to_float() const noexcept + { + return migraphx::bit_cast(*this); + } +}; + +constexpr float32_parts get_parts(float f) +{ + return migraphx::bit_cast(f); +} + +template +struct generic_float +{ + unsigned int mantissa : MantissaSize; + unsigned int exponent : ExponentSize; + unsigned int sign : 1; + + static constexpr int exponent_bias() + { + return all_ones(); + } + + explicit generic_float(float f = 0.0) noexcept + { + from_float(get_parts(f)); + } + + constexpr float to_float() const noexcept + { + float32_parts f{}; + f.sign = sign; + f.mantissa = mantissa << (float32_parts::mantissa_width() - MantissaSize); + if(exponent == all_ones()) + { + f.exponent = float32_parts::max_exponent(); + } + else + { + constexpr const auto diff = float32_parts::exponent_bias() - exponent_bias(); + f.exponent = exponent + diff; + } + return f.to_float(); + } + + constexpr void from_float(float32_parts f) noexcept + { + sign = f.sign; + mantissa = f.mantissa >> (float32_parts::mantissa_width() - MantissaSize); + + if(f.exponent == 0) + { + exponent = 0; + } + else if(f.exponent == float32_parts::max_exponent()) + { + exponent = all_ones(); + } + else + { + constexpr const int diff = float32_parts::exponent_bias() - exponent_bias(); + auto e = int(f.exponent) - diff; + if(e >= all_ones()) + { + exponent = all_ones(); + mantissa = 0; + } + else if(e < 0) + { + exponent = 0; + mantissa = 0; + } + else + { + exponent = f.exponent - diff; + } + } + + exponent = std::min(f.exponent, all_ones()); + } + + constexpr bool is_normal() const noexcept + { + return exponent != all_ones() and exponent != 0; + } + + constexpr bool is_inf() const noexcept + { + return exponent == all_ones() and mantissa == 0; + } + + constexpr bool is_nan() const noexcept + { + return exponent == all_ones() and mantissa != 0; + } + + constexpr bool is_finite() const noexcept + { + return exponent != all_ones(); + } + + constexpr operator float() const noexcept + { + return this->to_float(); + } + + static constexpr generic_float infinity() + { + generic_float x{}; + x.exponent = all_ones(); + return x; + } + + static constexpr generic_float snan() + { + generic_float x{}; + x.exponent = all_ones(); + x.mantissa = 1 << (MantissaSize - 2); + return x; + } + + static constexpr generic_float qnan() + { + generic_float x{}; + x.exponent = all_ones(); + x.mantissa = 1 << (MantissaSize - 1); + return x; + } + + static constexpr generic_float min() + { + generic_float x{}; + x.exponent = 1; + x.mantissa = 0; + return x; + } + + static constexpr generic_float denorm_min() + { + generic_float x{}; + x.exponent = 0; + x.mantissa = 1; + x.sign = 0; + return x; + } + + static constexpr generic_float lowest() + { + generic_float x{}; + x.exponent = all_ones() - 1; + x.mantissa = all_ones(); + x.sign = 1; + return x; + } + + static constexpr generic_float max() + { + generic_float x{}; + x.exponent = all_ones() - 1; + x.mantissa = all_ones(); + x.sign = 0; + return x; + } + + static constexpr generic_float epsilon() + { + generic_float x{1.0}; + x.mantissa++; + return generic_float{x.to_float() - 1.0f}; + } +// NOLINTNEXTLINE +#define MIGRAPHX_GENERIC_FLOAT_ASSIGN_OP(op) \ + constexpr generic_float& operator op(const generic_float& rhs) \ + { \ + float self = *this; \ + float frhs = rhs; \ + self op frhs; \ + *this = generic_float(self); \ + return *this; \ + } + MIGRAPHX_GENERIC_FLOAT_ASSIGN_OP(*=) + MIGRAPHX_GENERIC_FLOAT_ASSIGN_OP(-=) + MIGRAPHX_GENERIC_FLOAT_ASSIGN_OP(+=) + MIGRAPHX_GENERIC_FLOAT_ASSIGN_OP(/=) +// NOLINTNEXTLINE +#define MIGRAPHX_GENERIC_FLOAT_BINARY_OP(op) \ + friend constexpr generic_float operator op(const generic_float& x, const generic_float& y) \ + { \ + return generic_float(float(x) op float(y)); \ + } + MIGRAPHX_GENERIC_FLOAT_BINARY_OP(*) + MIGRAPHX_GENERIC_FLOAT_BINARY_OP(-) + MIGRAPHX_GENERIC_FLOAT_BINARY_OP(+) + MIGRAPHX_GENERIC_FLOAT_BINARY_OP(/) + MIGRAPHX_GENERIC_FLOAT_BINARY_OP(<) + MIGRAPHX_GENERIC_FLOAT_BINARY_OP(<=) + MIGRAPHX_GENERIC_FLOAT_BINARY_OP(>) + MIGRAPHX_GENERIC_FLOAT_BINARY_OP(>=) + + friend constexpr generic_float operator==(const generic_float& x, const generic_float& y) + { + if (not x.is_finite() or not y.is_finite()) + return false; + return std::tie(x.mantissa, x.exponent, x.sign) == std::tie(y.mantissa, y.exponent, y.sign); + } + + friend constexpr generic_float operator!=(const generic_float& x, const generic_float& y) + { + return not(x == y); + } +}; diff --git a/src/include/migraphx/half.hpp b/src/include/migraphx/half.hpp index 582550ecf22..0a93d6a237b 100644 --- a/src/include/migraphx/half.hpp +++ b/src/include/migraphx/half.hpp @@ -28,250 +28,12 @@ #include #include #include +#include namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { -template -constexpr unsigned int all_ones() noexcept -{ - return (1 << N) - 1; -} - -struct float16_parts -{ - unsigned int mantissa : 10; - unsigned int exponent : 5; - unsigned int sign : 1; - - static constexpr unsigned int mantissa_width() - { - return 10; - } - - static constexpr unsigned int max_exponent() - { - return all_ones<5>(); - } - - static constexpr int exponent_bias() - { - return all_ones<4>(); - } - - constexpr float to_float() const noexcept - { - return migraphx::bit_cast(*this); - } -}; - -constexpr float16_parts get_parts(float f) -{ - return migraphx::bit_cast(f); -} - -template -struct generic_float -{ - unsigned int mantissa : MantissaSize; - unsigned int exponent : ExponentSize; - unsigned int sign : 1; - - static constexpr int exponent_bias() - { - return all_ones(); - } - - explicit generic_float(float f = 0.0) noexcept - { - from_float(get_parts(f)); - } - - constexpr float to_float() const noexcept - { - float16_parts f{}; - f.sign = sign; - f.mantissa = mantissa << (float16_parts::mantissa_width() - MantissaSize); - if(exponent == all_ones()) - { - f.exponent = float16_parts::max_exponent(); - } - else - { - constexpr const auto diff = float16_parts::exponent_bias() - exponent_bias(); - f.exponent = exponent + diff; - } - return f.to_float(); - } - - constexpr void from_float(float16_parts f) noexcept - { - sign = f.sign; - mantissa = f.mantissa >> (float16_parts::mantissa_width() - MantissaSize); - - if(f.exponent == 0) - { - exponent = 0; - } - else if(f.exponent == float16_parts::max_exponent()) - { - exponent = all_ones(); - } - else - { - constexpr const int diff = float16_parts::exponent_bias() - exponent_bias(); - auto e = int(f.exponent) - diff; - if(e >= all_ones()) - { - exponent = all_ones(); - mantissa = 0; - } - else if(e < 0) - { - exponent = 0; - mantissa = 0; - } - else - { - exponent = f.exponent - diff; - } - } - - exponent = std::min(f.exponent, all_ones()); - } - - constexpr bool is_normal() const noexcept - { - return exponent != all_ones() and exponent != 0; - } - - constexpr bool is_inf() const noexcept - { - return exponent == all_ones() and mantissa == 0; - } - - constexpr bool is_nan() const noexcept - { - return exponent == all_ones() and mantissa != 0; - } - - constexpr bool is_finite() const noexcept - { - return exponent != all_ones(); - } - - constexpr operator float() const noexcept - { - return this->to_float(); - } - - static constexpr generic_float infinity() - { - generic_float x{}; - x.exponent = all_ones(); - return x; - } - - static constexpr generic_float snan() - { - generic_float x{}; - x.exponent = all_ones(); - x.mantissa = 1 << (MantissaSize - 2); - return x; - } - - static constexpr generic_float qnan() - { - generic_float x{}; - x.exponent = all_ones(); - x.mantissa = 1 << (MantissaSize - 1); - return x; - } - - static constexpr generic_float min() - { - generic_float x{}; - x.exponent = 1; - x.mantissa = 0; - return x; - } - - static constexpr generic_float denorm_min() - { - generic_float x{}; - x.exponent = 0; - x.mantissa = 1; - x.sign = 0; - return x; - } - - static constexpr generic_float lowest() - { - generic_float x{}; - x.exponent = all_ones() - 1; - x.mantissa = all_ones(); - x.sign = 1; - return x; - } - - static constexpr generic_float max() - { - generic_float x{}; - x.exponent = all_ones() - 1; - x.mantissa = all_ones(); - x.sign = 0; - return x; - } - - static constexpr generic_float epsilon() - { - generic_float x{1.0}; - x.mantissa++; - return generic_float{x.to_float() - 1.0f}; - } -// NOLINTNEXTLINE -#define MIGRAPHX_GENERIC_FLOAT_ASSIGN_OP(op) \ - constexpr generic_float& operator op(const generic_float& rhs) \ - { \ - float self = *this; \ - float frhs = rhs; \ - self op frhs; \ - *this = generic_float(self); \ - return *this; \ - } - MIGRAPHX_GENERIC_FLOAT_ASSIGN_OP(*=) - MIGRAPHX_GENERIC_FLOAT_ASSIGN_OP(-=) - MIGRAPHX_GENERIC_FLOAT_ASSIGN_OP(+=) - MIGRAPHX_GENERIC_FLOAT_ASSIGN_OP(/=) -// NOLINTNEXTLINE -#define MIGRAPHX_GENERIC_FLOAT_BINARY_OP(op) \ - friend constexpr generic_float operator op(const generic_float& x, const generic_float& y) \ - { \ - return generic_float(float(x) op float(y)); \ - } - MIGRAPHX_GENERIC_FLOAT_BINARY_OP(*) - MIGRAPHX_GENERIC_FLOAT_BINARY_OP(-) - MIGRAPHX_GENERIC_FLOAT_BINARY_OP(+) - MIGRAPHX_GENERIC_FLOAT_BINARY_OP(/) - MIGRAPHX_GENERIC_FLOAT_BINARY_OP(<) - MIGRAPHX_GENERIC_FLOAT_BINARY_OP(<=) - MIGRAPHX_GENERIC_FLOAT_BINARY_OP(>) - MIGRAPHX_GENERIC_FLOAT_BINARY_OP(>=) - - friend constexpr generic_float operator==(const generic_float& x, const generic_float& y) - { - if (not x.is_finite() or not y.is_finite()) - return false; - return std::tie(x.mantissa, x.exponent, x.sign) == std::tie(y.mantissa, y.exponent, y.sign); - } - - friend constexpr generic_float operator!=(const generic_float& x, const generic_float& y) - { - return not(x == y); - } -}; - -using half = migraphx::generic_float<10, 5>; +using half = generic_float<10,5>; namespace detail { template From 7a646f1b2fc77cedda6789190363c8ce4a16050d Mon Sep 17 00:00:00 2001 From: richagadgil Date: Fri, 11 Oct 2024 14:53:09 -0500 Subject: [PATCH 05/58] refactor --- src/include/migraphx/generic_float.hpp | 6 +++ src/include/migraphx/half.hpp | 61 +++++++++++--------------- 2 files changed, 31 insertions(+), 36 deletions(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index baad2c900a9..6599ab0df94 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -22,6 +22,9 @@ * THE SOFTWARE. */ +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + template constexpr unsigned int all_ones() noexcept { @@ -260,3 +263,6 @@ struct generic_float return not(x == y); } }; + +} +} diff --git a/src/include/migraphx/half.hpp b/src/include/migraphx/half.hpp index 0a93d6a237b..013bc6c6394 100644 --- a/src/include/migraphx/half.hpp +++ b/src/include/migraphx/half.hpp @@ -33,7 +33,7 @@ namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { -using half = generic_float<10,5>; +using half = migraphx::generic_float<10,5>; namespace detail { template @@ -59,56 +59,45 @@ using deduce = typename detail::deduce::type; namespace std { -template -struct common_type : std::common_type // NOLINT +template +class numeric_limits> { -}; + public: + static constexpr bool has_infinity = false; + static constexpr migraphx::generic_float epsilon() { return migraphx::generic_float::epsilon(); } -template -struct common_type : std::common_type // NOLINT -{ -}; + static constexpr migraphx::generic_float quiet_NaN() { return migraphx::generic_float::quiet_NaN(); } -template <> -struct common_type -{ - using type = float; -}; + static constexpr migraphx::generic_float max() { return migraphx::generic_float::max(); } -template <> -struct common_type -{ - using type = float; -}; + static constexpr migraphx::generic_float min() { return migraphx::generic_float::min(); } -template <> -struct common_type -{ - using type = float; -}; + static constexpr migraphx::generic_float lowest() { return migraphx::generic_float::lowest(); } -template <> -struct common_type -{ - using type = float; }; -template <> -struct common_type +template +struct common_type, T> : std::common_type // NOLINT { - using type = float; }; -template <> -struct common_type +template +struct common_type> : std::common_type // NOLINT { - using type = float; }; -template <> -struct common_type +template +struct common_type, migraphx::fp8::float8> : std::common_type +{}; + +template +struct common_type, migraphx::generic_float> : std::common_type +{}; + +template +struct common_type, migraphx::generic_float> { - using type = migraphx::half; + using type = migraphx::generic_float; }; } // namespace std From ebe819b619a38840f6b06bca88de523df3028afa Mon Sep 17 00:00:00 2001 From: richagadgil Date: Fri, 11 Oct 2024 15:13:00 -0500 Subject: [PATCH 06/58] add fp --- src/include/migraphx/half.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/include/migraphx/half.hpp b/src/include/migraphx/half.hpp index 013bc6c6394..df1a03b095c 100644 --- a/src/include/migraphx/half.hpp +++ b/src/include/migraphx/half.hpp @@ -54,6 +54,12 @@ struct deduce template using deduce = typename detail::deduce::type; +template +struct is_floating_point : std::false_type {}; + +template +struct is_floating_point> : std::true_type {}; + } // namespace MIGRAPHX_INLINE_NS } // namespace migraphx From 379a77a16ada3df39e39b9a0a3922d4898cdcec1 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Mon, 14 Oct 2024 17:04:13 -0500 Subject: [PATCH 07/58] fixed generic float class --- src/include/migraphx/generic_float.hpp | 88 +++++++++++++++++++++++++- src/include/migraphx/half.hpp | 65 ++++++++++--------- 2 files changed, 120 insertions(+), 33 deletions(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 6599ab0df94..57697e0fd6f 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -22,6 +22,11 @@ * THE SOFTWARE. */ +#include +#include +#include +#include + namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { @@ -75,11 +80,25 @@ struct generic_float return all_ones(); } - explicit generic_float(float f = 0.0) noexcept + explicit constexpr generic_float(float f = 0.0) noexcept + { + from_float(get_parts(f)); + } + + constexpr generic_float &operator=(float f) noexcept { from_float(get_parts(f)); + return *this; } + constexpr generic_float operator-() const noexcept + { + generic_float result = *this; + result.sign = !this->sign; + return result; + } + + constexpr float to_float() const noexcept { float32_parts f{}; @@ -251,18 +270,81 @@ struct generic_float MIGRAPHX_GENERIC_FLOAT_BINARY_OP(>) MIGRAPHX_GENERIC_FLOAT_BINARY_OP(>=) - friend constexpr generic_float operator==(const generic_float& x, const generic_float& y) + friend constexpr bool operator==(const generic_float& x, const generic_float& y) { if (not x.is_finite() or not y.is_finite()) return false; return std::tie(x.mantissa, x.exponent, x.sign) == std::tie(y.mantissa, y.exponent, y.sign); } - friend constexpr generic_float operator!=(const generic_float& x, const generic_float& y) + friend constexpr bool operator!=(const generic_float& x, const generic_float& y) { return not(x == y); } }; + } } + +namespace std { + +template +class numeric_limits> +{ + public: + static constexpr bool has_infinity = true; + static constexpr migraphx::generic_float epsilon() { return migraphx::generic_float::epsilon(); } + + static constexpr migraphx::generic_float quiet_NaN() { return migraphx::generic_float::qnan(); } + + static constexpr migraphx::generic_float max() { return migraphx::generic_float::max(); } + + static constexpr migraphx::generic_float min() { return migraphx::generic_float::min(); } + + static constexpr migraphx::generic_float lowest() { return migraphx::generic_float::lowest(); } + + static constexpr migraphx::generic_float infinity() { return migraphx::generic_float::infinity(); } + +}; + +template +struct common_type, T> : std::common_type // NOLINT +{ +}; + +template +struct common_type> : std::common_type // NOLINT +{ +}; + +// template +// struct common_type, migraphx::fp8::float8> : std::common_type +// {}; + +// template +// struct common_type, migraphx::generic_float> : std::common_type +// {}; + +// template +// struct common_type, migraphx::fp8::float8> : std::common_type +// {}; + +// template +// struct common_type, migraphx::generic_float> : std::common_type +// {}; + +template +struct common_type, migraphx::generic_float> +{ + using type = migraphx::generic_float; +}; + +// template +// struct common_type, migraphx::generic_float> +// { +// using type = float; +// }; + + +} diff --git a/src/include/migraphx/half.hpp b/src/include/migraphx/half.hpp index df1a03b095c..5b7b0607fd8 100644 --- a/src/include/migraphx/half.hpp +++ b/src/include/migraphx/half.hpp @@ -54,56 +54,61 @@ struct deduce template using deduce = typename detail::deduce::type; -template -struct is_floating_point : std::false_type {}; - -template -struct is_floating_point> : std::true_type {}; - } // namespace MIGRAPHX_INLINE_NS } // namespace migraphx namespace std { -template -class numeric_limits> +template +struct common_type : std::common_type // NOLINT { - public: - static constexpr bool has_infinity = false; - static constexpr migraphx::generic_float epsilon() { return migraphx::generic_float::epsilon(); } - - static constexpr migraphx::generic_float quiet_NaN() { return migraphx::generic_float::quiet_NaN(); } - - static constexpr migraphx::generic_float max() { return migraphx::generic_float::max(); } +}; - static constexpr migraphx::generic_float min() { return migraphx::generic_float::min(); } +template +struct common_type : std::common_type // NOLINT +{ +}; - static constexpr migraphx::generic_float lowest() { return migraphx::generic_float::lowest(); } +template <> +struct common_type +{ + using type = float; +}; +template <> +struct common_type +{ + using type = float; }; -template -struct common_type, T> : std::common_type // NOLINT +template <> +struct common_type { + using type = float; }; -template -struct common_type> : std::common_type // NOLINT +template <> +struct common_type { + using type = float; }; -template -struct common_type, migraphx::fp8::float8> : std::common_type -{}; +template <> +struct common_type +{ + using type = float; +}; -template -struct common_type, migraphx::generic_float> : std::common_type -{}; +template <> +struct common_type +{ + using type = float; +}; -template -struct common_type, migraphx::generic_float> +template <> +struct common_type { - using type = migraphx::generic_float; + using type = migraphx::half; }; } // namespace std From 174384ca7df47391fb802610ad68a7a3857521c2 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Mon, 14 Oct 2024 18:30:07 -0500 Subject: [PATCH 08/58] add fp32 test --- test/float32.cpp | 55 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 test/float32.cpp diff --git a/test/float32.cpp b/test/float32.cpp new file mode 100644 index 00000000000..12256eff38c --- /dev/null +++ b/test/float32.cpp @@ -0,0 +1,55 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include "test.hpp" + +#include + +using fp32 = migraphx::generic_float<23, 8>; + +#define CHECK_FLOAT(x, y) \ + CHECK(migraphx::float_equal(x, y)); \ + CHECK(migraphx::float_equal(x, y.to_float())); \ + CHECK(migraphx::float_equal(fp32{x}, y)); \ + CHECK(migraphx::float_equal(fp32{x}.to_float(), y.to_float())) + + +TEST_CASE(fp32_values) +{ + CHECK_FLOAT(1.0f, fp32{1.0f}); + CHECK_FLOAT(-1.0f, fp32{-1.0f}); + CHECK_FLOAT(std::numeric_limits::min(), fp32::min()); + CHECK_FLOAT(std::numeric_limits::lowest(), fp32::lowest()); + CHECK_FLOAT(std::numeric_limits::max(), fp32::max()); + CHECK_FLOAT(std::numeric_limits::epsilon(), fp32::epsilon()); + CHECK_FLOAT(std::numeric_limits::infinity(), fp32::infinity()); + CHECK_FLOAT(std::numeric_limits::quiet_NaN(), fp32::qnan()); + CHECK_FLOAT(std::numeric_limits::signaling_NaN(), fp32::snan()); + CHECK_FLOAT(std::numeric_limits::denorm_min(), fp32::denorm_min()); +} + +int main(int argc, const char* argv[]) { test::run(argc, argv); } From 787b651868528d0393812a50f31aad51be081f8d Mon Sep 17 00:00:00 2001 From: richagadgil Date: Mon, 14 Oct 2024 18:31:24 -0500 Subject: [PATCH 09/58] remove import --- test/float32.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/test/float32.cpp b/test/float32.cpp index 12256eff38c..5cfa5b916d3 100644 --- a/test/float32.cpp +++ b/test/float32.cpp @@ -23,7 +23,6 @@ */ #include #include -#include #include #include "test.hpp" From 1d1fa1ccf5f473fddfc2af6370d9f38b7c55a8eb Mon Sep 17 00:00:00 2001 From: richagadgil Date: Tue, 15 Oct 2024 14:20:52 -0500 Subject: [PATCH 10/58] update tests --- test/float32.cpp | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/test/float32.cpp b/test/float32.cpp index 5cfa5b916d3..63b9db489d8 100644 --- a/test/float32.cpp +++ b/test/float32.cpp @@ -25,6 +25,7 @@ #include #include #include "test.hpp" +#include #include @@ -37,7 +38,7 @@ using fp32 = migraphx::generic_float<23, 8>; CHECK(migraphx::float_equal(fp32{x}.to_float(), y.to_float())) -TEST_CASE(fp32_values) +TEST_CASE(fp32_values_working) { CHECK_FLOAT(1.0f, fp32{1.0f}); CHECK_FLOAT(-1.0f, fp32{-1.0f}); @@ -45,10 +46,39 @@ TEST_CASE(fp32_values) CHECK_FLOAT(std::numeric_limits::lowest(), fp32::lowest()); CHECK_FLOAT(std::numeric_limits::max(), fp32::max()); CHECK_FLOAT(std::numeric_limits::epsilon(), fp32::epsilon()); - CHECK_FLOAT(std::numeric_limits::infinity(), fp32::infinity()); - CHECK_FLOAT(std::numeric_limits::quiet_NaN(), fp32::qnan()); - CHECK_FLOAT(std::numeric_limits::signaling_NaN(), fp32::snan()); CHECK_FLOAT(std::numeric_limits::denorm_min(), fp32::denorm_min()); + // CHECK_FLOAT(std::numeric_limits::infinity(), fp32::infinity()); + // CHECK_FLOAT(std::numeric_limits::quiet_NaN(), fp32::qnan()); + // CHECK_FLOAT(std::numeric_limits::signaling_NaN(), fp32::snan()); } +TEST_CASE(test_infinity_1) +{ + float f_inf = std::numeric_limits::infinity(); + float f32_inf = fp32::infinity().to_float(); + EXPECT(f32_inf == f_inf); +} + +TEST_CASE(test_infinity_2) +{ + float f_inf = -1.0 * std::numeric_limits::infinity(); + float f32_inf = -1.0 * fp32::infinity().to_float(); + EXPECT(f32_inf == f_inf); +} + +TEST_CASE(test_snan) +{ + fp32 fp32_snan = fp32::snan(); + EXPECT(fp32_snan.is_nan()); + EXPECT(std::isnan(fp32_snan)); +} + +TEST_CASE(test_qnan) +{ + fp32 fp32_snan = fp32::qnan(); + EXPECT(fp32_snan.is_nan()); + EXPECT(std::isnan(fp32_snan)); +} + + int main(int argc, const char* argv[]) { test::run(argc, argv); } From 179109294c3bcd68b8940a025741089f8ba24975 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Thu, 17 Oct 2024 18:16:10 -0500 Subject: [PATCH 11/58] fp16 tests that work --- src/include/migraphx/generic_float.hpp | 2 +- test/float16.cpp | 85 ++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 test/float16.cpp diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 57697e0fd6f..03f18de37af 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -149,7 +149,7 @@ struct generic_float } } - exponent = std::min(f.exponent, all_ones()); + exponent = std::min(exponent, all_ones()); } constexpr bool is_normal() const noexcept diff --git a/test/float16.cpp b/test/float16.cpp new file mode 100644 index 00000000000..4843338b92e --- /dev/null +++ b/test/float16.cpp @@ -0,0 +1,85 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include "test.hpp" +#define HIP_ENABLE_PRINTF + +#include + +using fp16 = migraphx::half; + +#define CHECK_FLOAT(x, y) \ + CHECK(migraphx::float_equal(x, y)); \ + CHECK(migraphx::float_equal(x, y.to_float())); \ + CHECK(migraphx::float_equal(fp16{x}, y)); \ + CHECK(migraphx::float_equal(fp16{x}.to_float(), y.to_float())) + + +TEST_CASE(fp16_values) +{ + + CHECK_FLOAT(1.0f, fp16{1.0f}); + CHECK_FLOAT(-1.0f, fp16{-1.0f}); + // CHECK_FLOAT(std::numeric_limits::min(), fp16::min()); + // CHECK_FLOAT(std::numeric_limits::lowest(), fp16::lowest()); + // CHECK_FLOAT(std::numeric_limits::max(), fp16::max()); + // CHECK_FLOAT(std::numeric_limits::epsilon(), fp16::epsilon()); + // CHECK_FLOAT(std::numeric_limits::infinity(), fp16::infinity()); + // CHECK_FLOAT(std::numeric_limits::quiet_NaN(), fp16::qnan()); + // CHECK_FLOAT(std::numeric_limits::signaling_NaN(), fp16::snan()); + // CHECK_FLOAT(std::numeric_limits::denorm_min(), fp16::denorm_min()); +} + +TEST_CASE(test_infinity_1) +{ + float f_inf = std::numeric_limits::infinity(); + float f16_inf = fp16::infinity().to_float(); + EXPECT(f16_inf == f_inf); +} + +TEST_CASE(test_infinity_2) +{ + float f_inf = -1.0 * std::numeric_limits::infinity(); + float f16_inf = -1.0 * fp16::infinity().to_float(); + EXPECT(f16_inf == f_inf); +} + +TEST_CASE(test_snan) +{ + fp16 fp16_snan = fp16::snan(); + EXPECT(fp16_snan.is_nan()); + EXPECT(std::isnan(fp16_snan)); +} + +TEST_CASE(test_qnan) +{ + fp16 fp16_snan = fp16::qnan(); + EXPECT(fp16_snan.is_nan()); + EXPECT(std::isnan(fp16_snan)); +} + + +int main(int argc, const char* argv[]) { test::run(argc, argv); } From a2eb0051414ca31b0a0178529330125537c5ec42 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Fri, 18 Oct 2024 00:03:05 -0500 Subject: [PATCH 12/58] update tests --- src/include/migraphx/generic_float.hpp | 14 ++++++++++++-- test/float16.cpp | 3 +-- test/float32.cpp | 1 + 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 03f18de37af..6352e8fc2d4 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -104,7 +104,12 @@ struct generic_float float32_parts f{}; f.sign = sign; f.mantissa = mantissa << (float32_parts::mantissa_width() - MantissaSize); - if(exponent == all_ones()) + + if(exponent == 1 and mantissa == 0) + { + f.exponent = 1; + } + else if(exponent == all_ones()) { f.exponent = float32_parts::max_exponent(); } @@ -125,6 +130,10 @@ struct generic_float { exponent = 0; } + else if (f.exponent == 1 and f.mantissa == 0) + { + exponent = 1; + } else if(f.exponent == float32_parts::max_exponent()) { exponent = all_ones(); @@ -133,7 +142,8 @@ struct generic_float { constexpr const int diff = float32_parts::exponent_bias() - exponent_bias(); auto e = int(f.exponent) - diff; - if(e >= all_ones()) + + if(e >= static_cast(all_ones())) { exponent = all_ones(); mantissa = 0; diff --git a/test/float16.cpp b/test/float16.cpp index 4843338b92e..9effc9a8f5a 100644 --- a/test/float16.cpp +++ b/test/float16.cpp @@ -25,7 +25,6 @@ #include #include #include "test.hpp" -#define HIP_ENABLE_PRINTF #include @@ -43,7 +42,7 @@ TEST_CASE(fp16_values) CHECK_FLOAT(1.0f, fp16{1.0f}); CHECK_FLOAT(-1.0f, fp16{-1.0f}); - // CHECK_FLOAT(std::numeric_limits::min(), fp16::min()); + CHECK_FLOAT(std::numeric_limits::min(), fp16::min()); // CHECK_FLOAT(std::numeric_limits::lowest(), fp16::lowest()); // CHECK_FLOAT(std::numeric_limits::max(), fp16::max()); // CHECK_FLOAT(std::numeric_limits::epsilon(), fp16::epsilon()); diff --git a/test/float32.cpp b/test/float32.cpp index 63b9db489d8..a6025cd8b21 100644 --- a/test/float32.cpp +++ b/test/float32.cpp @@ -47,6 +47,7 @@ TEST_CASE(fp32_values_working) CHECK_FLOAT(std::numeric_limits::max(), fp32::max()); CHECK_FLOAT(std::numeric_limits::epsilon(), fp32::epsilon()); CHECK_FLOAT(std::numeric_limits::denorm_min(), fp32::denorm_min()); + // CHECK_FLOAT(std::numeric_limits::infinity(), fp32::infinity()); // CHECK_FLOAT(std::numeric_limits::quiet_NaN(), fp32::qnan()); // CHECK_FLOAT(std::numeric_limits::signaling_NaN(), fp32::snan()); From ff8ffc7f3af9570eb7892cfa3b69cd5b2cc24373 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Fri, 18 Oct 2024 18:06:28 -0500 Subject: [PATCH 13/58] updated fp16 and fp32 tests --- src/include/migraphx/generic_float.hpp | 20 +- test/float16.cpp | 1115 +++++++++++++++++++++++- test/float32.cpp | 52 +- 3 files changed, 1097 insertions(+), 90 deletions(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 6352e8fc2d4..75ed3d21c1f 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -26,6 +26,7 @@ #include #include #include +#include namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { @@ -33,7 +34,7 @@ inline namespace MIGRAPHX_INLINE_NS { template constexpr unsigned int all_ones() noexcept { - return (1 << N) - 1; + return (1u << N) - 1u; } struct float32_parts @@ -68,8 +69,9 @@ constexpr float32_parts get_parts(float f) return migraphx::bit_cast(f); } + template -struct generic_float +struct __attribute__((packed)) generic_float { unsigned int mantissa : MantissaSize; unsigned int exponent : ExponentSize; @@ -105,11 +107,7 @@ struct generic_float f.sign = sign; f.mantissa = mantissa << (float32_parts::mantissa_width() - MantissaSize); - if(exponent == 1 and mantissa == 0) - { - f.exponent = 1; - } - else if(exponent == all_ones()) + if(exponent == all_ones()) { f.exponent = float32_parts::max_exponent(); } @@ -130,10 +128,6 @@ struct generic_float { exponent = 0; } - else if (f.exponent == 1 and f.mantissa == 0) - { - exponent = 1; - } else if(f.exponent == float32_parts::max_exponent()) { exponent = all_ones(); @@ -308,6 +302,8 @@ class numeric_limits> static constexpr migraphx::generic_float quiet_NaN() { return migraphx::generic_float::qnan(); } + static constexpr migraphx::generic_float signaling_NaN() { return migraphx::generic_float::snan(); } + static constexpr migraphx::generic_float max() { return migraphx::generic_float::max(); } static constexpr migraphx::generic_float min() { return migraphx::generic_float::min(); } @@ -316,6 +312,8 @@ class numeric_limits> static constexpr migraphx::generic_float infinity() { return migraphx::generic_float::infinity(); } + static constexpr migraphx::generic_float denorm_min() { return migraphx::generic_float::denorm_min(); } + }; template diff --git a/test/float16.cpp b/test/float16.cpp index 9effc9a8f5a..e2153e095c0 100644 --- a/test/float16.cpp +++ b/test/float16.cpp @@ -24,61 +24,1090 @@ #include #include #include +#include #include "test.hpp" #include +#include -using fp16 = migraphx::half; - -#define CHECK_FLOAT(x, y) \ - CHECK(migraphx::float_equal(x, y)); \ - CHECK(migraphx::float_equal(x, y.to_float())); \ - CHECK(migraphx::float_equal(fp16{x}, y)); \ - CHECK(migraphx::float_equal(fp16{x}.to_float(), y.to_float())) - - -TEST_CASE(fp16_values) -{ - - CHECK_FLOAT(1.0f, fp16{1.0f}); - CHECK_FLOAT(-1.0f, fp16{-1.0f}); - CHECK_FLOAT(std::numeric_limits::min(), fp16::min()); - // CHECK_FLOAT(std::numeric_limits::lowest(), fp16::lowest()); - // CHECK_FLOAT(std::numeric_limits::max(), fp16::max()); - // CHECK_FLOAT(std::numeric_limits::epsilon(), fp16::epsilon()); - // CHECK_FLOAT(std::numeric_limits::infinity(), fp16::infinity()); - // CHECK_FLOAT(std::numeric_limits::quiet_NaN(), fp16::qnan()); - // CHECK_FLOAT(std::numeric_limits::signaling_NaN(), fp16::snan()); - // CHECK_FLOAT(std::numeric_limits::denorm_min(), fp16::denorm_min()); -} - -TEST_CASE(test_infinity_1) +template +bool bit_equal(const T& x, const U& y) { - float f_inf = std::numeric_limits::infinity(); - float f16_inf = fp16::infinity().to_float(); - EXPECT(f16_inf == f_inf); + static_assert(sizeof(T) == sizeof(U)); + using type = std::array; + return migraphx::bit_cast(x) == migraphx::bit_cast(y); } -TEST_CASE(test_infinity_2) +TEST_CASE(check_numeric_limits) { - float f_inf = -1.0 * std::numeric_limits::infinity(); - float f16_inf = -1.0 * fp16::infinity().to_float(); - EXPECT(f16_inf == f_inf); + CHECK(bit_equal(std::numeric_limits::min(), uint16_t{0x0400})); + CHECK(bit_equal(std::numeric_limits::lowest(), uint16_t{0xfbff})); + CHECK(bit_equal(std::numeric_limits::max(), uint16_t{0x7bff})); + CHECK(bit_equal(std::numeric_limits::epsilon(), uint16_t{0x1400})); + CHECK(bit_equal(std::numeric_limits::denorm_min(), uint16_t{0x0001})); + CHECK(bit_equal(std::numeric_limits::infinity(), uint16_t{0x7c00})); + CHECK(bit_equal(std::numeric_limits::quiet_NaN(), uint16_t{0x7e00})); + CHECK(bit_equal(std::numeric_limits::signaling_NaN(), uint16_t{0x7d00})); } -TEST_CASE(test_snan) -{ - fp16 fp16_snan = fp16::snan(); - EXPECT(fp16_snan.is_nan()); - EXPECT(std::isnan(fp16_snan)); -} +static const std::map half_lut = { + {0x0000, 0}, + {0x0058, 0.0000052452087402}, + {0x0079, 0.0000072121620178}, + {0x0097, 0.0000090003013611}, + {0x009e, 0.0000094175338745}, + {0x0125, 0.0000174641609192}, + {0x0167, 0.0000213980674744}, + {0x0196, 0.0000241994857788}, + {0x01c4, 0.0000269412994385}, + {0x01c8, 0.0000271797180176}, + {0x0236, 0.0000337362289429}, + {0x029f, 0.0000399947166443}, + {0x02bf, 0.0000419020652771}, + {0x02d6, 0.0000432729721069}, + {0x03a6, 0.0000556707382202}, + {0x03b7, 0.0000566840171814}, + {0x03d4, 0.0000584125518799}, + {0x03d8, 0.000058650970459}, + {0x03ed, 0.0000599026679993}, + {0x0427, 0.0000633597373962}, + {0x0430, 0.0000638961791992}, + {0x0435, 0.0000641942024231}, + {0x0454, 0.0000660419464111}, + {0x047a, 0.0000683069229126}, + {0x04b6, 0.0000718832015991}, + {0x056a, 0.0000826120376587}, + {0x056f, 0.0000829100608826}, + {0x0584, 0.0000841617584229}, + {0x05a1, 0.0000858902931213}, + {0x05a4, 0.0000860691070557}, + {0x05b8, 0.0000872611999512}, + {0x05bc, 0.0000874996185303}, + {0x0635, 0.0000947117805481}, + {0x0641, 0.0000954270362854}, + {0x0686, 0.0000995397567749}, + {0x0694, 0.0001003742218018}, + {0x06db, 0.0001046061515808}, + {0x0725, 0.0001090168952942}, + {0x0777, 0.0001139044761658}, + {0x07b2, 0.0001174211502075}, + {0x0812, 0.0001242160797119}, + {0x082e, 0.0001275539398193}, + {0x0859, 0.00013267993927}, + {0x0895, 0.0001398324966431}, + {0x08af, 0.0001429319381714}, + {0x08fc, 0.0001521110534668}, + {0x092e, 0.0001580715179443}, + {0x0971, 0.0001660585403442}, + {0x0991, 0.0001698732376099}, + {0x09ca, 0.0001766681671143}, + {0x0a63, 0.0001949071884155}, + {0x0a8e, 0.0002000331878662}, + {0x0a93, 0.000200629234314}, + {0x0b2a, 0.0002186298370361}, + {0x0b3a, 0.0002205371856689}, + {0x0b3c, 0.000220775604248}, + {0x0b4e, 0.00022292137146}, + {0x0bae, 0.0002343654632568}, + {0x0bff, 0.0002440214157104}, + {0x0c08, 0.0002460479736328}, + {0x0c56, 0.0002646446228027}, + {0x0c61, 0.0002672672271729}, + {0x0c70, 0.0002708435058594}, + {0x0c7c, 0.0002737045288086}, + {0x0cd8, 0.0002956390380859}, + {0x0cdd, 0.0002968311309814}, + {0x0d05, 0.0003063678741455}, + {0x0d61, 0.0003283023834229}, + {0x0d85, 0.0003368854522705}, + {0x0d8c, 0.0003385543823242}, + {0x0d90, 0.0003395080566406}, + {0x0d9e, 0.000342845916748}, + {0x0da5, 0.0003445148468018}, + {0x0dda, 0.0003571510314941}, + {0x0dde, 0.0003581047058105}, + {0x0df6, 0.000363826751709}, + {0x0eec, 0.000422477722168}, + {0x0f1c, 0.0004339218139648}, + {0x0f99, 0.0004637241363525}, + {0x0fac, 0.0004682540893555}, + {0x0fb0, 0.0004692077636719}, + {0x0ff5, 0.0004856586456299}, + {0x107f, 0.0005488395690918}, + {0x1096, 0.0005598068237305}, + {0x10c8, 0.0005836486816406}, + {0x10e9, 0.0005993843078613}, + {0x110a, 0.000615119934082}, + {0x118a, 0.000676155090332}, + {0x11b5, 0.0006966590881348}, + {0x1293, 0.0008025169372559}, + {0x133f, 0.0008845329284668}, + {0x1342, 0.0008859634399414}, + {0x1372, 0.0009088516235352}, + {0x13cf, 0.000953197479248}, + {0x140c, 0.0009880065917969}, + {0x1437, 0.0010290145874023}, + {0x14a3, 0.0011320114135742}, + {0x14a6, 0.0011348724365234}, + {0x14b2, 0.0011463165283203}, + {0x14ba, 0.0011539459228516}, + {0x14d9, 0.0011835098266602}, + {0x14da, 0.0011844635009766}, + {0x14e7, 0.0011968612670898}, + {0x14fe, 0.0012187957763672}, + {0x1521, 0.0012521743774414}, + {0x153d, 0.0012788772583008}, + {0x15ad, 0.0013856887817383}, + {0x15fd, 0.0014619827270508}, + {0x1649, 0.0015344619750977}, + {0x1658, 0.0015487670898438}, + {0x168a, 0.0015964508056641}, + {0x169d, 0.0016145706176758}, + {0x16b3, 0.0016355514526367}, + {0x16c9, 0.0016565322875977}, + {0x16d1, 0.0016641616821289}, + {0x16e0, 0.001678466796875}, + {0x170a, 0.0017185211181641}, + {0x176d, 0.0018129348754883}, + {0x185b, 0.0021266937255859}, + {0x185e, 0.0021324157714844}, + {0x187e, 0.0021934509277344}, + {0x18ca, 0.0023384094238281}, + {0x18e9, 0.0023975372314453}, + {0x1901, 0.0024433135986328}, + {0x191e, 0.0024986267089844}, + {0x1963, 0.0026302337646484}, + {0x199f, 0.0027446746826172}, + {0x19b2, 0.0027809143066406}, + {0x19d4, 0.0028457641601562}, + {0x1a31, 0.0030231475830078}, + {0x1a4a, 0.0030708312988281}, + {0x1a7a, 0.0031623840332031}, + {0x1ace, 0.0033226013183594}, + {0x1b03, 0.0034236907958984}, + {0x1b22, 0.0034828186035156}, + {0x1d49, 0.0051612854003906}, + {0x1d5a, 0.0052261352539062}, + {0x1d6c, 0.0052947998046875}, + {0x1e02, 0.0058670043945312}, + {0x1e19, 0.0059547424316406}, + {0x1e4c, 0.0061492919921875}, + {0x1eb3, 0.0065422058105469}, + {0x1f32, 0.0070266723632812}, + {0x1f36, 0.0070419311523438}, + {0x1f41, 0.0070838928222656}, + {0x1f7a, 0.0073013305664062}, + {0x1f8d, 0.0073738098144531}, + {0x200b, 0.0078964233398438}, + {0x205f, 0.0085372924804688}, + {0x2060, 0.008544921875}, + {0x2067, 0.0085983276367188}, + {0x20e2, 0.0095367431640625}, + {0x2164, 0.010528564453125}, + {0x22a4, 0.012969970703125}, + {0x22b4, 0.013092041015625}, + {0x22f2, 0.0135650634765625}, + {0x230c, 0.013763427734375}, + {0x2314, 0.013824462890625}, + {0x2341, 0.0141677856445312}, + {0x2356, 0.0143280029296875}, + {0x236e, 0.0145111083984375}, + {0x2371, 0.0145339965820312}, + {0x23cd, 0.0152359008789062}, + {0x2405, 0.0157012939453125}, + {0x24a2, 0.018096923828125}, + {0x24ba, 0.018463134765625}, + {0x24e7, 0.0191497802734375}, + {0x266c, 0.02508544921875}, + {0x26a2, 0.025909423828125}, + {0x26cc, 0.02655029296875}, + {0x26f0, 0.027099609375}, + {0x271e, 0.027801513671875}, + {0x2798, 0.0296630859375}, + {0x287d, 0.035064697265625}, + {0x28a2, 0.03619384765625}, + {0x28ca, 0.03741455078125}, + {0x2933, 0.040618896484375}, + {0x298d, 0.043365478515625}, + {0x299e, 0.04388427734375}, + {0x29c0, 0.044921875}, + {0x29c2, 0.04498291015625}, + {0x29cf, 0.045379638671875}, + {0x29fa, 0.04669189453125}, + {0x2a06, 0.04705810546875}, + {0x2aa5, 0.051910400390625}, + {0x2bcb, 0.060882568359375}, + {0x2c18, 0.06396484375}, + {0x2c65, 0.06866455078125}, + {0x2c66, 0.0687255859375}, + {0x2c93, 0.07147216796875}, + {0x2d24, 0.080322265625}, + {0x2d35, 0.08135986328125}, + {0x2d4c, 0.082763671875}, + {0x2db7, 0.08929443359375}, + {0x2dec, 0.092529296875}, + {0x2e31, 0.09674072265625}, + {0x2ec9, 0.10601806640625}, + {0x2f85, 0.11749267578125}, + {0x2f94, 0.118408203125}, + {0x302b, 0.1302490234375}, + {0x3094, 0.14306640625}, + {0x3096, 0.143310546875}, + {0x30ae, 0.146240234375}, + {0x30b9, 0.1475830078125}, + {0x310c, 0.15771484375}, + {0x31bd, 0.1793212890625}, + {0x3213, 0.1898193359375}, + {0x325b, 0.1986083984375}, + {0x32aa, 0.208251953125}, + {0x32c0, 0.2109375}, + {0x32d7, 0.2137451171875}, + {0x3391, 0.2364501953125}, + {0x340d, 0.253173828125}, + {0x343d, 0.264892578125}, + {0x3566, 0.33740234375}, + {0x35e6, 0.36865234375}, + {0x35f4, 0.3720703125}, + {0x363b, 0.389404296875}, + {0x363e, 0.39013671875}, + {0x3650, 0.39453125}, + {0x3698, 0.412109375}, + {0x36e7, 0.431396484375}, + {0x36fe, 0.43701171875}, + {0x374a, 0.45556640625}, + {0x3760, 0.4609375}, + {0x3761, 0.461181640625}, + {0x379e, 0.47607421875}, + {0x37cc, 0.4873046875}, + {0x37fd, 0.499267578125}, + {0x3828, 0.51953125}, + {0x3841, 0.53173828125}, + {0x3877, 0.55810546875}, + {0x38a4, 0.580078125}, + {0x38d3, 0.60302734375}, + {0x39b2, 0.7119140625}, + {0x3a60, 0.796875}, + {0x3aa3, 0.82958984375}, + {0x3aa6, 0.8310546875}, + {0x3ac9, 0.84814453125}, + {0x3acf, 0.85107421875}, + {0x3b14, 0.884765625}, + {0x3b42, 0.9072265625}, + {0x3b5c, 0.919921875}, + {0x3bde, 0.9833984375}, + {0x3c67, 1.1005859375}, + {0x3cb5, 1.1767578125}, + {0x3cca, 1.197265625}, + {0x3cdd, 1.2158203125}, + {0x3cfc, 1.24609375}, + {0x3d1f, 1.2802734375}, + {0x3e0c, 1.51171875}, + {0x3e1c, 1.52734375}, + {0x3e5b, 1.5888671875}, + {0x3e7f, 1.6240234375}, + {0x3eae, 1.669921875}, + {0x3efe, 1.748046875}, + {0x3f3e, 1.810546875}, + {0x3f9d, 1.9033203125}, + {0x400a, 2.01953125}, + {0x4070, 2.21875}, + {0x40a0, 2.3125}, + {0x40ce, 2.40234375}, + {0x40e6, 2.44921875}, + {0x410e, 2.52734375}, + {0x4129, 2.580078125}, + {0x4144, 2.6328125}, + {0x41a4, 2.8203125}, + {0x41f3, 2.974609375}, + {0x42f1, 3.470703125}, + {0x438f, 3.779296875}, + {0x43b0, 3.84375}, + {0x43c3, 3.880859375}, + {0x43de, 3.93359375}, + {0x4483, 4.51171875}, + {0x44f8, 4.96875}, + {0x4505, 5.01953125}, + {0x45dd, 5.86328125}, + {0x45f3, 5.94921875}, + {0x460e, 6.0546875}, + {0x46ce, 6.8046875}, + {0x4704, 7.015625}, + {0x471a, 7.1015625}, + {0x475e, 7.3671875}, + {0x4761, 7.37890625}, + {0x479f, 7.62109375}, + {0x47ca, 7.7890625}, + {0x47db, 7.85546875}, + {0x47fc, 7.984375}, + {0x481e, 8.234375}, + {0x4839, 8.4453125}, + {0x483d, 8.4765625}, + {0x48ac, 9.34375}, + {0x48da, 9.703125}, + {0x4919, 10.1953125}, + {0x4950, 10.625}, + {0x4987, 11.0546875}, + {0x49bb, 11.4609375}, + {0x4a14, 12.15625}, + {0x4a92, 13.140625}, + {0x4b25, 14.2890625}, + {0x4b81, 15.0078125}, + {0x4b99, 15.1953125}, + {0x4bbe, 15.484375}, + {0x4bf8, 15.9375}, + {0x4c1f, 16.484375}, + {0x4c49, 17.140625}, + {0x4d21, 20.515625}, + {0x4d4a, 21.15625}, + {0x4d51, 21.265625}, + {0x4de2, 23.53125}, + {0x4e05, 24.078125}, + {0x4ea3, 26.546875}, + {0x4eb0, 26.75}, + {0x4f0e, 28.21875}, + {0x4f4a, 29.15625}, + {0x4f6b, 29.671875}, + {0x4fa6, 30.59375}, + {0x4fae, 30.71875}, + {0x4ff6, 31.84375}, + {0x503c, 33.875}, + {0x50e4, 39.125}, + {0x514e, 42.4375}, + {0x516b, 43.34375}, + {0x51d3, 46.59375}, + {0x5213, 48.59375}, + {0x526e, 51.4375}, + {0x52a6, 53.1875}, + {0x52b4, 53.625}, + {0x52b6, 53.6875}, + {0x52bc, 53.875}, + {0x5300, 56}, + {0x5389, 60.28125}, + {0x5406, 64.375}, + {0x5498, 73.5}, + {0x54bd, 75.8125}, + {0x54cf, 76.9375}, + {0x5502, 80.125}, + {0x558e, 88.875}, + {0x5597, 89.4375}, + {0x55eb, 94.6875}, + {0x55f6, 95.375}, + {0x5629, 98.5625}, + {0x562b, 98.6875}, + {0x5635, 99.3125}, + {0x564e, 100.875}, + {0x5671, 103.0625}, + {0x5681, 104.0625}, + {0x56d1, 109.0625}, + {0x571c, 113.75}, + {0x5756, 117.375}, + {0x5790, 121}, + {0x57fd, 127.8125}, + {0x582d, 133.625}, + {0x5869, 141.125}, + {0x58ab, 149.375}, + {0x58ad, 149.625}, + {0x58c9, 153.125}, + {0x58f7, 158.875}, + {0x5904, 160.5}, + {0x59c2, 184.25}, + {0x59e6, 188.75}, + {0x5a88, 209}, + {0x5ada, 219.25}, + {0x5aef, 221.875}, + {0x5af5, 222.625}, + {0x5b7f, 239.875}, + {0x5ba4, 244.5}, + {0x5c08, 258}, + {0x5cbf, 303.75}, + {0x5d4d, 339.25}, + {0x5dc2, 368.5}, + {0x5dc4, 369}, + {0x5e31, 396.25}, + {0x5e38, 398}, + {0x5e7c, 415}, + {0x5e8d, 419.25}, + {0x5ead, 427.25}, + {0x5eb4, 429}, + {0x5ec0, 432}, + {0x5eef, 443.75}, + {0x5f04, 449}, + {0x5f41, 464.25}, + {0x5f58, 470}, + {0x5f61, 472.25}, + {0x5f77, 477.75}, + {0x5f7b, 478.75}, + {0x6029, 532.5}, + {0x6046, 547}, + {0x6055, 554.5}, + {0x60a8, 596}, + {0x60d7, 619.5}, + {0x6139, 668.5}, + {0x6167, 691.5}, + {0x61b5, 730.5}, + {0x61c0, 736}, + {0x61e6, 755}, + {0x625b, 813.5}, + {0x62c4, 866}, + {0x62fd, 894.5}, + {0x62fe, 895}, + {0x6332, 921}, + {0x636a, 949}, + {0x6374, 954}, + {0x6376, 955}, + {0x639f, 975.5}, + {0x63d6, 1003}, + {0x6417, 1047}, + {0x642e, 1070}, + {0x6431, 1073}, + {0x644f, 1103}, + {0x6459, 1113}, + {0x645b, 1115}, + {0x6480, 1152}, + {0x648d, 1165}, + {0x649f, 1183}, + {0x64bb, 1211}, + {0x6516, 1302}, + {0x6571, 1393}, + {0x6585, 1413}, + {0x65aa, 1450}, + {0x660c, 1548}, + {0x6694, 1684}, + {0x66d0, 1744}, + {0x6721, 1825}, + {0x672d, 1837}, + {0x6734, 1844}, + {0x6766, 1894}, + {0x6773, 1907}, + {0x677d, 1917}, + {0x679a, 1946}, + {0x690f, 2590}, + {0x6934, 2664}, + {0x6955, 2730}, + {0x697d, 2810}, + {0x698e, 2844}, + {0x6a3a, 3188}, + {0x6a63, 3270}, + {0x6a67, 3278}, + {0x6a7c, 3320}, + {0x6a87, 3342}, + {0x6b07, 3598}, + {0x6b11, 3618}, + {0x6b36, 3692}, + {0x6b3c, 3704}, + {0x6b75, 3818}, + {0x6b88, 3856}, + {0x6be6, 4044}, + {0x6bee, 4060}, + {0x6c62, 4488}, + {0x6c8b, 4652}, + {0x6d30, 5312}, + {0x6d48, 5408}, + {0x6ddd, 6004}, + {0x6de9, 6052}, + {0x6e39, 6372}, + {0x6e7e, 6648}, + {0x6ea5, 6804}, + {0x6ec5, 6932}, + {0x6ee1, 7044}, + {0x6ef1, 7108}, + {0x6fa2, 7816}, + {0x6fbc, 7920}, + {0x704c, 8800}, + {0x7083, 9240}, + {0x7108, 10304}, + {0x7115, 10408}, + {0x7128, 10560}, + {0x71af, 11640}, + {0x7222, 12560}, + {0x7228, 12608}, + {0x72a5, 13608}, + {0x72e0, 14080}, + {0x72e6, 14128}, + {0x731e, 14576}, + {0x7377, 15288}, + {0x741d, 16848}, + {0x7423, 16944}, + {0x7424, 16960}, + {0x7466, 18016}, + {0x74b0, 19200}, + {0x74ce, 19680}, + {0x74f0, 20224}, + {0x754b, 21680}, + {0x7575, 22352}, + {0x7594, 22848}, + {0x75b1, 23312}, + {0x7614, 24896}, + {0x7618, 24960}, + {0x7631, 25360}, + {0x7660, 26112}, + {0x76c8, 27776}, + {0x7773, 30512}, + {0x77af, 31472}, + {0x77b9, 31632}, + {0x77de, 32224}, + {0x7844, 34944}, + {0x78d2, 39488}, + {0x7924, 42112}, + {0x793b, 42848}, + {0x79db, 47968}, + {0x7a0f, 49632}, + {0x7a1a, 49984}, + {0x7a6c, 52608}, + {0x7a99, 54048}, + {0x7ada, 56128}, + {0x7b0f, 57824}, + {0x7b15, 58016}, + {0x7b41, 59424}, + {0x7b51, 59936}, + {0x7b9c, 62336}, + {0x7ba3, 62560}, + {0x7c00, std::numeric_limits::infinity()}, + {0x7c05, std::numeric_limits::quiet_NaN()}, + {0x7c0e, std::numeric_limits::quiet_NaN()}, + {0x7c3e, std::numeric_limits::quiet_NaN()}, + {0x7c4e, std::numeric_limits::quiet_NaN()}, + {0x7c55, std::numeric_limits::quiet_NaN()}, + {0x7c58, std::numeric_limits::quiet_NaN()}, + {0x7c66, std::numeric_limits::quiet_NaN()}, + {0x7cc9, std::numeric_limits::quiet_NaN()}, + {0x7cd8, std::numeric_limits::quiet_NaN()}, + {0x7d2d, std::numeric_limits::quiet_NaN()}, + {0x7d60, std::numeric_limits::quiet_NaN()}, + {0x7d79, std::numeric_limits::quiet_NaN()}, + {0x7dc7, std::numeric_limits::quiet_NaN()}, + {0x7dcf, std::numeric_limits::quiet_NaN()}, + {0x7dd8, std::numeric_limits::quiet_NaN()}, + {0x7dfb, std::numeric_limits::quiet_NaN()}, + {0x7e0f, std::numeric_limits::quiet_NaN()}, + {0x7e56, std::numeric_limits::quiet_NaN()}, + {0x7e89, std::numeric_limits::quiet_NaN()}, + {0x7e9c, std::numeric_limits::quiet_NaN()}, + {0x7eb2, std::numeric_limits::quiet_NaN()}, + {0x7ec3, std::numeric_limits::quiet_NaN()}, + {0x7ef9, std::numeric_limits::quiet_NaN()}, + {0x7f36, std::numeric_limits::quiet_NaN()}, + {0x8040, -0.0000038146972656}, + {0x8101, -0.0000153183937073}, + {0x813d, -0.0000188946723938}, + {0x81a8, -0.0000252723693848}, + {0x81bc, -0.0000264644622803}, + {0x81c2, -0.0000268220901489}, + {0x8259, -0.00003582239151}, + {0x8330, -0.0000486373901367}, + {0x8366, -0.0000518560409546}, + {0x8392, -0.0000544786453247}, + {0x83e4, -0.0000593662261963}, + {0x83ee, -0.000059962272644}, + {0x8402, -0.0000611543655396}, + {0x845e, -0.0000666379928589}, + {0x84ac, -0.0000712871551514}, + {0x84b1, -0.0000715851783752}, + {0x84fb, -0.0000759959220886}, + {0x8546, -0.0000804662704468}, + {0x856f, -0.0000829100608826}, + {0x85b5, -0.0000870823860168}, + {0x8638, -0.0000948905944824}, + {0x8656, -0.0000966787338257}, + {0x86b9, -0.0001025795936584}, + {0x86ba, -0.0001026391983032}, + {0x86fe, -0.0001066923141479}, + {0x8731, -0.0001097321510315}, + {0x8740, -0.0001106262207031}, + {0x8793, -0.0001155734062195}, + {0x87bd, -0.0001180768013}, + {0x87f1, -0.0001211762428284}, + {0x87f4, -0.0001213550567627}, + {0x8809, -0.000123143196106}, + {0x882a, -0.0001270771026611}, + {0x8848, -0.0001306533813477}, + {0x8852, -0.0001318454742432}, + {0x8874, -0.0001358985900879}, + {0x8892, -0.0001394748687744}, + {0x88a7, -0.000141978263855}, + {0x88c8, -0.0001459121704102}, + {0x8927, -0.0001572370529175}, + {0x892a, -0.0001575946807861}, + {0x8989, -0.0001689195632935}, + {0x89b9, -0.0001746416091919}, + {0x8b18, -0.0002164840698242}, + {0x8b4b, -0.0002225637435913}, + {0x8b62, -0.000225305557251}, + {0x8b7f, -0.0002287626266479}, + {0x8bca, -0.0002377033233643}, + {0x8bcf, -0.000238299369812}, + {0x8bff, -0.0002440214157104}, + {0x8c0b, -0.0002467632293701}, + {0x8c55, -0.0002644062042236}, + {0x8c63, -0.0002677440643311}, + {0x8d53, -0.0003249645233154}, + {0x8dba, -0.0003495216369629}, + {0x8e03, -0.0003669261932373}, + {0x8e82, -0.0003972053527832}, + {0x8e9c, -0.0004034042358398}, + {0x8faa, -0.0004677772521973}, + {0x902f, -0.0005106925964355}, + {0x9051, -0.0005269050598145}, + {0x9066, -0.0005369186401367}, + {0x907e, -0.0005483627319336}, + {0x9080, -0.00054931640625}, + {0x908e, -0.0005559921264648}, + {0x9102, -0.0006113052368164}, + {0x91eb, -0.0007224082946777}, + {0x9215, -0.0007424354553223}, + {0x9252, -0.0007715225219727}, + {0x9294, -0.0008029937744141}, + {0x9297, -0.0008044242858887}, + {0x933d, -0.0008835792541504}, + {0x936f, -0.0009074211120605}, + {0x93aa, -0.0009355545043945}, + {0x93f2, -0.0009698867797852}, + {0x941d, -0.0010042190551758}, + {0x945a, -0.0010623931884766}, + {0x94ad, -0.0011415481567383}, + {0x94d2, -0.0011768341064453}, + {0x951c, -0.0012474060058594}, + {0x9520, -0.001251220703125}, + {0x952f, -0.0012655258178711}, + {0x953f, -0.0012807846069336}, + {0x9549, -0.0012903213500977}, + {0x95c6, -0.0014095306396484}, + {0x9602, -0.0014667510986328}, + {0x969b, -0.001612663269043}, + {0x96fa, -0.0017032623291016}, + {0x977d, -0.0018281936645508}, + {0x97c3, -0.0018949508666992}, + {0x97c6, -0.0018978118896484}, + {0x97db, -0.001917839050293}, + {0x97f9, -0.0019464492797852}, + {0x983f, -0.0020732879638672}, + {0x984e, -0.0021018981933594}, + {0x985a, -0.0021247863769531}, + {0x988c, -0.0022201538085938}, + {0x990d, -0.0024662017822266}, + {0x9958, -0.0026092529296875}, + {0x9971, -0.0026569366455078}, + {0x9a4e, -0.0030784606933594}, + {0x9a8f, -0.0032024383544922}, + {0x9abe, -0.0032920837402344}, + {0x9ace, -0.0033226013183594}, + {0x9b1e, -0.0034751892089844}, + {0x9b3e, -0.0035362243652344}, + {0x9b77, -0.0036449432373047}, + {0x9b89, -0.0036792755126953}, + {0x9b90, -0.003692626953125}, + {0x9bec, -0.0038681030273438}, + {0x9c03, -0.0039176940917969}, + {0x9c75, -0.0043525695800781}, + {0x9d6c, -0.0052947998046875}, + {0x9d74, -0.0053253173828125}, + {0x9da7, -0.0055198669433594}, + {0x9e73, -0.0062980651855469}, + {0x9e94, -0.0064239501953125}, + {0x9f17, -0.0069236755371094}, + {0x9f3a, -0.0070571899414062}, + {0x9f6c, -0.0072479248046875}, + {0x9f89, -0.0073585510253906}, + {0x9fbd, -0.0075569152832031}, + {0xa003, -0.0078353881835938}, + {0xa014, -0.007965087890625}, + {0xa019, -0.0080032348632812}, + {0xa01d, -0.0080337524414062}, + {0xa090, -0.0089111328125}, + {0xa1cf, -0.0113449096679688}, + {0xa1dd, -0.0114517211914062}, + {0xa249, -0.0122756958007812}, + {0xa26d, -0.0125503540039062}, + {0xa288, -0.01275634765625}, + {0xa2fb, -0.0136337280273438}, + {0xa390, -0.0147705078125}, + {0xa3b3, -0.0150375366210938}, + {0xa3ed, -0.0154800415039062}, + {0xa434, -0.01641845703125}, + {0xa476, -0.017425537109375}, + {0xa571, -0.0212554931640625}, + {0xa57d, -0.0214385986328125}, + {0xa597, -0.0218353271484375}, + {0xa5d1, -0.0227203369140625}, + {0xa5f9, -0.0233306884765625}, + {0xa680, -0.025390625}, + {0xa6e3, -0.0269012451171875}, + {0xa6f0, -0.027099609375}, + {0xa72d, -0.0280303955078125}, + {0xa77e, -0.029266357421875}, + {0xa7d0, -0.030517578125}, + {0xa7ee, -0.030975341796875}, + {0xa7f3, -0.0310516357421875}, + {0xa80c, -0.0316162109375}, + {0xa827, -0.032440185546875}, + {0xa89f, -0.036102294921875}, + {0xa8a0, -0.0361328125}, + {0xa8a5, -0.036285400390625}, + {0xa948, -0.041259765625}, + {0xaa0c, -0.0472412109375}, + {0xaa16, -0.04754638671875}, + {0xaa9a, -0.05157470703125}, + {0xaaeb, -0.054046630859375}, + {0xab5c, -0.0574951171875}, + {0xac7e, -0.0701904296875}, + {0xad33, -0.08123779296875}, + {0xad37, -0.08148193359375}, + {0xad90, -0.0869140625}, + {0xada0, -0.087890625}, + {0xade5, -0.09210205078125}, + {0xadf8, -0.09326171875}, + {0xae02, -0.0938720703125}, + {0xae04, -0.093994140625}, + {0xae4f, -0.09857177734375}, + {0xae63, -0.09979248046875}, + {0xaebe, -0.1053466796875}, + {0xaee1, -0.10748291015625}, + {0xaef9, -0.10894775390625}, + {0xaf0b, -0.11004638671875}, + {0xaf78, -0.11669921875}, + {0xaf7d, -0.11700439453125}, + {0xaf7f, -0.11712646484375}, + {0xaf8c, -0.117919921875}, + {0xafcb, -0.12176513671875}, + {0xb06b, -0.1380615234375}, + {0xb07b, -0.1400146484375}, + {0xb088, -0.1416015625}, + {0xb0b2, -0.146728515625}, + {0xb0ed, -0.1539306640625}, + {0xb0f9, -0.1553955078125}, + {0xb16c, -0.16943359375}, + {0xb189, -0.1729736328125}, + {0xb1c5, -0.1802978515625}, + {0xb1f7, -0.1864013671875}, + {0xb22d, -0.1929931640625}, + {0xb23c, -0.19482421875}, + {0xb258, -0.1982421875}, + {0xb2c7, -0.2117919921875}, + {0xb2de, -0.214599609375}, + {0xb2e1, -0.2149658203125}, + {0xb317, -0.2215576171875}, + {0xb31d, -0.2222900390625}, + {0xb3ef, -0.2479248046875}, + {0xb3f8, -0.2490234375}, + {0xb45a, -0.27197265625}, + {0xb548, -0.330078125}, + {0xb5d8, -0.365234375}, + {0xb64e, -0.39404296875}, + {0xb69f, -0.413818359375}, + {0xb6e6, -0.43115234375}, + {0xb6ed, -0.432861328125}, + {0xb6f7, -0.435302734375}, + {0xb79a, -0.47509765625}, + {0xb7b6, -0.48193359375}, + {0xb7ee, -0.49560546875}, + {0xb856, -0.5419921875}, + {0xb8c0, -0.59375}, + {0xb96f, -0.67919921875}, + {0xb9a5, -0.70556640625}, + {0xba1e, -0.7646484375}, + {0xba2d, -0.77197265625}, + {0xba48, -0.78515625}, + {0xba65, -0.79931640625}, + {0xbaaf, -0.83544921875}, + {0xbab0, -0.8359375}, + {0xbb12, -0.8837890625}, + {0xbb35, -0.90087890625}, + {0xbb47, -0.90966796875}, + {0xbb97, -0.94873046875}, + {0xbba3, -0.95458984375}, + {0xbbcb, -0.97412109375}, + {0xbbe8, -0.98828125}, + {0xbbee, -0.9912109375}, + {0xbd03, -1.2529296875}, + {0xbd4b, -1.3232421875}, + {0xbd4c, -1.32421875}, + {0xbd8a, -1.384765625}, + {0xbdb6, -1.427734375}, + {0xbde1, -1.4697265625}, + {0xbe04, -1.50390625}, + {0xbe50, -1.578125}, + {0xbe54, -1.58203125}, + {0xbe6a, -1.603515625}, + {0xbf31, -1.7978515625}, + {0xbf87, -1.8818359375}, + {0xbfa2, -1.908203125}, + {0xc016, -2.04296875}, + {0xc074, -2.2265625}, + {0xc0ca, -2.39453125}, + {0xc100, -2.5}, + {0xc1b7, -2.857421875}, + {0xc1b9, -2.861328125}, + {0xc1d3, -2.912109375}, + {0xc23f, -3.123046875}, + {0xc2d5, -3.416015625}, + {0xc32f, -3.591796875}, + {0xc3e3, -3.943359375}, + {0xc412, -4.0703125}, + {0xc49a, -4.6015625}, + {0xc4ca, -4.7890625}, + {0xc4cf, -4.80859375}, + {0xc523, -5.13671875}, + {0xc55d, -5.36328125}, + {0xc5aa, -5.6640625}, + {0xc604, -6.015625}, + {0xc61b, -6.10546875}, + {0xc642, -6.2578125}, + {0xc68b, -6.54296875}, + {0xc69e, -6.6171875}, + {0xc6b0, -6.6875}, + {0xc6ca, -6.7890625}, + {0xc71e, -7.1171875}, + {0xc721, -7.12890625}, + {0xc73b, -7.23046875}, + {0xc7d4, -7.828125}, + {0xc831, -8.3828125}, + {0xc89a, -9.203125}, + {0xc8be, -9.484375}, + {0xc8dc, -9.71875}, + {0xc8e4, -9.78125}, + {0xc8fa, -9.953125}, + {0xc8fe, -9.984375}, + {0xc969, -10.8203125}, + {0xca0f, -12.1171875}, + {0xca1a, -12.203125}, + {0xca6f, -12.8671875}, + {0xca7b, -12.9609375}, + {0xca8f, -13.1171875}, + {0xcaca, -13.578125}, + {0xcafd, -13.9765625}, + {0xcb05, -14.0390625}, + {0xcb6b, -14.8359375}, + {0xcbaf, -15.3671875}, + {0xcbb4, -15.40625}, + {0xcbdf, -15.7421875}, + {0xcc2d, -16.703125}, + {0xcc74, -17.8125}, + {0xccac, -18.6875}, + {0xcd11, -20.265625}, + {0xce04, -24.0625}, + {0xce0f, -24.234375}, + {0xceaf, -26.734375}, + {0xceb8, -26.875}, + {0xcf36, -28.84375}, + {0xcfad, -30.703125}, + {0xd019, -32.78125}, + {0xd08d, -36.40625}, + {0xd115, -40.65625}, + {0xd119, -40.78125}, + {0xd128, -41.25}, + {0xd1a4, -45.125}, + {0xd1b7, -45.71875}, + {0xd1b8, -45.75}, + {0xd203, -48.09375}, + {0xd20a, -48.3125}, + {0xd28b, -52.34375}, + {0xd2ac, -53.375}, + {0xd2ae, -53.4375}, + {0xd2c5, -54.15625}, + {0xd2f2, -55.5625}, + {0xd326, -57.1875}, + {0xd337, -57.71875}, + {0xd343, -58.09375}, + {0xd34e, -58.4375}, + {0xd40c, -64.75}, + {0xd43b, -67.6875}, + {0xd45a, -69.625}, + {0xd464, -70.25}, + {0xd4c3, -76.1875}, + {0xd505, -80.3125}, + {0xd52d, -82.8125}, + {0xd5cf, -92.9375}, + {0xd5f0, -95}, + {0xd607, -96.4375}, + {0xd635, -99.3125}, + {0xd63d, -99.8125}, + {0xd644, -100.25}, + {0xd658, -101.5}, + {0xd789, -120.5625}, + {0xd863, -140.375}, + {0xd866, -140.75}, + {0xd884, -144.5}, + {0xd88d, -145.625}, + {0xd89b, -147.375}, + {0xd8da, -155.25}, + {0xd93b, -167.375}, + {0xd982, -176.25}, + {0xd995, -178.625}, + {0xd99d, -179.625}, + {0xd9cf, -185.875}, + {0xdaaf, -213.875}, + {0xdabd, -215.625}, + {0xdb54, -234.5}, + {0xdc10, -260}, + {0xdca1, -296.25}, + {0xdd0a, -322.5}, + {0xdd56, -341.5}, + {0xddcf, -371.75}, + {0xde04, -385}, + {0xde0d, -387.25}, + {0xde3d, -399.25}, + {0xde4f, -403.75}, + {0xde66, -409.5}, + {0xdeae, -427.5}, + {0xdf52, -468.5}, + {0xdf63, -472.75}, + {0xdf6a, -474.5}, + {0xdf77, -477.75}, + {0xdf7b, -478.75}, + {0xdfc5, -497.25}, + {0xdfcf, -499.75}, + {0xdfd2, -500.5}, + {0xdfd8, -502}, + {0xdfe1, -504.25}, + {0xe022, -529}, + {0xe046, -547}, + {0xe092, -585}, + {0xe0b0, -600}, + {0xe0be, -607}, + {0xe0f4, -634}, + {0xe11b, -653.5}, + {0xe19c, -718}, + {0xe213, -777.5}, + {0xe232, -793}, + {0xe25b, -813.5}, + {0xe262, -817}, + {0xe279, -828.5}, + {0xe2cc, -870}, + {0xe2da, -877}, + {0xe326, -915}, + {0xe330, -920}, + {0xe3c3, -993.5}, + {0xe3cc, -998}, + {0xe566, -1382}, + {0xe57e, -1406}, + {0xe5c8, -1480}, + {0xe609, -1545}, + {0xe628, -1576}, + {0xe663, -1635}, + {0xe6ac, -1708}, + {0xe710, -1808}, + {0xe77f, -1919}, + {0xe7e7, -2023}, + {0xe868, -2256}, + {0xe885, -2314}, + {0xe8ea, -2516}, + {0xe919, -2610}, + {0xe92c, -2648}, + {0xea60, -3264}, + {0xeac1, -3458}, + {0xeacb, -3478}, + {0xeb22, -3652}, + {0xeb2c, -3672}, + {0xeb59, -3762}, + {0xeba5, -3914}, + {0xec53, -4428}, + {0xec97, -4700}, + {0xed16, -5208}, + {0xed4a, -5416}, + {0xed69, -5540}, + {0xee14, -6224}, + {0xee59, -6500}, + {0xee8a, -6696}, + {0xee93, -6732}, + {0xeed7, -7004}, + {0xef0b, -7212}, + {0xef59, -7524}, + {0xef61, -7556}, + {0xef67, -7580}, + {0xefb6, -7896}, + {0xf03a, -8656}, + {0xf04e, -8816}, + {0xf05f, -8952}, + {0xf09f, -9464}, + {0xf0c0, -9728}, + {0xf173, -11160}, + {0xf1d7, -11960}, + {0xf225, -12584}, + {0xf2ca, -13904}, + {0xf2d8, -14016}, + {0xf2e5, -14120}, + {0xf317, -14520}, + {0xf35d, -15080}, + {0xf3bd, -15848}, + {0xf3d3, -16024}, + {0xf3e6, -16176}, + {0xf3fb, -16344}, + {0xf477, -18288}, + {0xf4e0, -19968}, + {0xf4e5, -20048}, + {0xf50b, -20656}, + {0xf5a2, -23072}, + {0xf5c1, -23568}, + {0xf634, -25408}, + {0xf651, -25872}, + {0xf68a, -26784}, + {0xf69c, -27072}, + {0xf6ce, -27872}, + {0xf816, -33472}, + {0xf849, -35104}, + {0xf869, -36128}, + {0xf878, -36608}, + {0xf8cf, -39392}, + {0xf90a, -41280}, + {0xf916, -41664}, + {0xf91e, -41920}, + {0xf9c1, -47136}, + {0xfa0a, -49472}, + {0xfa11, -49696}, + {0xfa1d, -50080}, + {0xfa51, -51744}, + {0xfa86, -53440}, + {0xfaac, -54656}, + {0xfb95, -62112}, + {0xfbd1, -64032}, + {0xfbe0, -64512}, + {0xfbf5, -65184}, + {0xfc00, -std::numeric_limits::infinity()}, + {0xfca5, std::numeric_limits::quiet_NaN()}, + {0xfcb9, std::numeric_limits::quiet_NaN()}, + {0xfcc6, std::numeric_limits::quiet_NaN()}, + {0xfd72, std::numeric_limits::quiet_NaN()}, + {0xfd77, std::numeric_limits::quiet_NaN()}, + {0xfda3, std::numeric_limits::quiet_NaN()}, + {0xfe3e, std::numeric_limits::quiet_NaN()}, + {0xfe89, std::numeric_limits::quiet_NaN()}, + {0xfe91, std::numeric_limits::quiet_NaN()}, + {0xfe93, std::numeric_limits::quiet_NaN()}, + {0xfed1, std::numeric_limits::quiet_NaN()}, + {0xff7a, std::numeric_limits::quiet_NaN()}, + {0xffa3, std::numeric_limits::quiet_NaN()}, +}; + -TEST_CASE(test_qnan) +TEST_CASE(check_half_values) { - fp16 fp16_snan = fp16::qnan(); - EXPECT(fp16_snan.is_nan()); - EXPECT(std::isnan(fp16_snan)); -} + for(auto [x, f] : half_lut) + { + auto h = migraphx::bit_cast(x); + if(std::isnan(f)) + { + CHECK(std::isnan(h)); + } + else if(std::isinf(f)) + { + CHECK(std::isinf(h)); + CHECK((h < 0) == (f < 0)); + CHECK(bit_equal(x, migraphx::half(f))); + } + else + { + std::cout << h << " " << x << " " << f << std::endl; + std::cout << float(h) << " " << f << std::endl; + + std::cout << x << " " << migraphx::half(f) << std::endl; + + CHECK(migraphx::float_equal(float(h), f)); + CHECK(bit_equal(x, migraphx::half(f))); + std::cout << " " << std::endl; + std::cout << " " << std::endl; + } + } +} int main(int argc, const char* argv[]) { test::run(argc, argv); } diff --git a/test/float32.cpp b/test/float32.cpp index a6025cd8b21..d18b924bad1 100644 --- a/test/float32.cpp +++ b/test/float32.cpp @@ -23,6 +23,7 @@ */ #include #include +#include #include #include "test.hpp" #include @@ -31,11 +32,19 @@ using fp32 = migraphx::generic_float<23, 8>; +template +bool bit_equal(const T& x, const U& y) +{ + static_assert(sizeof(T) == sizeof(U)); + using type = std::array; + return migraphx::bit_cast(x) == migraphx::bit_cast(y); +} + #define CHECK_FLOAT(x, y) \ - CHECK(migraphx::float_equal(x, y)); \ - CHECK(migraphx::float_equal(x, y.to_float())); \ - CHECK(migraphx::float_equal(fp32{x}, y)); \ - CHECK(migraphx::float_equal(fp32{x}.to_float(), y.to_float())) + CHECK(bit_equal(x, y)); \ + CHECK(bit_equal(x, y.to_float())); \ + CHECK(bit_equal(fp32{x}, y)); \ + CHECK(bit_equal(fp32{x}.to_float(), y.to_float())) TEST_CASE(fp32_values_working) @@ -47,38 +56,9 @@ TEST_CASE(fp32_values_working) CHECK_FLOAT(std::numeric_limits::max(), fp32::max()); CHECK_FLOAT(std::numeric_limits::epsilon(), fp32::epsilon()); CHECK_FLOAT(std::numeric_limits::denorm_min(), fp32::denorm_min()); - - // CHECK_FLOAT(std::numeric_limits::infinity(), fp32::infinity()); - // CHECK_FLOAT(std::numeric_limits::quiet_NaN(), fp32::qnan()); - // CHECK_FLOAT(std::numeric_limits::signaling_NaN(), fp32::snan()); -} - -TEST_CASE(test_infinity_1) -{ - float f_inf = std::numeric_limits::infinity(); - float f32_inf = fp32::infinity().to_float(); - EXPECT(f32_inf == f_inf); -} - -TEST_CASE(test_infinity_2) -{ - float f_inf = -1.0 * std::numeric_limits::infinity(); - float f32_inf = -1.0 * fp32::infinity().to_float(); - EXPECT(f32_inf == f_inf); -} - -TEST_CASE(test_snan) -{ - fp32 fp32_snan = fp32::snan(); - EXPECT(fp32_snan.is_nan()); - EXPECT(std::isnan(fp32_snan)); -} - -TEST_CASE(test_qnan) -{ - fp32 fp32_snan = fp32::qnan(); - EXPECT(fp32_snan.is_nan()); - EXPECT(std::isnan(fp32_snan)); + CHECK_FLOAT(std::numeric_limits::infinity(), fp32::infinity()); + CHECK_FLOAT(std::numeric_limits::quiet_NaN(), fp32::qnan()); + CHECK_FLOAT(std::numeric_limits::signaling_NaN(), fp32::snan()); } From e36fd65141d9b99caa6627964dd77dd8232e0441 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Tue, 22 Oct 2024 06:49:23 +0000 Subject: [PATCH 14/58] half tests --- src/include/migraphx/generic_float.hpp | 56 +++++++++++++++++++++----- test/{float16.cpp => half.cpp} | 14 ++----- 2 files changed, 48 insertions(+), 22 deletions(-) rename test/{float16.cpp => half.cpp} (99%) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 75ed3d21c1f..e8fb2feb0bd 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -27,6 +27,7 @@ #include #include #include +#include namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { @@ -34,7 +35,7 @@ inline namespace MIGRAPHX_INLINE_NS { template constexpr unsigned int all_ones() noexcept { - return (1u << N) - 1u; + return (1 << N) - 1; } struct float32_parts @@ -100,37 +101,65 @@ struct __attribute__((packed)) generic_float return result; } - constexpr float to_float() const noexcept { float32_parts f{}; + f.sign = sign; - f.mantissa = mantissa << (float32_parts::mantissa_width() - MantissaSize); - if(exponent == all_ones()) + if(exponent == 0) { + + if(mantissa == 0) + { + + f.exponent = 0; + f.mantissa = 0; + } + else + { + int shift = 0; + f.mantissa = mantissa; + + while((f.mantissa & (1 << MantissaSize)) == 0) + { + f.mantissa <<= 1; + shift++; + } + + f.mantissa &= all_ones(); + + f.exponent = float32_parts::exponent_bias() - exponent_bias() - shift + 1; + f.mantissa = f.mantissa << (float32_parts::mantissa_width() - MantissaSize); + } + } + else if(exponent == all_ones()) + { + f.mantissa = mantissa << (float32_parts::mantissa_width() - MantissaSize); f.exponent = float32_parts::max_exponent(); } else { - constexpr const auto diff = float32_parts::exponent_bias() - exponent_bias(); - f.exponent = exponent + diff; + f.mantissa = mantissa << (float32_parts::mantissa_width() - MantissaSize); + constexpr const int diff = float32_parts::exponent_bias() - exponent_bias(); + f.exponent = int(exponent) + diff; } return f.to_float(); } constexpr void from_float(float32_parts f) noexcept { - sign = f.sign; - mantissa = f.mantissa >> (float32_parts::mantissa_width() - MantissaSize); + sign = f.sign; if(f.exponent == 0) { exponent = 0; + mantissa = f.mantissa >> (float32_parts::mantissa_width() - MantissaSize); } else if(f.exponent == float32_parts::max_exponent()) { exponent = all_ones(); + mantissa = f.mantissa >> (float32_parts::mantissa_width() - MantissaSize); } else { @@ -142,14 +171,19 @@ struct __attribute__((packed)) generic_float exponent = all_ones(); mantissa = 0; } - else if(e < 0) + else if(e <= 0) { exponent = 0; - mantissa = 0; + + auto shift = diff - int(f.exponent); + mantissa = + (f.mantissa | (1 << static_cast(float32_parts::mantissa_width()))) >> + (shift + (float32_parts::mantissa_width() - MantissaSize) + 1); } else { - exponent = f.exponent - diff; + exponent = int(f.exponent) - diff; + mantissa = f.mantissa >> (float32_parts::mantissa_width() - MantissaSize); } } diff --git a/test/float16.cpp b/test/half.cpp similarity index 99% rename from test/float16.cpp rename to test/half.cpp index e2153e095c0..07a01106cc6 100644 --- a/test/float16.cpp +++ b/test/half.cpp @@ -29,8 +29,9 @@ #include #include +#include -template +template bool bit_equal(const T& x, const U& y) { static_assert(sizeof(T) == sizeof(U)); @@ -1078,7 +1079,6 @@ static const std::map half_lut = { {0xffa3, std::numeric_limits::quiet_NaN()}, }; - TEST_CASE(check_half_values) { for(auto [x, f] : half_lut) @@ -1096,16 +1096,8 @@ TEST_CASE(check_half_values) } else { - std::cout << h << " " << x << " " << f << std::endl; - - std::cout << float(h) << " " << f << std::endl; - - std::cout << x << " " << migraphx::half(f) << std::endl; - - CHECK(migraphx::float_equal(float(h), f)); CHECK(bit_equal(x, migraphx::half(f))); - std::cout << " " << std::endl; - std::cout << " " << std::endl; + CHECK(migraphx::float_equal(float(h), f)); } } } From 9ac4e2a99bd2457afeb48139170853424bd66287 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Tue, 22 Oct 2024 19:20:21 +0000 Subject: [PATCH 15/58] underflow and overflow tests --- test/half.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/test/half.cpp b/test/half.cpp index 07a01106cc6..390baf5b776 100644 --- a/test/half.cpp +++ b/test/half.cpp @@ -1102,4 +1102,17 @@ TEST_CASE(check_half_values) } } +TEST_CASE(check_flows) +{ + // check positive underflow + CHECK(bit_equal(std::numeric_limits::min() * std::numeric_limits::min(), migraphx::half(0))); + + // check overflow + CHECK(bit_equal(std::numeric_limits::max() + std::numeric_limits::max(), std::numeric_limits::infinity())); + CHECK(bit_equal(std::numeric_limits::max() / std::numeric_limits::epsilon(), std::numeric_limits::infinity())); + + // check negative underflow + CHECK(bit_equal(std::numeric_limits::lowest() + std::numeric_limits::lowest(), -std::numeric_limits::infinity())); +} + int main(int argc, const char* argv[]) { test::run(argc, argv); } From f05fd319dbd4ef2d4c61d04f52ef7784bedfe99a Mon Sep 17 00:00:00 2001 From: richagadgil Date: Tue, 22 Oct 2024 20:19:06 +0000 Subject: [PATCH 16/58] generate map --- test/half.cpp | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/test/half.cpp b/test/half.cpp index 390baf5b776..963c3987b31 100644 --- a/test/half.cpp +++ b/test/half.cpp @@ -1115,4 +1115,68 @@ TEST_CASE(check_flows) CHECK(bit_equal(std::numeric_limits::lowest() + std::numeric_limits::lowest(), -std::numeric_limits::infinity())); } + +float halfToFloat(uint16_t half) { + uint32_t sign = (half >> 15) & 0x1; + uint32_t exponent = (half >> 10) & 0x1F; + uint32_t mantissa = half & 0x3FF; + + if (exponent == 31) { + if (mantissa == 0) { + return sign ? -std::numeric_limits::infinity() : std::numeric_limits::infinity(); + } else { + return std::numeric_limits::quiet_NaN(); + } + } + + float result; + if (exponent == 0) { + result = std::ldexp(static_cast(mantissa), -24); + } else { + result = std::ldexp(static_cast(mantissa | 0x400), exponent - 25); + } + + if (sign) { + result = -result; + } + + return result; +} + +TEST_CASE(check_map) { + std::map half_lut_all; + + for (uint16_t hexValue = 0x0000; hexValue <= 0x03FF; ++hexValue) { + float floatValue = halfToFloat(hexValue); + half_lut_all[hexValue] = floatValue; + } + + half_lut_all[0x7c00] = std::numeric_limits::infinity(); + half_lut_all[0xfc00] = -std::numeric_limits::infinity(); + half_lut_all[0x7c05] = std::numeric_limits::quiet_NaN(); + + + for(auto [x, f] : half_lut_all) + { + auto h = migraphx::bit_cast(x); + if(std::isnan(f)) + { + CHECK(std::isnan(h)); + } + else if(std::isinf(f)) + { + CHECK(std::isinf(h)); + CHECK((h < 0) == (f < 0)); + CHECK(bit_equal(x, migraphx::half(f))); + } + else + { + CHECK(bit_equal(x, migraphx::half(f))); + CHECK(migraphx::float_equal(float(h), f)); + } + } + +} + + int main(int argc, const char* argv[]) { test::run(argc, argv); } From cb4d92df73600457b32e05cf8557a7cf1f12ec63 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Tue, 22 Oct 2024 20:53:15 +0000 Subject: [PATCH 17/58] add more tests --- test/half.cpp | 140 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 91 insertions(+), 49 deletions(-) diff --git a/test/half.cpp b/test/half.cpp index 963c3987b31..5abbb76a02b 100644 --- a/test/half.cpp +++ b/test/half.cpp @@ -1108,74 +1108,116 @@ TEST_CASE(check_flows) CHECK(bit_equal(std::numeric_limits::min() * std::numeric_limits::min(), migraphx::half(0))); // check overflow + CHECK(bit_equal(std::numeric_limits::infinity() + std::numeric_limits::infinity(), std::numeric_limits::infinity())); CHECK(bit_equal(std::numeric_limits::max() + std::numeric_limits::max(), std::numeric_limits::infinity())); CHECK(bit_equal(std::numeric_limits::max() / std::numeric_limits::epsilon(), std::numeric_limits::infinity())); // check negative underflow CHECK(bit_equal(std::numeric_limits::lowest() + std::numeric_limits::lowest(), -std::numeric_limits::infinity())); + CHECK(bit_equal(-std::numeric_limits::infinity() - std::numeric_limits::infinity(), -std::numeric_limits::infinity())); } +TEST_CASE(test_nan) +{ + float f_qnan = std::numeric_limits::quiet_NaN(); + migraphx::half half_qnan(f_qnan); + EXPECT(half_qnan.is_nan()); + EXPECT(std::isnan(half_qnan)); -float halfToFloat(uint16_t half) { - uint32_t sign = (half >> 15) & 0x1; - uint32_t exponent = (half >> 10) & 0x1F; - uint32_t mantissa = half & 0x3FF; + float f_snan = std::numeric_limits::signaling_NaN(); + migraphx::half half_snan(f_snan); + EXPECT(half_snan.is_nan()); + EXPECT(std::isnan(half_snan)); +} - if (exponent == 31) { - if (mantissa == 0) { - return sign ? -std::numeric_limits::infinity() : std::numeric_limits::infinity(); - } else { - return std::numeric_limits::quiet_NaN(); - } - } +TEST_CASE(test_bool) +{ + float zero = 0.0; + float two = 2.0; + float other = -0.375; + migraphx::half fp8_zero(zero); + migraphx::half fp8_two(two); + migraphx::half fp8_other(other); + EXPECT(not static_cast(fp8_zero)); + EXPECT(static_cast(fp8_two)); + EXPECT(static_cast(fp8_other)); +} - float result; - if (exponent == 0) { - result = std::ldexp(static_cast(mantissa), -24); - } else { - result = std::ldexp(static_cast(mantissa | 0x400), exponent - 25); - } +TEST_CASE(test_pos_infinity) +{ + float finf = std::numeric_limits::infinity(); + migraphx::half half_inf_1(finf); + CHECK(bit_equal(half_inf_1, std::numeric_limits::infinity())); +} - if (sign) { - result = -result; - } +TEST_CASE(test_neg_infinity) +{ + float finf = -1.0 * std::numeric_limits::infinity(); + migraphx::half half_neginf_1(finf); + CHECK(bit_equal(half_neginf_1, -std::numeric_limits::infinity())); +} - return result; +TEST_CASE(test_numeric_max_1) +{ + float fmax = std::numeric_limits::max(); // fp32 max is fp16 inf + migraphx::half half_inf(fmax); + CHECK(bit_equal(half_inf, std::numeric_limits::infinity())); } -TEST_CASE(check_map) { - std::map half_lut_all; - for (uint16_t hexValue = 0x0000; hexValue <= 0x03FF; ++hexValue) { - float floatValue = halfToFloat(hexValue); - half_lut_all[hexValue] = floatValue; - } +TEST_CASE(test_numeric_lowest_1) +{ + float flowest = std::numeric_limits::lowest(); + migraphx::half half_neginf(flowest); + CHECK(bit_equal(half_neginf, -std::numeric_limits::infinity())); +} - half_lut_all[0x7c00] = std::numeric_limits::infinity(); - half_lut_all[0xfc00] = -std::numeric_limits::infinity(); - half_lut_all[0x7c05] = std::numeric_limits::quiet_NaN(); +TEST_CASE(test_max_eq_lowest) +{ + EXPECT(migraphx::float_equal(std::numeric_limits::lowest(), + -1 * std::numeric_limits::max())); +} +TEST_CASE(test_isfinite) +{ + EXPECT(std::isfinite(migraphx::half(0.0))); + EXPECT(std::isfinite(migraphx::half(-0.0))); + EXPECT(not std::isfinite( + migraphx::half(std::numeric_limits::quiet_NaN()))); +} - for(auto [x, f] : half_lut_all) - { - auto h = migraphx::bit_cast(x); - if(std::isnan(f)) - { - CHECK(std::isnan(h)); - } - else if(std::isinf(f)) - { - CHECK(std::isinf(h)); - CHECK((h < 0) == (f < 0)); - CHECK(bit_equal(x, migraphx::half(f))); - } - else - { - CHECK(bit_equal(x, migraphx::half(f))); - CHECK(migraphx::float_equal(float(h), f)); - } - } +TEST_CASE(test_binary_ops) +{ + auto a = migraphx::half(-1.0); + auto b = migraphx::half(1.0); + auto c = migraphx::half(0.0); + auto d = migraphx::half(-0.0); + EXPECT(migraphx::float_equal((c + d), c)); + EXPECT(migraphx::float_equal((c + d), d)); + EXPECT(migraphx::float_equal((a + b), c)); + EXPECT(migraphx::float_equal((a + b), d)); + auto e = migraphx::half(10.0); + auto f = migraphx::half(-10.0); + EXPECT(e > f); + EXPECT(f < e); + EXPECT(f <= e); + EXPECT(e >= f); + EXPECT(e <= e); + EXPECT(f >= f); + EXPECT(not migraphx::float_equal(f, e)); +} + +TEST_CASE(test_stream_op) +{ + auto a = migraphx::half(-1.0); + std::stringstream ss; + ss << a; + EXPECT(std::string("-1") == ss.str()); + ss = std::stringstream(); + auto b = std::numeric_limits::quiet_NaN(); + ss << b; + EXPECT(std::string("nan") == ss.str()); } From 0cc1946a8b223235ee71fd5aa4b680a1d552a544 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Tue, 22 Oct 2024 22:26:12 +0000 Subject: [PATCH 18/58] fix names --- test/half.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/half.cpp b/test/half.cpp index 5abbb76a02b..300f8ac40e4 100644 --- a/test/half.cpp +++ b/test/half.cpp @@ -1135,12 +1135,12 @@ TEST_CASE(test_bool) float zero = 0.0; float two = 2.0; float other = -0.375; - migraphx::half fp8_zero(zero); - migraphx::half fp8_two(two); - migraphx::half fp8_other(other); - EXPECT(not static_cast(fp8_zero)); - EXPECT(static_cast(fp8_two)); - EXPECT(static_cast(fp8_other)); + migraphx::half half_zero(zero); + migraphx::half half_two(two); + migraphx::half half_other(other); + EXPECT(not static_cast(half_zero)); + EXPECT(static_cast(half_two)); + EXPECT(static_cast(half_other)); } TEST_CASE(test_pos_infinity) From 85a761b3b10af457803a4ba3d7593447b441dcb1 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Wed, 23 Oct 2024 17:41:25 +0000 Subject: [PATCH 19/58] update tests --- test/half.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/half.cpp b/test/half.cpp index 300f8ac40e4..96d76c9a15a 100644 --- a/test/half.cpp +++ b/test/half.cpp @@ -1111,10 +1111,13 @@ TEST_CASE(check_flows) CHECK(bit_equal(std::numeric_limits::infinity() + std::numeric_limits::infinity(), std::numeric_limits::infinity())); CHECK(bit_equal(std::numeric_limits::max() + std::numeric_limits::max(), std::numeric_limits::infinity())); CHECK(bit_equal(std::numeric_limits::max() / std::numeric_limits::epsilon(), std::numeric_limits::infinity())); + CHECK(bit_equal(std::numeric_limits::max() + std::numeric_limits::min(), std::numeric_limits::max())); // check negative underflow CHECK(bit_equal(std::numeric_limits::lowest() + std::numeric_limits::lowest(), -std::numeric_limits::infinity())); CHECK(bit_equal(-std::numeric_limits::infinity() - std::numeric_limits::infinity(), -std::numeric_limits::infinity())); + CHECK(bit_equal(std::numeric_limits::lowest() - std::numeric_limits::min(), std::numeric_limits::lowest())); + } TEST_CASE(test_nan) From 65cf9ae975ce5a083caf2df7ded9b9fed10728d0 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Thu, 24 Oct 2024 16:42:25 -0500 Subject: [PATCH 20/58] remove and --- src/include/migraphx/generic_float.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index e8fb2feb0bd..44a6e82e0e6 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -127,8 +127,6 @@ struct __attribute__((packed)) generic_float shift++; } - f.mantissa &= all_ones(); - f.exponent = float32_parts::exponent_bias() - exponent_bias() - shift + 1; f.mantissa = f.mantissa << (float32_parts::mantissa_width() - MantissaSize); } From fbabf54a2b53e397bc885a644705209e61e17b9f Mon Sep 17 00:00:00 2001 From: richagadgil Date: Thu, 24 Oct 2024 16:48:21 -0500 Subject: [PATCH 21/58] disable warning --- src/include/migraphx/bit_cast.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/include/migraphx/bit_cast.hpp b/src/include/migraphx/bit_cast.hpp index 951b34bc340..4711819779d 100644 --- a/src/include/migraphx/bit_cast.hpp +++ b/src/include/migraphx/bit_cast.hpp @@ -30,8 +30,12 @@ #include #include +#if defined(__GNUC__) and !defined(__clang__) +#define MIGRAPHX_CONST_FOLD(x) (x) +#else // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) #define MIGRAPHX_CONST_FOLD(x) (__builtin_constant_p(x) ? (x) : (x)) +#endif namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { From 549f5e6646e5bc1f013738a01fa5b4d814e05c32 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Thu, 24 Oct 2024 17:08:07 -0500 Subject: [PATCH 22/58] fix tidy warning --- src/include/migraphx/generic_float.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 44a6e82e0e6..53790337956 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -35,7 +35,7 @@ inline namespace MIGRAPHX_INLINE_NS { template constexpr unsigned int all_ones() noexcept { - return (1 << N) - 1; + return (1u << N) - 1u; } struct float32_parts From d302e5d0dd01e965bb0cc004e89c96855726f907 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Fri, 25 Oct 2024 13:19:12 -0500 Subject: [PATCH 23/58] migraphx py fix --- src/py/migraphx_py.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/py/migraphx_py.cpp b/src/py/migraphx_py.cpp index 04daa5e35a3..9d05e32e67d 100644 --- a/src/py/migraphx_py.cpp +++ b/src/py/migraphx_py.cpp @@ -48,7 +48,7 @@ #include #endif -using half = half_float::half; +using half = migraphx::half; namespace py = pybind11; #ifdef __clang__ From 8d475e38475fbd11209b88ae61c729629bafa10c Mon Sep 17 00:00:00 2001 From: richagadgil Date: Fri, 25 Oct 2024 15:26:17 -0500 Subject: [PATCH 24/58] add increments --- src/include/migraphx/generic_float.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 53790337956..919b5ec02ce 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -317,6 +317,12 @@ struct __attribute__((packed)) generic_float { return not(x == y); } + + constexpr generic_float& operator++() noexcept + { + *this += generic_float(1.0f); + return *this; + } }; From a0fd055bee1b28bcccae82ac905a9b4261e76947 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Fri, 25 Oct 2024 16:41:45 -0500 Subject: [PATCH 25/58] fix warnings --- src/include/migraphx/bit_cast.hpp | 60 ------------------------------- 1 file changed, 60 deletions(-) diff --git a/src/include/migraphx/bit_cast.hpp b/src/include/migraphx/bit_cast.hpp index 4711819779d..e69de29bb2d 100644 --- a/src/include/migraphx/bit_cast.hpp +++ b/src/include/migraphx/bit_cast.hpp @@ -1,60 +0,0 @@ -/* ************************************************************************ - * Copyright (C) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- - * ies of the Software, and to permit persons to whom the Software is furnished - * to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- - * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS - * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR - * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER - * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- - * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * ************************************************************************ */ -#ifndef MIGRAPHX_GUARD_RTGLIB_BITCAST_HPP -#define MIGRAPHX_GUARD_RTGLIB_BITCAST_HPP -#include -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wstrict-aliasing" -#endif - -#include -#include - -#if defined(__GNUC__) and !defined(__clang__) -#define MIGRAPHX_CONST_FOLD(x) (x) -#else -// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) -#define MIGRAPHX_CONST_FOLD(x) (__builtin_constant_p(x) ? (x) : (x)) -#endif - -namespace migraphx { -inline namespace MIGRAPHX_INLINE_NS { -template {} and - std::is_trivially_copyable{})> -inline constexpr To bit_cast(From fr) noexcept -{ - static_assert(sizeof(To) == sizeof(From)); -#if defined(__GNUC__) and !defined(__clang__) - return MIGRAPHX_CONST_FOLD(*reinterpret_cast(&fr)); -#else - return __builtin_bit_cast(To, fr); -#endif -} -} // namespace MIGRAPHX_INLINE_NS -} // namespace migraphx -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic pop -#endif -#endif // MIGRAPHX_GUARD_RTGLIB_BITCAST_HPP From 41379fea0ddd1a7d031886d982c300ca3feb0820 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Fri, 25 Oct 2024 16:46:01 -0500 Subject: [PATCH 26/58] disable duplicate branch warning --- src/include/migraphx/bit_cast.hpp | 57 +++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/src/include/migraphx/bit_cast.hpp b/src/include/migraphx/bit_cast.hpp index e69de29bb2d..fc4aab2e3b6 100644 --- a/src/include/migraphx/bit_cast.hpp +++ b/src/include/migraphx/bit_cast.hpp @@ -0,0 +1,57 @@ +/* ************************************************************************ + * Copyright (C) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- + * ies of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- + * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- + * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ************************************************************************ */ +#ifndef MIGRAPHX_GUARD_RTGLIB_BITCAST_HPP +#define MIGRAPHX_GUARD_RTGLIB_BITCAST_HPP +#include +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#pragma GCC diagnostic ignored "-Wduplicated-branches" +#endif + +#include +#include + +// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) +#define MIGRAPHX_CONST_FOLD(x) (__builtin_constant_p(x) ? (x) : (x)) + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +template {} and + std::is_trivially_copyable{})> +inline constexpr To bit_cast(From fr) noexcept +{ + static_assert(sizeof(To) == sizeof(From)); +#if defined(__GNUC__) and !defined(__clang__) + return MIGRAPHX_CONST_FOLD(*reinterpret_cast(&fr)); +#else + return __builtin_bit_cast(To, fr); +#endif +} +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif +#endif // MIGRAPHX_GUARD_RTGLIB_BITCAST_HPP From 0c29c7bba23c3223eba894f8a5d313367baf2535 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Mon, 28 Oct 2024 13:35:28 -0500 Subject: [PATCH 27/58] add countzero_std --- src/include/migraphx/generic_float.hpp | 31 +++++++++++++++++++------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 919b5ec02ce..74b4b8a5b77 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -27,7 +27,6 @@ #include #include #include -#include namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { @@ -38,6 +37,23 @@ constexpr unsigned int all_ones() noexcept return (1u << N) - 1u; } +template +constexpr int countl_zero(T value) +{ + if(value == 0) + return sizeof(T) * 8; + + int count = 0; + constexpr int bits = sizeof(T) * 8; + + while(count < bits && (value & (static_cast(1) << (bits - 1 - count))) == 0) + { + count++; + } + + return count; +} + struct float32_parts { unsigned int mantissa : 23; @@ -104,7 +120,6 @@ struct __attribute__((packed)) generic_float constexpr float to_float() const noexcept { float32_parts f{}; - f.sign = sign; if(exponent == 0) @@ -112,7 +127,6 @@ struct __attribute__((packed)) generic_float if(mantissa == 0) { - f.exponent = 0; f.mantissa = 0; } @@ -121,13 +135,13 @@ struct __attribute__((packed)) generic_float int shift = 0; f.mantissa = mantissa; - while((f.mantissa & (1 << MantissaSize)) == 0) + if(MantissaSize < float32_parts::mantissa_width()) { - f.mantissa <<= 1; - shift++; + shift = MantissaSize - (32 - countl_zero(mantissa)); + f.mantissa <<= (shift + 1); } - f.exponent = float32_parts::exponent_bias() - exponent_bias() - shift + 1; + f.exponent = float32_parts::exponent_bias() - exponent_bias() - shift; f.mantissa = f.mantissa << (float32_parts::mantissa_width() - MantissaSize); } } @@ -142,6 +156,7 @@ struct __attribute__((packed)) generic_float constexpr const int diff = float32_parts::exponent_bias() - exponent_bias(); f.exponent = int(exponent) + diff; } + return f.to_float(); } @@ -175,7 +190,7 @@ struct __attribute__((packed)) generic_float auto shift = diff - int(f.exponent); mantissa = - (f.mantissa | (1 << static_cast(float32_parts::mantissa_width()))) >> + (f.mantissa | (1u << static_cast(float32_parts::mantissa_width()))) >> (shift + (float32_parts::mantissa_width() - MantissaSize) + 1); } else From 4b012a86658173df509bf9973e6df1e744802755 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Mon, 28 Oct 2024 16:35:23 -0500 Subject: [PATCH 28/58] ci error --- src/include/migraphx/generic_float.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 74b4b8a5b77..44eec0bf28b 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -60,6 +60,9 @@ struct float32_parts unsigned int exponent : 8; unsigned int sign : 1; + constexpr float32_parts(unsigned int m = 0, unsigned int e = 0, unsigned int s = 0) + : mantissa(m), exponent(e), sign(s) {} + static constexpr unsigned int mantissa_width() { return 23; From dbaa3a8c88acde648c616006b76fad0d089b9343 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Mon, 28 Oct 2024 17:49:18 -0500 Subject: [PATCH 29/58] simplify countl --- src/include/migraphx/generic_float.hpp | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 44eec0bf28b..1eefd339553 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -40,18 +40,10 @@ constexpr unsigned int all_ones() noexcept template constexpr int countl_zero(T value) { - if(value == 0) - return sizeof(T) * 8; - - int count = 0; - constexpr int bits = sizeof(T) * 8; - - while(count < bits && (value & (static_cast(1) << (bits - 1 - count))) == 0) - { - count++; - } - - return count; + unsigned int r = 0; + for(; value != 0; value >>= 1) + r++; + return 8 * sizeof(value) - r; } struct float32_parts From b2bd2a0cee82c0a5b6baa44919dbb925d90846b8 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Mon, 28 Oct 2024 18:00:17 -0500 Subject: [PATCH 30/58] fix ci --- src/include/migraphx/generic_float.hpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 1eefd339553..7032cd2236b 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -52,9 +52,6 @@ struct float32_parts unsigned int exponent : 8; unsigned int sign : 1; - constexpr float32_parts(unsigned int m = 0, unsigned int e = 0, unsigned int s = 0) - : mantissa(m), exponent(e), sign(s) {} - static constexpr unsigned int mantissa_width() { return 23; @@ -83,7 +80,7 @@ constexpr float32_parts get_parts(float f) template -struct __attribute__((packed)) generic_float +struct __attribute__((packed, may_alias)) generic_float { unsigned int mantissa : MantissaSize; unsigned int exponent : ExponentSize; From 6f328f013db49eabbd9101366d857651e5f39f3c Mon Sep 17 00:00:00 2001 From: richagadgil Date: Mon, 28 Oct 2024 19:31:45 -0500 Subject: [PATCH 31/58] src --- src/include/migraphx/bit_cast.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/include/migraphx/bit_cast.hpp b/src/include/migraphx/bit_cast.hpp index fc4aab2e3b6..69824acf66c 100644 --- a/src/include/migraphx/bit_cast.hpp +++ b/src/include/migraphx/bit_cast.hpp @@ -24,6 +24,7 @@ #include #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpsabi" #pragma GCC diagnostic ignored "-Wstrict-aliasing" #pragma GCC diagnostic ignored "-Wduplicated-branches" #endif From e6d9763559c29783c4445a65ae3f5e08352e0445 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Mon, 28 Oct 2024 20:07:09 -0500 Subject: [PATCH 32/58] remove flag --- src/include/migraphx/bit_cast.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/include/migraphx/bit_cast.hpp b/src/include/migraphx/bit_cast.hpp index 69824acf66c..fc4aab2e3b6 100644 --- a/src/include/migraphx/bit_cast.hpp +++ b/src/include/migraphx/bit_cast.hpp @@ -24,7 +24,6 @@ #include #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wpsabi" #pragma GCC diagnostic ignored "-Wstrict-aliasing" #pragma GCC diagnostic ignored "-Wduplicated-branches" #endif From 65380506c9eff6e26bdc5fda10c9ad0d723551ee Mon Sep 17 00:00:00 2001 From: richagadgil Date: Tue, 29 Oct 2024 12:22:31 -0500 Subject: [PATCH 33/58] hide abi warning --- src/include/migraphx/generic_float.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 7032cd2236b..d467aa57ca1 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -28,6 +28,11 @@ #include #include +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wabi" +// Code that triggers the ABI warning +#pragma GCC diagnostic pop + namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { From 4e96d4da9930ea6cd92f17b44c2404dddbdd8f1b Mon Sep 17 00:00:00 2001 From: richagadgil Date: Tue, 29 Oct 2024 12:48:44 -0500 Subject: [PATCH 34/58] revert changes --- src/include/migraphx/generic_float.hpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index d467aa57ca1..7032cd2236b 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -28,11 +28,6 @@ #include #include -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wabi" -// Code that triggers the ABI warning -#pragma GCC diagnostic pop - namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { From e4a25bd3f3fac026b2d54e4d0d41476089d46cdb Mon Sep 17 00:00:00 2001 From: richagadgil Date: Tue, 29 Oct 2024 13:05:41 -0500 Subject: [PATCH 35/58] change half in tests --- test/onnx/verify/negativelogliklihood_kd_dim_weighted.cpp | 6 +++--- .../onnx/verify/softmaxcrossentropyloss_kd_dim_weighted.cpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/test/onnx/verify/negativelogliklihood_kd_dim_weighted.cpp b/test/onnx/verify/negativelogliklihood_kd_dim_weighted.cpp index 06865e637b2..69de5d2c15f 100644 --- a/test/onnx/verify/negativelogliklihood_kd_dim_weighted.cpp +++ b/test/onnx/verify/negativelogliklihood_kd_dim_weighted.cpp @@ -170,7 +170,7 @@ TEST_CASE(negativeloglikelihoodloss_kd_mean_reduction_weighted_test) pp["2"] = migraphx::argument(weight_shape, weight_data.data()); auto result = p.eval(pp).back(); - std::vector result_vector; + std::vector result_vector; result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); }); std::vector gold = {half{-35.266666666666666}}; EXPECT(migraphx::verify::verify_rms_range(result_vector, gold)); @@ -200,7 +200,7 @@ TEST_CASE(negativeloglikelihoodloss_kd_mean_reduction_weighted_test2) migraphx::shape label_shape{migraphx::shape::int32_type, {2, 2}}; std::vector label_data = {2, 1, 0, 2}; migraphx::shape weight_shape{migraphx::shape::half_type, {3}}; - std::vector weight_data = {half(0.2), half(0.3), half(0.1)}; + std::vector weight_data = {half(0.2), half(0.3), half(0.1)}; migraphx::parameter_map pp; pp["0"] = migraphx::argument(score_shape, score_data.data()); @@ -208,7 +208,7 @@ TEST_CASE(negativeloglikelihoodloss_kd_mean_reduction_weighted_test2) pp["2"] = migraphx::argument(weight_shape, weight_data.data()); auto result = p.eval(pp).back(); - std::vector result_vector; + std::vector result_vector; result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); }); std::vector gold = {half{-1.5714285714285714}}; EXPECT(migraphx::verify::verify_rms_range(result_vector, gold)); diff --git a/test/onnx/verify/softmaxcrossentropyloss_kd_dim_weighted.cpp b/test/onnx/verify/softmaxcrossentropyloss_kd_dim_weighted.cpp index 14b5a0da963..34fb82c9070 100644 --- a/test/onnx/verify/softmaxcrossentropyloss_kd_dim_weighted.cpp +++ b/test/onnx/verify/softmaxcrossentropyloss_kd_dim_weighted.cpp @@ -180,7 +180,7 @@ TEST_CASE(softmaxcrossentropyloss_kd_mean_reduction_weighted_test) pp["2"] = migraphx::argument(weight_shape, weight_data.data()); auto result = p.eval(pp).back(); - std::vector result_vector; + std::vector result_vector; result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); }); std::vector gold = {half{1.38629436}}; EXPECT(migraphx::verify::verify_rms_range(result_vector, gold)); @@ -207,7 +207,7 @@ TEST_CASE(softmaxcrossentropyloss_kd_mean_reduction_uneven_weighted_test) pp["2"] = migraphx::argument(weight_shape, weight_data.data()); auto result = p.eval(pp).back(); - std::vector result_vector; + std::vector result_vector; result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); }); std::vector gold = {half{1.38629436}}; From 3354c6e9bab593174002917afdedb79bca6f7cf3 Mon Sep 17 00:00:00 2001 From: Richa Gadgil Date: Tue, 29 Oct 2024 14:17:38 -0700 Subject: [PATCH 36/58] Update generic_float.hpp --- src/include/migraphx/generic_float.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 7032cd2236b..01efd56ff2d 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -129,7 +129,7 @@ struct __attribute__((packed, may_alias)) generic_float if(MantissaSize < float32_parts::mantissa_width()) { - shift = MantissaSize - (32 - countl_zero(mantissa)); + shift = MantissaSize - ((sizeof(unsigned int) * 8) - countl_zero(mantissa)); f.mantissa <<= (shift + 1); } From 6de079b3ee5fea98ddf083244afee5284343947d Mon Sep 17 00:00:00 2001 From: richagadgil Date: Tue, 29 Oct 2024 21:56:13 +0000 Subject: [PATCH 37/58] format --- src/include/migraphx/generic_float.hpp | 163 ++++++++++++------------- src/include/migraphx/half.hpp | 2 +- test/float32.cpp | 10 +- test/half.cpp | 40 +++--- 4 files changed, 111 insertions(+), 104 deletions(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 01efd56ff2d..aad985984de 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -31,7 +31,7 @@ namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { -template +template constexpr unsigned int all_ones() noexcept { return (1u << N) - 1u; @@ -46,57 +46,35 @@ constexpr int countl_zero(T value) return 8 * sizeof(value) - r; } -struct float32_parts +struct float32_parts { unsigned int mantissa : 23; unsigned int exponent : 8; unsigned int sign : 1; - static constexpr unsigned int mantissa_width() - { - return 23; - } + static constexpr unsigned int mantissa_width() { return 23; } - static constexpr unsigned int max_exponent() - { - return all_ones<8>(); - } + static constexpr unsigned int max_exponent() { return all_ones<8>(); } - static constexpr int exponent_bias() - { - return all_ones<7>(); - } + static constexpr int exponent_bias() { return all_ones<7>(); } - constexpr float to_float() const noexcept - { - return migraphx::bit_cast(*this); - } + constexpr float to_float() const noexcept { return migraphx::bit_cast(*this); } }; -constexpr float32_parts get_parts(float f) -{ - return migraphx::bit_cast(f); -} - +constexpr float32_parts get_parts(float f) { return migraphx::bit_cast(f); } -template +template struct __attribute__((packed, may_alias)) generic_float { unsigned int mantissa : MantissaSize; unsigned int exponent : ExponentSize; unsigned int sign : 1; - static constexpr int exponent_bias() - { - return all_ones(); - } + static constexpr int exponent_bias() { return all_ones(); } - explicit constexpr generic_float(float f = 0.0) noexcept - { - from_float(get_parts(f)); - } + explicit constexpr generic_float(float f = 0.0) noexcept { from_float(get_parts(f)); } - constexpr generic_float &operator=(float f) noexcept + constexpr generic_float& operator=(float f) noexcept { from_float(get_parts(f)); return *this; @@ -105,7 +83,7 @@ struct __attribute__((packed, may_alias)) generic_float constexpr generic_float operator-() const noexcept { generic_float result = *this; - result.sign = !this->sign; + result.sign = !this->sign; return result; } @@ -169,7 +147,7 @@ struct __attribute__((packed, may_alias)) generic_float else { constexpr const int diff = float32_parts::exponent_bias() - exponent_bias(); - auto e = int(f.exponent) - diff; + auto e = int(f.exponent) - diff; if(e >= static_cast(all_ones())) { @@ -210,15 +188,9 @@ struct __attribute__((packed, may_alias)) generic_float return exponent == all_ones() and mantissa != 0; } - constexpr bool is_finite() const noexcept - { - return exponent != all_ones(); - } + constexpr bool is_finite() const noexcept { return exponent != all_ones(); } - constexpr operator float() const noexcept - { - return this->to_float(); - } + constexpr operator float() const noexcept { return this->to_float(); } static constexpr generic_float infinity() { @@ -256,7 +228,7 @@ struct __attribute__((packed, may_alias)) generic_float generic_float x{}; x.exponent = 0; x.mantissa = 1; - x.sign = 0; + x.sign = 0; return x; } @@ -265,7 +237,7 @@ struct __attribute__((packed, may_alias)) generic_float generic_float x{}; x.exponent = all_ones() - 1; x.mantissa = all_ones(); - x.sign = 1; + x.sign = 1; return x; } @@ -274,7 +246,7 @@ struct __attribute__((packed, may_alias)) generic_float generic_float x{}; x.exponent = all_ones() - 1; x.mantissa = all_ones(); - x.sign = 0; + x.sign = 0; return x; } @@ -285,24 +257,24 @@ struct __attribute__((packed, may_alias)) generic_float return generic_float{x.to_float() - 1.0f}; } // NOLINTNEXTLINE -#define MIGRAPHX_GENERIC_FLOAT_ASSIGN_OP(op) \ - constexpr generic_float& operator op(const generic_float& rhs) \ - { \ - float self = *this; \ - float frhs = rhs; \ - self op frhs; \ - *this = generic_float(self); \ - return *this; \ +#define MIGRAPHX_GENERIC_FLOAT_ASSIGN_OP(op) \ + constexpr generic_float& operator op(const generic_float & rhs) \ + { \ + float self = *this; \ + float frhs = rhs; \ + self op frhs; \ + *this = generic_float(self); \ + return *this; \ } MIGRAPHX_GENERIC_FLOAT_ASSIGN_OP(*=) MIGRAPHX_GENERIC_FLOAT_ASSIGN_OP(-=) MIGRAPHX_GENERIC_FLOAT_ASSIGN_OP(+=) MIGRAPHX_GENERIC_FLOAT_ASSIGN_OP(/=) // NOLINTNEXTLINE -#define MIGRAPHX_GENERIC_FLOAT_BINARY_OP(op) \ +#define MIGRAPHX_GENERIC_FLOAT_BINARY_OP(op) \ friend constexpr generic_float operator op(const generic_float& x, const generic_float& y) \ - { \ - return generic_float(float(x) op float(y)); \ + { \ + return generic_float(float(x) op float(y)); \ } MIGRAPHX_GENERIC_FLOAT_BINARY_OP(*) MIGRAPHX_GENERIC_FLOAT_BINARY_OP(-) @@ -315,7 +287,7 @@ struct __attribute__((packed, may_alias)) generic_float friend constexpr bool operator==(const generic_float& x, const generic_float& y) { - if (not x.is_finite() or not y.is_finite()) + if(not x.is_finite() or not y.is_finite()) return false; return std::tie(x.mantissa, x.exponent, x.sign) == std::tie(y.mantissa, y.exponent, y.sign); } @@ -325,70 +297,96 @@ struct __attribute__((packed, may_alias)) generic_float return not(x == y); } - constexpr generic_float& operator++() noexcept + constexpr generic_float& operator++() noexcept { - *this += generic_float(1.0f); + *this += generic_float(1.0f); return *this; } }; - -} -} +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx namespace std { -template +template class numeric_limits> { public: static constexpr bool has_infinity = true; - static constexpr migraphx::generic_float epsilon() { return migraphx::generic_float::epsilon(); } - - static constexpr migraphx::generic_float quiet_NaN() { return migraphx::generic_float::qnan(); } + static constexpr migraphx::generic_float epsilon() + { + return migraphx::generic_float::epsilon(); + } - static constexpr migraphx::generic_float signaling_NaN() { return migraphx::generic_float::snan(); } + static constexpr migraphx::generic_float quiet_NaN() + { + return migraphx::generic_float::qnan(); + } - static constexpr migraphx::generic_float max() { return migraphx::generic_float::max(); } + static constexpr migraphx::generic_float signaling_NaN() + { + return migraphx::generic_float::snan(); + } - static constexpr migraphx::generic_float min() { return migraphx::generic_float::min(); } + static constexpr migraphx::generic_float max() + { + return migraphx::generic_float::max(); + } - static constexpr migraphx::generic_float lowest() { return migraphx::generic_float::lowest(); } + static constexpr migraphx::generic_float min() + { + return migraphx::generic_float::min(); + } - static constexpr migraphx::generic_float infinity() { return migraphx::generic_float::infinity(); } + static constexpr migraphx::generic_float lowest() + { + return migraphx::generic_float::lowest(); + } - static constexpr migraphx::generic_float denorm_min() { return migraphx::generic_float::denorm_min(); } + static constexpr migraphx::generic_float infinity() + { + return migraphx::generic_float::infinity(); + } + static constexpr migraphx::generic_float denorm_min() + { + return migraphx::generic_float::denorm_min(); + } }; -template +template struct common_type, T> : std::common_type // NOLINT { }; -template +template struct common_type> : std::common_type // NOLINT { }; // template -// struct common_type, migraphx::fp8::float8> : std::common_type +// struct common_type, +// migraphx::fp8::float8> : std::common_type // {}; // template -// struct common_type, migraphx::generic_float> : std::common_type +// struct common_type, +// migraphx::generic_float> : std::common_type // {}; // template -// struct common_type, migraphx::fp8::float8> : std::common_type +// struct common_type, migraphx::fp8::float8> : +// std::common_type // {}; // template -// struct common_type, migraphx::generic_float> : std::common_type +// struct common_type, migraphx::generic_float> : +// std::common_type // {}; -template -struct common_type, migraphx::generic_float> +template +struct common_type, migraphx::generic_float> { using type = migraphx::generic_float; }; @@ -399,5 +397,4 @@ struct common_type, migraphx::generic_float; +using half = migraphx::generic_float<10, 5>; namespace detail { template diff --git a/test/float32.cpp b/test/float32.cpp index d18b924bad1..d5960cd7fdd 100644 --- a/test/float32.cpp +++ b/test/float32.cpp @@ -32,7 +32,7 @@ using fp32 = migraphx::generic_float<23, 8>; -template +template bool bit_equal(const T& x, const U& y) { static_assert(sizeof(T) == sizeof(U)); @@ -40,13 +40,12 @@ bool bit_equal(const T& x, const U& y) return migraphx::bit_cast(x) == migraphx::bit_cast(y); } -#define CHECK_FLOAT(x, y) \ - CHECK(bit_equal(x, y)); \ +#define CHECK_FLOAT(x, y) \ + CHECK(bit_equal(x, y)); \ CHECK(bit_equal(x, y.to_float())); \ - CHECK(bit_equal(fp32{x}, y)); \ + CHECK(bit_equal(fp32{x}, y)); \ CHECK(bit_equal(fp32{x}.to_float(), y.to_float())) - TEST_CASE(fp32_values_working) { CHECK_FLOAT(1.0f, fp32{1.0f}); @@ -61,5 +60,4 @@ TEST_CASE(fp32_values_working) CHECK_FLOAT(std::numeric_limits::signaling_NaN(), fp32::snan()); } - int main(int argc, const char* argv[]) { test::run(argc, argv); } diff --git a/test/half.cpp b/test/half.cpp index 96d76c9a15a..99171880bc6 100644 --- a/test/half.cpp +++ b/test/half.cpp @@ -1105,19 +1105,34 @@ TEST_CASE(check_half_values) TEST_CASE(check_flows) { // check positive underflow - CHECK(bit_equal(std::numeric_limits::min() * std::numeric_limits::min(), migraphx::half(0))); + CHECK(bit_equal(std::numeric_limits::min() * + std::numeric_limits::min(), + migraphx::half(0))); // check overflow - CHECK(bit_equal(std::numeric_limits::infinity() + std::numeric_limits::infinity(), std::numeric_limits::infinity())); - CHECK(bit_equal(std::numeric_limits::max() + std::numeric_limits::max(), std::numeric_limits::infinity())); - CHECK(bit_equal(std::numeric_limits::max() / std::numeric_limits::epsilon(), std::numeric_limits::infinity())); - CHECK(bit_equal(std::numeric_limits::max() + std::numeric_limits::min(), std::numeric_limits::max())); - - // check negative underflow - CHECK(bit_equal(std::numeric_limits::lowest() + std::numeric_limits::lowest(), -std::numeric_limits::infinity())); - CHECK(bit_equal(-std::numeric_limits::infinity() - std::numeric_limits::infinity(), -std::numeric_limits::infinity())); - CHECK(bit_equal(std::numeric_limits::lowest() - std::numeric_limits::min(), std::numeric_limits::lowest())); + CHECK(bit_equal(std::numeric_limits::infinity() + + std::numeric_limits::infinity(), + std::numeric_limits::infinity())); + CHECK(bit_equal(std::numeric_limits::max() + + std::numeric_limits::max(), + std::numeric_limits::infinity())); + CHECK(bit_equal(std::numeric_limits::max() / + std::numeric_limits::epsilon(), + std::numeric_limits::infinity())); + CHECK(bit_equal(std::numeric_limits::max() + + std::numeric_limits::min(), + std::numeric_limits::max())); + // check negative underflow + CHECK(bit_equal(std::numeric_limits::lowest() + + std::numeric_limits::lowest(), + -std::numeric_limits::infinity())); + CHECK(bit_equal(-std::numeric_limits::infinity() - + std::numeric_limits::infinity(), + -std::numeric_limits::infinity())); + CHECK(bit_equal(std::numeric_limits::lowest() - + std::numeric_limits::min(), + std::numeric_limits::lowest())); } TEST_CASE(test_nan) @@ -1167,7 +1182,6 @@ TEST_CASE(test_numeric_max_1) CHECK(bit_equal(half_inf, std::numeric_limits::infinity())); } - TEST_CASE(test_numeric_lowest_1) { float flowest = std::numeric_limits::lowest(); @@ -1185,8 +1199,7 @@ TEST_CASE(test_isfinite) { EXPECT(std::isfinite(migraphx::half(0.0))); EXPECT(std::isfinite(migraphx::half(-0.0))); - EXPECT(not std::isfinite( - migraphx::half(std::numeric_limits::quiet_NaN()))); + EXPECT(not std::isfinite(migraphx::half(std::numeric_limits::quiet_NaN()))); } TEST_CASE(test_binary_ops) @@ -1223,5 +1236,4 @@ TEST_CASE(test_stream_op) EXPECT(std::string("nan") == ss.str()); } - int main(int argc, const char* argv[]) { test::run(argc, argv); } From 33e2c8df60c40fe95667005b907bcb76916c5389 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Wed, 30 Oct 2024 19:23:45 +0000 Subject: [PATCH 38/58] fix bug --- src/include/migraphx/generic_float.hpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index aad985984de..758875d3767 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -27,6 +27,7 @@ #include #include #include +#include namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { @@ -154,14 +155,21 @@ struct __attribute__((packed, may_alias)) generic_float exponent = all_ones(); mantissa = 0; } - else if(e <= 0) + else if(e < 1) { exponent = 0; auto shift = diff - int(f.exponent); - mantissa = - (f.mantissa | (1u << static_cast(float32_parts::mantissa_width()))) >> - (shift + (float32_parts::mantissa_width() - MantissaSize) + 1); + auto shift_amount = shift + (float32_parts::mantissa_width() - MantissaSize) + 1; + + if (shift_amount <= 32) { + mantissa = + (f.mantissa | (1u << static_cast(float32_parts::mantissa_width()))) >> + shift_amount; + } else { + mantissa = 0; + } + } else { From b3c345dffc84a81492d8bdd2402eef16ca056875 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Wed, 30 Oct 2024 21:02:32 +0000 Subject: [PATCH 39/58] fix err --- src/include/migraphx/generic_float.hpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 758875d3767..90276813b05 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -155,21 +155,23 @@ struct __attribute__((packed, may_alias)) generic_float exponent = all_ones(); mantissa = 0; } - else if(e < 1) + else if(e < 1) { exponent = 0; auto shift = diff - int(f.exponent); auto shift_amount = shift + (float32_parts::mantissa_width() - MantissaSize) + 1; - - if (shift_amount <= 32) { + + if(shift_amount < 32) + { mantissa = (f.mantissa | (1u << static_cast(float32_parts::mantissa_width()))) >> - shift_amount; - } else { + (shift + (float32_parts::mantissa_width() - MantissaSize) + 1); + } + else + { mantissa = 0; - } - + } } else { From 03df6f9cd48efe1eb96baf20d0b8005766494221 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Thu, 31 Oct 2024 18:09:13 +0000 Subject: [PATCH 40/58] edits --- src/include/migraphx/generic_float.hpp | 2 +- src/include/migraphx/half.hpp | 9 --------- test/float32.cpp | 22 +++++++++++----------- 3 files changed, 12 insertions(+), 21 deletions(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 90276813b05..690bd81d219 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -27,7 +27,7 @@ #include #include #include -#include +#include namespace migraphx { inline namespace MIGRAPHX_INLINE_NS { diff --git a/src/include/migraphx/half.hpp b/src/include/migraphx/half.hpp index ea2f251c12c..34cf3f96a3b 100644 --- a/src/include/migraphx/half.hpp +++ b/src/include/migraphx/half.hpp @@ -25,7 +25,6 @@ #ifndef MIGRAPHX_GUARD_RTGLIB_HALF_HPP #define MIGRAPHX_GUARD_RTGLIB_HALF_HPP -#include #include #include #include @@ -41,14 +40,6 @@ struct deduce { using type = T; }; - -#ifdef HAS_HALF_V1 -template <> -struct deduce -{ - using type = half; -}; -#endif } // namespace detail template diff --git a/test/float32.cpp b/test/float32.cpp index d5960cd7fdd..04994124253 100644 --- a/test/float32.cpp +++ b/test/float32.cpp @@ -40,7 +40,7 @@ bool bit_equal(const T& x, const U& y) return migraphx::bit_cast(x) == migraphx::bit_cast(y); } -#define CHECK_FLOAT(x, y) \ +#define MIGRAPHX_CHECK_FLOAT(x, y) \ CHECK(bit_equal(x, y)); \ CHECK(bit_equal(x, y.to_float())); \ CHECK(bit_equal(fp32{x}, y)); \ @@ -48,16 +48,16 @@ bool bit_equal(const T& x, const U& y) TEST_CASE(fp32_values_working) { - CHECK_FLOAT(1.0f, fp32{1.0f}); - CHECK_FLOAT(-1.0f, fp32{-1.0f}); - CHECK_FLOAT(std::numeric_limits::min(), fp32::min()); - CHECK_FLOAT(std::numeric_limits::lowest(), fp32::lowest()); - CHECK_FLOAT(std::numeric_limits::max(), fp32::max()); - CHECK_FLOAT(std::numeric_limits::epsilon(), fp32::epsilon()); - CHECK_FLOAT(std::numeric_limits::denorm_min(), fp32::denorm_min()); - CHECK_FLOAT(std::numeric_limits::infinity(), fp32::infinity()); - CHECK_FLOAT(std::numeric_limits::quiet_NaN(), fp32::qnan()); - CHECK_FLOAT(std::numeric_limits::signaling_NaN(), fp32::snan()); + MIGRAPHX_CHECK_FLOAT(1.0f, fp32{1.0f}); + MIGRAPHX_CHECK_FLOAT(-1.0f, fp32{-1.0f}); + MIGRAPHX_CHECK_FLOAT(std::numeric_limits::min(), fp32::min()); + MIGRAPHX_CHECK_FLOAT(std::numeric_limits::lowest(), fp32::lowest()); + MIGRAPHX_CHECK_FLOAT(std::numeric_limits::max(), fp32::max()); + MIGRAPHX_CHECK_FLOAT(std::numeric_limits::epsilon(), fp32::epsilon()); + MIGRAPHX_CHECK_FLOAT(std::numeric_limits::denorm_min(), fp32::denorm_min()); + MIGRAPHX_CHECK_FLOAT(std::numeric_limits::infinity(), fp32::infinity()); + MIGRAPHX_CHECK_FLOAT(std::numeric_limits::quiet_NaN(), fp32::qnan()); + MIGRAPHX_CHECK_FLOAT(std::numeric_limits::signaling_NaN(), fp32::snan()); } int main(int argc, const char* argv[]) { test::run(argc, argv); } From ad817b262756ba3033126d8008fd11b1685aa701 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Thu, 31 Oct 2024 20:32:46 +0000 Subject: [PATCH 41/58] tidy and format --- src/include/migraphx/generic_float.hpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 690bd81d219..2209d7914c8 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -84,7 +84,7 @@ struct __attribute__((packed, may_alias)) generic_float constexpr generic_float operator-() const noexcept { generic_float result = *this; - result.sign = !this->sign; + result.sign = not this->sign; return result; } @@ -109,7 +109,7 @@ struct __attribute__((packed, may_alias)) generic_float if(MantissaSize < float32_parts::mantissa_width()) { shift = MantissaSize - ((sizeof(unsigned int) * 8) - countl_zero(mantissa)); - f.mantissa <<= (shift + 1); + f.mantissa <<= static_cast(shift + 1); } f.exponent = float32_parts::exponent_bias() - exponent_bias() - shift; @@ -159,13 +159,14 @@ struct __attribute__((packed, may_alias)) generic_float { exponent = 0; - auto shift = diff - int(f.exponent); + auto shift = diff - int(f.exponent); auto shift_amount = shift + (float32_parts::mantissa_width() - MantissaSize) + 1; - if(shift_amount < 32) + if(shift_amount < (sizeof(unsigned int) * 8)) { mantissa = - (f.mantissa | (1u << static_cast(float32_parts::mantissa_width()))) >> + (f.mantissa | + (1u << static_cast(float32_parts::mantissa_width()))) >> (shift + (float32_parts::mantissa_width() - MantissaSize) + 1); } else @@ -213,7 +214,7 @@ struct __attribute__((packed, may_alias)) generic_float { generic_float x{}; x.exponent = all_ones(); - x.mantissa = 1 << (MantissaSize - 2); + x.mantissa = 1u << static_cast(MantissaSize - 2); return x; } @@ -221,7 +222,7 @@ struct __attribute__((packed, may_alias)) generic_float { generic_float x{}; x.exponent = all_ones(); - x.mantissa = 1 << (MantissaSize - 1); + x.mantissa = 1u << static_cast(MantissaSize - 1); return x; } From 898417bb83566ad1ff39e0039bc94cbaef3afd05 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Thu, 31 Oct 2024 22:41:34 +0000 Subject: [PATCH 42/58] tidy etc --- src/include/migraphx/generic_float.hpp | 37 +++++++++++++++++--------- 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 2209d7914c8..cc5988ba4b9 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -88,6 +88,8 @@ struct __attribute__((packed, may_alias)) generic_float return result; } + constexpr generic_float operator+() const noexcept { return *this; } + constexpr float to_float() const noexcept { float32_parts f{}; @@ -103,13 +105,13 @@ struct __attribute__((packed, may_alias)) generic_float } else { - int shift = 0; + unsigned int shift = 0; f.mantissa = mantissa; if(MantissaSize < float32_parts::mantissa_width()) { shift = MantissaSize - ((sizeof(unsigned int) * 8) - countl_zero(mantissa)); - f.mantissa <<= static_cast(shift + 1); + f.mantissa <<= (shift + 1); } f.exponent = float32_parts::exponent_bias() - exponent_bias() - shift; @@ -164,10 +166,8 @@ struct __attribute__((packed, may_alias)) generic_float if(shift_amount < (sizeof(unsigned int) * 8)) { - mantissa = - (f.mantissa | - (1u << static_cast(float32_parts::mantissa_width()))) >> - (shift + (float32_parts::mantissa_width() - MantissaSize) + 1); + mantissa = (f.mantissa | (1u << float32_parts::mantissa_width())) >> + (shift + (float32_parts::mantissa_width() - MantissaSize) + 1); } else { @@ -214,7 +214,7 @@ struct __attribute__((packed, may_alias)) generic_float { generic_float x{}; x.exponent = all_ones(); - x.mantissa = 1u << static_cast(MantissaSize - 2); + x.mantissa = 1u << (MantissaSize - 2u); return x; } @@ -222,7 +222,7 @@ struct __attribute__((packed, may_alias)) generic_float { generic_float x{}; x.exponent = all_ones(); - x.mantissa = 1u << static_cast(MantissaSize - 1); + x.mantissa = 1u << (MantissaSize - 1u); return x; } @@ -291,10 +291,16 @@ struct __attribute__((packed, may_alias)) generic_float MIGRAPHX_GENERIC_FLOAT_BINARY_OP(-) MIGRAPHX_GENERIC_FLOAT_BINARY_OP(+) MIGRAPHX_GENERIC_FLOAT_BINARY_OP(/) - MIGRAPHX_GENERIC_FLOAT_BINARY_OP(<) - MIGRAPHX_GENERIC_FLOAT_BINARY_OP(<=) - MIGRAPHX_GENERIC_FLOAT_BINARY_OP(>) - MIGRAPHX_GENERIC_FLOAT_BINARY_OP(>=) +// NOLINTNEXTLINE +#define MIGRAPHX_GENERIC_FLOAT_COMPARE_OP(op) \ + friend constexpr bool operator op(const generic_float& x, const generic_float& y) \ + { \ + return float(x) op float(y); \ + } + MIGRAPHX_GENERIC_FLOAT_COMPARE_OP(<) + MIGRAPHX_GENERIC_FLOAT_COMPARE_OP(<=) + MIGRAPHX_GENERIC_FLOAT_COMPARE_OP(>) + MIGRAPHX_GENERIC_FLOAT_COMPARE_OP(>=) friend constexpr bool operator==(const generic_float& x, const generic_float& y) { @@ -313,6 +319,13 @@ struct __attribute__((packed, may_alias)) generic_float *this += generic_float(1.0f); return *this; } + + constexpr generic_float operator++(int) noexcept + { + generic_float temp = *this; + *this += generic_float(1.0f); + return temp; + } }; } // namespace MIGRAPHX_INLINE_NS From aa5b9c9b5cf495de523e0b4359f66e030a6a2303 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Thu, 31 Oct 2024 23:30:21 +0000 Subject: [PATCH 43/58] gf --- src/include/migraphx/generic_float.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index cc5988ba4b9..ddf9a79e980 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -106,7 +106,7 @@ struct __attribute__((packed, may_alias)) generic_float else { unsigned int shift = 0; - f.mantissa = mantissa; + f.mantissa = mantissa; if(MantissaSize < float32_parts::mantissa_width()) { From 6f7237076ce147ae13327827cc661f2be7718a11 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Fri, 1 Nov 2024 11:27:29 -0500 Subject: [PATCH 44/58] fix tidy errs --- .clang-tidy | 2 ++ src/include/migraphx/generic_float.hpp | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index caecc0cb295..f4262fd3c13 100755 --- a/.clang-tidy +++ b/.clang-tidy @@ -115,3 +115,5 @@ CheckOptions: value: UPPER_CASE - key: readability-identifier-naming.MacroDefinitionPrefix value: MIGRAPHX_ + - key: readability-identifier-naming.ConstexprMethodIgnoredRegexp + value: 'quiet_NaN|signaling_NaN' diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index ddf9a79e980..93992953658 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -95,7 +95,7 @@ struct __attribute__((packed, may_alias)) generic_float float32_parts f{}; f.sign = sign; - if(exponent == 0) + if(exponent == 0) // subnormal fps { if(mantissa == 0) @@ -320,7 +320,7 @@ struct __attribute__((packed, may_alias)) generic_float return *this; } - constexpr generic_float operator++(int) noexcept + const generic_float operator++(int) noexcept { generic_float temp = *this; *this += generic_float(1.0f); @@ -331,6 +331,7 @@ struct __attribute__((packed, may_alias)) generic_float } // namespace MIGRAPHX_INLINE_NS } // namespace migraphx +// NOLINT(cert-dcl58-cpp) namespace std { template From 0aab1a0635082ac569c4ec9d9e6030a91f65332d Mon Sep 17 00:00:00 2001 From: richagadgil Date: Mon, 4 Nov 2024 11:16:33 -0600 Subject: [PATCH 45/58] bf16 changes --- src/include/migraphx/generic_float.hpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 93992953658..6526a1943b9 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -42,7 +42,7 @@ template constexpr int countl_zero(T value) { unsigned int r = 0; - for(; value != 0; value >>= 1) + for(; value != 0u; value >>= 1u) r++; return 8 * sizeof(value) - r; } @@ -95,7 +95,7 @@ struct __attribute__((packed, may_alias)) generic_float float32_parts f{}; f.sign = sign; - if(exponent == 0) // subnormal fps + if(exponent == 0 and ExponentSize != 8) // subnormal fps { if(mantissa == 0) @@ -306,6 +306,12 @@ struct __attribute__((packed, may_alias)) generic_float { if(not x.is_finite() or not y.is_finite()) return false; + + if((x.mantissa == 0 and x.exponent == 0) and (y.mantissa == 0 and y.exponent == 0)) + { + return true; + } + return std::tie(x.mantissa, x.exponent, x.sign) == std::tie(y.mantissa, y.exponent, y.sign); } @@ -320,7 +326,7 @@ struct __attribute__((packed, may_alias)) generic_float return *this; } - const generic_float operator++(int) noexcept + const generic_float operator++(int) noexcept // NOLINT(readability-const-return-type) { generic_float temp = *this; *this += generic_float(1.0f); @@ -331,11 +337,10 @@ struct __attribute__((packed, may_alias)) generic_float } // namespace MIGRAPHX_INLINE_NS } // namespace migraphx -// NOLINT(cert-dcl58-cpp) namespace std { template -class numeric_limits> +class numeric_limits> // NOLINT(cert-dcl58-cpp) { public: static constexpr bool has_infinity = true; From a337b16ec6ac1088f95703edf528f3a4eedbe741 Mon Sep 17 00:00:00 2001 From: Richa Gadgil Date: Mon, 4 Nov 2024 11:25:01 -0800 Subject: [PATCH 46/58] Update generic_float.hpp --- src/include/migraphx/generic_float.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 6526a1943b9..1ac940e4999 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -386,12 +386,12 @@ class numeric_limits> // NOLINT(cert-dcl58-cpp) }; template -struct common_type, T> : std::common_type // NOLINT +struct common_type, T> : std::common_type // NOLINT(cert-dcl58-cpp) { }; template -struct common_type> : std::common_type // NOLINT +struct common_type> : std::common_type // NOLINT(cert-dcl58-cpp) { }; @@ -416,7 +416,7 @@ struct common_type> : std::common_type -struct common_type, migraphx::generic_float> +struct common_type, migraphx::generic_float> // NOLINT(cert-dcl58-cpp) { using type = migraphx::generic_float; }; From 894ed7fbdcf5300d2d71b1bf43b662d1c3bfa94b Mon Sep 17 00:00:00 2001 From: Richa Gadgil Date: Mon, 4 Nov 2024 14:17:03 -0800 Subject: [PATCH 47/58] Update float32.cpp --- test/float32.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/float32.cpp b/test/float32.cpp index 04994124253..cf6ad1f12ad 100644 --- a/test/float32.cpp +++ b/test/float32.cpp @@ -39,7 +39,7 @@ bool bit_equal(const T& x, const U& y) using type = std::array; return migraphx::bit_cast(x) == migraphx::bit_cast(y); } - +// NOLINTNEXTLINE #define MIGRAPHX_CHECK_FLOAT(x, y) \ CHECK(bit_equal(x, y)); \ CHECK(bit_equal(x, y.to_float())); \ From 4895a68df97b521a500fe78ecbbb9b3e7c799486 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Mon, 4 Nov 2024 22:37:54 +0000 Subject: [PATCH 48/58] fix tidy --- src/include/migraphx/generic_float.hpp | 2 +- test/half.cpp | 2060 ++++++++++++------------ 2 files changed, 1033 insertions(+), 1029 deletions(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 1ac940e4999..d3dab157afb 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -95,7 +95,7 @@ struct __attribute__((packed, may_alias)) generic_float float32_parts f{}; f.sign = sign; - if(exponent == 0 and ExponentSize != 8) // subnormal fps + if(exponent == 0) // subnormal fps { if(mantissa == 0) diff --git a/test/half.cpp b/test/half.cpp index 99171880bc6..04abedcac52 100644 --- a/test/half.cpp +++ b/test/half.cpp @@ -51,1037 +51,1041 @@ TEST_CASE(check_numeric_limits) CHECK(bit_equal(std::numeric_limits::signaling_NaN(), uint16_t{0x7d00})); } -static const std::map half_lut = { - {0x0000, 0}, - {0x0058, 0.0000052452087402}, - {0x0079, 0.0000072121620178}, - {0x0097, 0.0000090003013611}, - {0x009e, 0.0000094175338745}, - {0x0125, 0.0000174641609192}, - {0x0167, 0.0000213980674744}, - {0x0196, 0.0000241994857788}, - {0x01c4, 0.0000269412994385}, - {0x01c8, 0.0000271797180176}, - {0x0236, 0.0000337362289429}, - {0x029f, 0.0000399947166443}, - {0x02bf, 0.0000419020652771}, - {0x02d6, 0.0000432729721069}, - {0x03a6, 0.0000556707382202}, - {0x03b7, 0.0000566840171814}, - {0x03d4, 0.0000584125518799}, - {0x03d8, 0.000058650970459}, - {0x03ed, 0.0000599026679993}, - {0x0427, 0.0000633597373962}, - {0x0430, 0.0000638961791992}, - {0x0435, 0.0000641942024231}, - {0x0454, 0.0000660419464111}, - {0x047a, 0.0000683069229126}, - {0x04b6, 0.0000718832015991}, - {0x056a, 0.0000826120376587}, - {0x056f, 0.0000829100608826}, - {0x0584, 0.0000841617584229}, - {0x05a1, 0.0000858902931213}, - {0x05a4, 0.0000860691070557}, - {0x05b8, 0.0000872611999512}, - {0x05bc, 0.0000874996185303}, - {0x0635, 0.0000947117805481}, - {0x0641, 0.0000954270362854}, - {0x0686, 0.0000995397567749}, - {0x0694, 0.0001003742218018}, - {0x06db, 0.0001046061515808}, - {0x0725, 0.0001090168952942}, - {0x0777, 0.0001139044761658}, - {0x07b2, 0.0001174211502075}, - {0x0812, 0.0001242160797119}, - {0x082e, 0.0001275539398193}, - {0x0859, 0.00013267993927}, - {0x0895, 0.0001398324966431}, - {0x08af, 0.0001429319381714}, - {0x08fc, 0.0001521110534668}, - {0x092e, 0.0001580715179443}, - {0x0971, 0.0001660585403442}, - {0x0991, 0.0001698732376099}, - {0x09ca, 0.0001766681671143}, - {0x0a63, 0.0001949071884155}, - {0x0a8e, 0.0002000331878662}, - {0x0a93, 0.000200629234314}, - {0x0b2a, 0.0002186298370361}, - {0x0b3a, 0.0002205371856689}, - {0x0b3c, 0.000220775604248}, - {0x0b4e, 0.00022292137146}, - {0x0bae, 0.0002343654632568}, - {0x0bff, 0.0002440214157104}, - {0x0c08, 0.0002460479736328}, - {0x0c56, 0.0002646446228027}, - {0x0c61, 0.0002672672271729}, - {0x0c70, 0.0002708435058594}, - {0x0c7c, 0.0002737045288086}, - {0x0cd8, 0.0002956390380859}, - {0x0cdd, 0.0002968311309814}, - {0x0d05, 0.0003063678741455}, - {0x0d61, 0.0003283023834229}, - {0x0d85, 0.0003368854522705}, - {0x0d8c, 0.0003385543823242}, - {0x0d90, 0.0003395080566406}, - {0x0d9e, 0.000342845916748}, - {0x0da5, 0.0003445148468018}, - {0x0dda, 0.0003571510314941}, - {0x0dde, 0.0003581047058105}, - {0x0df6, 0.000363826751709}, - {0x0eec, 0.000422477722168}, - {0x0f1c, 0.0004339218139648}, - {0x0f99, 0.0004637241363525}, - {0x0fac, 0.0004682540893555}, - {0x0fb0, 0.0004692077636719}, - {0x0ff5, 0.0004856586456299}, - {0x107f, 0.0005488395690918}, - {0x1096, 0.0005598068237305}, - {0x10c8, 0.0005836486816406}, - {0x10e9, 0.0005993843078613}, - {0x110a, 0.000615119934082}, - {0x118a, 0.000676155090332}, - {0x11b5, 0.0006966590881348}, - {0x1293, 0.0008025169372559}, - {0x133f, 0.0008845329284668}, - {0x1342, 0.0008859634399414}, - {0x1372, 0.0009088516235352}, - {0x13cf, 0.000953197479248}, - {0x140c, 0.0009880065917969}, - {0x1437, 0.0010290145874023}, - {0x14a3, 0.0011320114135742}, - {0x14a6, 0.0011348724365234}, - {0x14b2, 0.0011463165283203}, - {0x14ba, 0.0011539459228516}, - {0x14d9, 0.0011835098266602}, - {0x14da, 0.0011844635009766}, - {0x14e7, 0.0011968612670898}, - {0x14fe, 0.0012187957763672}, - {0x1521, 0.0012521743774414}, - {0x153d, 0.0012788772583008}, - {0x15ad, 0.0013856887817383}, - {0x15fd, 0.0014619827270508}, - {0x1649, 0.0015344619750977}, - {0x1658, 0.0015487670898438}, - {0x168a, 0.0015964508056641}, - {0x169d, 0.0016145706176758}, - {0x16b3, 0.0016355514526367}, - {0x16c9, 0.0016565322875977}, - {0x16d1, 0.0016641616821289}, - {0x16e0, 0.001678466796875}, - {0x170a, 0.0017185211181641}, - {0x176d, 0.0018129348754883}, - {0x185b, 0.0021266937255859}, - {0x185e, 0.0021324157714844}, - {0x187e, 0.0021934509277344}, - {0x18ca, 0.0023384094238281}, - {0x18e9, 0.0023975372314453}, - {0x1901, 0.0024433135986328}, - {0x191e, 0.0024986267089844}, - {0x1963, 0.0026302337646484}, - {0x199f, 0.0027446746826172}, - {0x19b2, 0.0027809143066406}, - {0x19d4, 0.0028457641601562}, - {0x1a31, 0.0030231475830078}, - {0x1a4a, 0.0030708312988281}, - {0x1a7a, 0.0031623840332031}, - {0x1ace, 0.0033226013183594}, - {0x1b03, 0.0034236907958984}, - {0x1b22, 0.0034828186035156}, - {0x1d49, 0.0051612854003906}, - {0x1d5a, 0.0052261352539062}, - {0x1d6c, 0.0052947998046875}, - {0x1e02, 0.0058670043945312}, - {0x1e19, 0.0059547424316406}, - {0x1e4c, 0.0061492919921875}, - {0x1eb3, 0.0065422058105469}, - {0x1f32, 0.0070266723632812}, - {0x1f36, 0.0070419311523438}, - {0x1f41, 0.0070838928222656}, - {0x1f7a, 0.0073013305664062}, - {0x1f8d, 0.0073738098144531}, - {0x200b, 0.0078964233398438}, - {0x205f, 0.0085372924804688}, - {0x2060, 0.008544921875}, - {0x2067, 0.0085983276367188}, - {0x20e2, 0.0095367431640625}, - {0x2164, 0.010528564453125}, - {0x22a4, 0.012969970703125}, - {0x22b4, 0.013092041015625}, - {0x22f2, 0.0135650634765625}, - {0x230c, 0.013763427734375}, - {0x2314, 0.013824462890625}, - {0x2341, 0.0141677856445312}, - {0x2356, 0.0143280029296875}, - {0x236e, 0.0145111083984375}, - {0x2371, 0.0145339965820312}, - {0x23cd, 0.0152359008789062}, - {0x2405, 0.0157012939453125}, - {0x24a2, 0.018096923828125}, - {0x24ba, 0.018463134765625}, - {0x24e7, 0.0191497802734375}, - {0x266c, 0.02508544921875}, - {0x26a2, 0.025909423828125}, - {0x26cc, 0.02655029296875}, - {0x26f0, 0.027099609375}, - {0x271e, 0.027801513671875}, - {0x2798, 0.0296630859375}, - {0x287d, 0.035064697265625}, - {0x28a2, 0.03619384765625}, - {0x28ca, 0.03741455078125}, - {0x2933, 0.040618896484375}, - {0x298d, 0.043365478515625}, - {0x299e, 0.04388427734375}, - {0x29c0, 0.044921875}, - {0x29c2, 0.04498291015625}, - {0x29cf, 0.045379638671875}, - {0x29fa, 0.04669189453125}, - {0x2a06, 0.04705810546875}, - {0x2aa5, 0.051910400390625}, - {0x2bcb, 0.060882568359375}, - {0x2c18, 0.06396484375}, - {0x2c65, 0.06866455078125}, - {0x2c66, 0.0687255859375}, - {0x2c93, 0.07147216796875}, - {0x2d24, 0.080322265625}, - {0x2d35, 0.08135986328125}, - {0x2d4c, 0.082763671875}, - {0x2db7, 0.08929443359375}, - {0x2dec, 0.092529296875}, - {0x2e31, 0.09674072265625}, - {0x2ec9, 0.10601806640625}, - {0x2f85, 0.11749267578125}, - {0x2f94, 0.118408203125}, - {0x302b, 0.1302490234375}, - {0x3094, 0.14306640625}, - {0x3096, 0.143310546875}, - {0x30ae, 0.146240234375}, - {0x30b9, 0.1475830078125}, - {0x310c, 0.15771484375}, - {0x31bd, 0.1793212890625}, - {0x3213, 0.1898193359375}, - {0x325b, 0.1986083984375}, - {0x32aa, 0.208251953125}, - {0x32c0, 0.2109375}, - {0x32d7, 0.2137451171875}, - {0x3391, 0.2364501953125}, - {0x340d, 0.253173828125}, - {0x343d, 0.264892578125}, - {0x3566, 0.33740234375}, - {0x35e6, 0.36865234375}, - {0x35f4, 0.3720703125}, - {0x363b, 0.389404296875}, - {0x363e, 0.39013671875}, - {0x3650, 0.39453125}, - {0x3698, 0.412109375}, - {0x36e7, 0.431396484375}, - {0x36fe, 0.43701171875}, - {0x374a, 0.45556640625}, - {0x3760, 0.4609375}, - {0x3761, 0.461181640625}, - {0x379e, 0.47607421875}, - {0x37cc, 0.4873046875}, - {0x37fd, 0.499267578125}, - {0x3828, 0.51953125}, - {0x3841, 0.53173828125}, - {0x3877, 0.55810546875}, - {0x38a4, 0.580078125}, - {0x38d3, 0.60302734375}, - {0x39b2, 0.7119140625}, - {0x3a60, 0.796875}, - {0x3aa3, 0.82958984375}, - {0x3aa6, 0.8310546875}, - {0x3ac9, 0.84814453125}, - {0x3acf, 0.85107421875}, - {0x3b14, 0.884765625}, - {0x3b42, 0.9072265625}, - {0x3b5c, 0.919921875}, - {0x3bde, 0.9833984375}, - {0x3c67, 1.1005859375}, - {0x3cb5, 1.1767578125}, - {0x3cca, 1.197265625}, - {0x3cdd, 1.2158203125}, - {0x3cfc, 1.24609375}, - {0x3d1f, 1.2802734375}, - {0x3e0c, 1.51171875}, - {0x3e1c, 1.52734375}, - {0x3e5b, 1.5888671875}, - {0x3e7f, 1.6240234375}, - {0x3eae, 1.669921875}, - {0x3efe, 1.748046875}, - {0x3f3e, 1.810546875}, - {0x3f9d, 1.9033203125}, - {0x400a, 2.01953125}, - {0x4070, 2.21875}, - {0x40a0, 2.3125}, - {0x40ce, 2.40234375}, - {0x40e6, 2.44921875}, - {0x410e, 2.52734375}, - {0x4129, 2.580078125}, - {0x4144, 2.6328125}, - {0x41a4, 2.8203125}, - {0x41f3, 2.974609375}, - {0x42f1, 3.470703125}, - {0x438f, 3.779296875}, - {0x43b0, 3.84375}, - {0x43c3, 3.880859375}, - {0x43de, 3.93359375}, - {0x4483, 4.51171875}, - {0x44f8, 4.96875}, - {0x4505, 5.01953125}, - {0x45dd, 5.86328125}, - {0x45f3, 5.94921875}, - {0x460e, 6.0546875}, - {0x46ce, 6.8046875}, - {0x4704, 7.015625}, - {0x471a, 7.1015625}, - {0x475e, 7.3671875}, - {0x4761, 7.37890625}, - {0x479f, 7.62109375}, - {0x47ca, 7.7890625}, - {0x47db, 7.85546875}, - {0x47fc, 7.984375}, - {0x481e, 8.234375}, - {0x4839, 8.4453125}, - {0x483d, 8.4765625}, - {0x48ac, 9.34375}, - {0x48da, 9.703125}, - {0x4919, 10.1953125}, - {0x4950, 10.625}, - {0x4987, 11.0546875}, - {0x49bb, 11.4609375}, - {0x4a14, 12.15625}, - {0x4a92, 13.140625}, - {0x4b25, 14.2890625}, - {0x4b81, 15.0078125}, - {0x4b99, 15.1953125}, - {0x4bbe, 15.484375}, - {0x4bf8, 15.9375}, - {0x4c1f, 16.484375}, - {0x4c49, 17.140625}, - {0x4d21, 20.515625}, - {0x4d4a, 21.15625}, - {0x4d51, 21.265625}, - {0x4de2, 23.53125}, - {0x4e05, 24.078125}, - {0x4ea3, 26.546875}, - {0x4eb0, 26.75}, - {0x4f0e, 28.21875}, - {0x4f4a, 29.15625}, - {0x4f6b, 29.671875}, - {0x4fa6, 30.59375}, - {0x4fae, 30.71875}, - {0x4ff6, 31.84375}, - {0x503c, 33.875}, - {0x50e4, 39.125}, - {0x514e, 42.4375}, - {0x516b, 43.34375}, - {0x51d3, 46.59375}, - {0x5213, 48.59375}, - {0x526e, 51.4375}, - {0x52a6, 53.1875}, - {0x52b4, 53.625}, - {0x52b6, 53.6875}, - {0x52bc, 53.875}, - {0x5300, 56}, - {0x5389, 60.28125}, - {0x5406, 64.375}, - {0x5498, 73.5}, - {0x54bd, 75.8125}, - {0x54cf, 76.9375}, - {0x5502, 80.125}, - {0x558e, 88.875}, - {0x5597, 89.4375}, - {0x55eb, 94.6875}, - {0x55f6, 95.375}, - {0x5629, 98.5625}, - {0x562b, 98.6875}, - {0x5635, 99.3125}, - {0x564e, 100.875}, - {0x5671, 103.0625}, - {0x5681, 104.0625}, - {0x56d1, 109.0625}, - {0x571c, 113.75}, - {0x5756, 117.375}, - {0x5790, 121}, - {0x57fd, 127.8125}, - {0x582d, 133.625}, - {0x5869, 141.125}, - {0x58ab, 149.375}, - {0x58ad, 149.625}, - {0x58c9, 153.125}, - {0x58f7, 158.875}, - {0x5904, 160.5}, - {0x59c2, 184.25}, - {0x59e6, 188.75}, - {0x5a88, 209}, - {0x5ada, 219.25}, - {0x5aef, 221.875}, - {0x5af5, 222.625}, - {0x5b7f, 239.875}, - {0x5ba4, 244.5}, - {0x5c08, 258}, - {0x5cbf, 303.75}, - {0x5d4d, 339.25}, - {0x5dc2, 368.5}, - {0x5dc4, 369}, - {0x5e31, 396.25}, - {0x5e38, 398}, - {0x5e7c, 415}, - {0x5e8d, 419.25}, - {0x5ead, 427.25}, - {0x5eb4, 429}, - {0x5ec0, 432}, - {0x5eef, 443.75}, - {0x5f04, 449}, - {0x5f41, 464.25}, - {0x5f58, 470}, - {0x5f61, 472.25}, - {0x5f77, 477.75}, - {0x5f7b, 478.75}, - {0x6029, 532.5}, - {0x6046, 547}, - {0x6055, 554.5}, - {0x60a8, 596}, - {0x60d7, 619.5}, - {0x6139, 668.5}, - {0x6167, 691.5}, - {0x61b5, 730.5}, - {0x61c0, 736}, - {0x61e6, 755}, - {0x625b, 813.5}, - {0x62c4, 866}, - {0x62fd, 894.5}, - {0x62fe, 895}, - {0x6332, 921}, - {0x636a, 949}, - {0x6374, 954}, - {0x6376, 955}, - {0x639f, 975.5}, - {0x63d6, 1003}, - {0x6417, 1047}, - {0x642e, 1070}, - {0x6431, 1073}, - {0x644f, 1103}, - {0x6459, 1113}, - {0x645b, 1115}, - {0x6480, 1152}, - {0x648d, 1165}, - {0x649f, 1183}, - {0x64bb, 1211}, - {0x6516, 1302}, - {0x6571, 1393}, - {0x6585, 1413}, - {0x65aa, 1450}, - {0x660c, 1548}, - {0x6694, 1684}, - {0x66d0, 1744}, - {0x6721, 1825}, - {0x672d, 1837}, - {0x6734, 1844}, - {0x6766, 1894}, - {0x6773, 1907}, - {0x677d, 1917}, - {0x679a, 1946}, - {0x690f, 2590}, - {0x6934, 2664}, - {0x6955, 2730}, - {0x697d, 2810}, - {0x698e, 2844}, - {0x6a3a, 3188}, - {0x6a63, 3270}, - {0x6a67, 3278}, - {0x6a7c, 3320}, - {0x6a87, 3342}, - {0x6b07, 3598}, - {0x6b11, 3618}, - {0x6b36, 3692}, - {0x6b3c, 3704}, - {0x6b75, 3818}, - {0x6b88, 3856}, - {0x6be6, 4044}, - {0x6bee, 4060}, - {0x6c62, 4488}, - {0x6c8b, 4652}, - {0x6d30, 5312}, - {0x6d48, 5408}, - {0x6ddd, 6004}, - {0x6de9, 6052}, - {0x6e39, 6372}, - {0x6e7e, 6648}, - {0x6ea5, 6804}, - {0x6ec5, 6932}, - {0x6ee1, 7044}, - {0x6ef1, 7108}, - {0x6fa2, 7816}, - {0x6fbc, 7920}, - {0x704c, 8800}, - {0x7083, 9240}, - {0x7108, 10304}, - {0x7115, 10408}, - {0x7128, 10560}, - {0x71af, 11640}, - {0x7222, 12560}, - {0x7228, 12608}, - {0x72a5, 13608}, - {0x72e0, 14080}, - {0x72e6, 14128}, - {0x731e, 14576}, - {0x7377, 15288}, - {0x741d, 16848}, - {0x7423, 16944}, - {0x7424, 16960}, - {0x7466, 18016}, - {0x74b0, 19200}, - {0x74ce, 19680}, - {0x74f0, 20224}, - {0x754b, 21680}, - {0x7575, 22352}, - {0x7594, 22848}, - {0x75b1, 23312}, - {0x7614, 24896}, - {0x7618, 24960}, - {0x7631, 25360}, - {0x7660, 26112}, - {0x76c8, 27776}, - {0x7773, 30512}, - {0x77af, 31472}, - {0x77b9, 31632}, - {0x77de, 32224}, - {0x7844, 34944}, - {0x78d2, 39488}, - {0x7924, 42112}, - {0x793b, 42848}, - {0x79db, 47968}, - {0x7a0f, 49632}, - {0x7a1a, 49984}, - {0x7a6c, 52608}, - {0x7a99, 54048}, - {0x7ada, 56128}, - {0x7b0f, 57824}, - {0x7b15, 58016}, - {0x7b41, 59424}, - {0x7b51, 59936}, - {0x7b9c, 62336}, - {0x7ba3, 62560}, - {0x7c00, std::numeric_limits::infinity()}, - {0x7c05, std::numeric_limits::quiet_NaN()}, - {0x7c0e, std::numeric_limits::quiet_NaN()}, - {0x7c3e, std::numeric_limits::quiet_NaN()}, - {0x7c4e, std::numeric_limits::quiet_NaN()}, - {0x7c55, std::numeric_limits::quiet_NaN()}, - {0x7c58, std::numeric_limits::quiet_NaN()}, - {0x7c66, std::numeric_limits::quiet_NaN()}, - {0x7cc9, std::numeric_limits::quiet_NaN()}, - {0x7cd8, std::numeric_limits::quiet_NaN()}, - {0x7d2d, std::numeric_limits::quiet_NaN()}, - {0x7d60, std::numeric_limits::quiet_NaN()}, - {0x7d79, std::numeric_limits::quiet_NaN()}, - {0x7dc7, std::numeric_limits::quiet_NaN()}, - {0x7dcf, std::numeric_limits::quiet_NaN()}, - {0x7dd8, std::numeric_limits::quiet_NaN()}, - {0x7dfb, std::numeric_limits::quiet_NaN()}, - {0x7e0f, std::numeric_limits::quiet_NaN()}, - {0x7e56, std::numeric_limits::quiet_NaN()}, - {0x7e89, std::numeric_limits::quiet_NaN()}, - {0x7e9c, std::numeric_limits::quiet_NaN()}, - {0x7eb2, std::numeric_limits::quiet_NaN()}, - {0x7ec3, std::numeric_limits::quiet_NaN()}, - {0x7ef9, std::numeric_limits::quiet_NaN()}, - {0x7f36, std::numeric_limits::quiet_NaN()}, - {0x8040, -0.0000038146972656}, - {0x8101, -0.0000153183937073}, - {0x813d, -0.0000188946723938}, - {0x81a8, -0.0000252723693848}, - {0x81bc, -0.0000264644622803}, - {0x81c2, -0.0000268220901489}, - {0x8259, -0.00003582239151}, - {0x8330, -0.0000486373901367}, - {0x8366, -0.0000518560409546}, - {0x8392, -0.0000544786453247}, - {0x83e4, -0.0000593662261963}, - {0x83ee, -0.000059962272644}, - {0x8402, -0.0000611543655396}, - {0x845e, -0.0000666379928589}, - {0x84ac, -0.0000712871551514}, - {0x84b1, -0.0000715851783752}, - {0x84fb, -0.0000759959220886}, - {0x8546, -0.0000804662704468}, - {0x856f, -0.0000829100608826}, - {0x85b5, -0.0000870823860168}, - {0x8638, -0.0000948905944824}, - {0x8656, -0.0000966787338257}, - {0x86b9, -0.0001025795936584}, - {0x86ba, -0.0001026391983032}, - {0x86fe, -0.0001066923141479}, - {0x8731, -0.0001097321510315}, - {0x8740, -0.0001106262207031}, - {0x8793, -0.0001155734062195}, - {0x87bd, -0.0001180768013}, - {0x87f1, -0.0001211762428284}, - {0x87f4, -0.0001213550567627}, - {0x8809, -0.000123143196106}, - {0x882a, -0.0001270771026611}, - {0x8848, -0.0001306533813477}, - {0x8852, -0.0001318454742432}, - {0x8874, -0.0001358985900879}, - {0x8892, -0.0001394748687744}, - {0x88a7, -0.000141978263855}, - {0x88c8, -0.0001459121704102}, - {0x8927, -0.0001572370529175}, - {0x892a, -0.0001575946807861}, - {0x8989, -0.0001689195632935}, - {0x89b9, -0.0001746416091919}, - {0x8b18, -0.0002164840698242}, - {0x8b4b, -0.0002225637435913}, - {0x8b62, -0.000225305557251}, - {0x8b7f, -0.0002287626266479}, - {0x8bca, -0.0002377033233643}, - {0x8bcf, -0.000238299369812}, - {0x8bff, -0.0002440214157104}, - {0x8c0b, -0.0002467632293701}, - {0x8c55, -0.0002644062042236}, - {0x8c63, -0.0002677440643311}, - {0x8d53, -0.0003249645233154}, - {0x8dba, -0.0003495216369629}, - {0x8e03, -0.0003669261932373}, - {0x8e82, -0.0003972053527832}, - {0x8e9c, -0.0004034042358398}, - {0x8faa, -0.0004677772521973}, - {0x902f, -0.0005106925964355}, - {0x9051, -0.0005269050598145}, - {0x9066, -0.0005369186401367}, - {0x907e, -0.0005483627319336}, - {0x9080, -0.00054931640625}, - {0x908e, -0.0005559921264648}, - {0x9102, -0.0006113052368164}, - {0x91eb, -0.0007224082946777}, - {0x9215, -0.0007424354553223}, - {0x9252, -0.0007715225219727}, - {0x9294, -0.0008029937744141}, - {0x9297, -0.0008044242858887}, - {0x933d, -0.0008835792541504}, - {0x936f, -0.0009074211120605}, - {0x93aa, -0.0009355545043945}, - {0x93f2, -0.0009698867797852}, - {0x941d, -0.0010042190551758}, - {0x945a, -0.0010623931884766}, - {0x94ad, -0.0011415481567383}, - {0x94d2, -0.0011768341064453}, - {0x951c, -0.0012474060058594}, - {0x9520, -0.001251220703125}, - {0x952f, -0.0012655258178711}, - {0x953f, -0.0012807846069336}, - {0x9549, -0.0012903213500977}, - {0x95c6, -0.0014095306396484}, - {0x9602, -0.0014667510986328}, - {0x969b, -0.001612663269043}, - {0x96fa, -0.0017032623291016}, - {0x977d, -0.0018281936645508}, - {0x97c3, -0.0018949508666992}, - {0x97c6, -0.0018978118896484}, - {0x97db, -0.001917839050293}, - {0x97f9, -0.0019464492797852}, - {0x983f, -0.0020732879638672}, - {0x984e, -0.0021018981933594}, - {0x985a, -0.0021247863769531}, - {0x988c, -0.0022201538085938}, - {0x990d, -0.0024662017822266}, - {0x9958, -0.0026092529296875}, - {0x9971, -0.0026569366455078}, - {0x9a4e, -0.0030784606933594}, - {0x9a8f, -0.0032024383544922}, - {0x9abe, -0.0032920837402344}, - {0x9ace, -0.0033226013183594}, - {0x9b1e, -0.0034751892089844}, - {0x9b3e, -0.0035362243652344}, - {0x9b77, -0.0036449432373047}, - {0x9b89, -0.0036792755126953}, - {0x9b90, -0.003692626953125}, - {0x9bec, -0.0038681030273438}, - {0x9c03, -0.0039176940917969}, - {0x9c75, -0.0043525695800781}, - {0x9d6c, -0.0052947998046875}, - {0x9d74, -0.0053253173828125}, - {0x9da7, -0.0055198669433594}, - {0x9e73, -0.0062980651855469}, - {0x9e94, -0.0064239501953125}, - {0x9f17, -0.0069236755371094}, - {0x9f3a, -0.0070571899414062}, - {0x9f6c, -0.0072479248046875}, - {0x9f89, -0.0073585510253906}, - {0x9fbd, -0.0075569152832031}, - {0xa003, -0.0078353881835938}, - {0xa014, -0.007965087890625}, - {0xa019, -0.0080032348632812}, - {0xa01d, -0.0080337524414062}, - {0xa090, -0.0089111328125}, - {0xa1cf, -0.0113449096679688}, - {0xa1dd, -0.0114517211914062}, - {0xa249, -0.0122756958007812}, - {0xa26d, -0.0125503540039062}, - {0xa288, -0.01275634765625}, - {0xa2fb, -0.0136337280273438}, - {0xa390, -0.0147705078125}, - {0xa3b3, -0.0150375366210938}, - {0xa3ed, -0.0154800415039062}, - {0xa434, -0.01641845703125}, - {0xa476, -0.017425537109375}, - {0xa571, -0.0212554931640625}, - {0xa57d, -0.0214385986328125}, - {0xa597, -0.0218353271484375}, - {0xa5d1, -0.0227203369140625}, - {0xa5f9, -0.0233306884765625}, - {0xa680, -0.025390625}, - {0xa6e3, -0.0269012451171875}, - {0xa6f0, -0.027099609375}, - {0xa72d, -0.0280303955078125}, - {0xa77e, -0.029266357421875}, - {0xa7d0, -0.030517578125}, - {0xa7ee, -0.030975341796875}, - {0xa7f3, -0.0310516357421875}, - {0xa80c, -0.0316162109375}, - {0xa827, -0.032440185546875}, - {0xa89f, -0.036102294921875}, - {0xa8a0, -0.0361328125}, - {0xa8a5, -0.036285400390625}, - {0xa948, -0.041259765625}, - {0xaa0c, -0.0472412109375}, - {0xaa16, -0.04754638671875}, - {0xaa9a, -0.05157470703125}, - {0xaaeb, -0.054046630859375}, - {0xab5c, -0.0574951171875}, - {0xac7e, -0.0701904296875}, - {0xad33, -0.08123779296875}, - {0xad37, -0.08148193359375}, - {0xad90, -0.0869140625}, - {0xada0, -0.087890625}, - {0xade5, -0.09210205078125}, - {0xadf8, -0.09326171875}, - {0xae02, -0.0938720703125}, - {0xae04, -0.093994140625}, - {0xae4f, -0.09857177734375}, - {0xae63, -0.09979248046875}, - {0xaebe, -0.1053466796875}, - {0xaee1, -0.10748291015625}, - {0xaef9, -0.10894775390625}, - {0xaf0b, -0.11004638671875}, - {0xaf78, -0.11669921875}, - {0xaf7d, -0.11700439453125}, - {0xaf7f, -0.11712646484375}, - {0xaf8c, -0.117919921875}, - {0xafcb, -0.12176513671875}, - {0xb06b, -0.1380615234375}, - {0xb07b, -0.1400146484375}, - {0xb088, -0.1416015625}, - {0xb0b2, -0.146728515625}, - {0xb0ed, -0.1539306640625}, - {0xb0f9, -0.1553955078125}, - {0xb16c, -0.16943359375}, - {0xb189, -0.1729736328125}, - {0xb1c5, -0.1802978515625}, - {0xb1f7, -0.1864013671875}, - {0xb22d, -0.1929931640625}, - {0xb23c, -0.19482421875}, - {0xb258, -0.1982421875}, - {0xb2c7, -0.2117919921875}, - {0xb2de, -0.214599609375}, - {0xb2e1, -0.2149658203125}, - {0xb317, -0.2215576171875}, - {0xb31d, -0.2222900390625}, - {0xb3ef, -0.2479248046875}, - {0xb3f8, -0.2490234375}, - {0xb45a, -0.27197265625}, - {0xb548, -0.330078125}, - {0xb5d8, -0.365234375}, - {0xb64e, -0.39404296875}, - {0xb69f, -0.413818359375}, - {0xb6e6, -0.43115234375}, - {0xb6ed, -0.432861328125}, - {0xb6f7, -0.435302734375}, - {0xb79a, -0.47509765625}, - {0xb7b6, -0.48193359375}, - {0xb7ee, -0.49560546875}, - {0xb856, -0.5419921875}, - {0xb8c0, -0.59375}, - {0xb96f, -0.67919921875}, - {0xb9a5, -0.70556640625}, - {0xba1e, -0.7646484375}, - {0xba2d, -0.77197265625}, - {0xba48, -0.78515625}, - {0xba65, -0.79931640625}, - {0xbaaf, -0.83544921875}, - {0xbab0, -0.8359375}, - {0xbb12, -0.8837890625}, - {0xbb35, -0.90087890625}, - {0xbb47, -0.90966796875}, - {0xbb97, -0.94873046875}, - {0xbba3, -0.95458984375}, - {0xbbcb, -0.97412109375}, - {0xbbe8, -0.98828125}, - {0xbbee, -0.9912109375}, - {0xbd03, -1.2529296875}, - {0xbd4b, -1.3232421875}, - {0xbd4c, -1.32421875}, - {0xbd8a, -1.384765625}, - {0xbdb6, -1.427734375}, - {0xbde1, -1.4697265625}, - {0xbe04, -1.50390625}, - {0xbe50, -1.578125}, - {0xbe54, -1.58203125}, - {0xbe6a, -1.603515625}, - {0xbf31, -1.7978515625}, - {0xbf87, -1.8818359375}, - {0xbfa2, -1.908203125}, - {0xc016, -2.04296875}, - {0xc074, -2.2265625}, - {0xc0ca, -2.39453125}, - {0xc100, -2.5}, - {0xc1b7, -2.857421875}, - {0xc1b9, -2.861328125}, - {0xc1d3, -2.912109375}, - {0xc23f, -3.123046875}, - {0xc2d5, -3.416015625}, - {0xc32f, -3.591796875}, - {0xc3e3, -3.943359375}, - {0xc412, -4.0703125}, - {0xc49a, -4.6015625}, - {0xc4ca, -4.7890625}, - {0xc4cf, -4.80859375}, - {0xc523, -5.13671875}, - {0xc55d, -5.36328125}, - {0xc5aa, -5.6640625}, - {0xc604, -6.015625}, - {0xc61b, -6.10546875}, - {0xc642, -6.2578125}, - {0xc68b, -6.54296875}, - {0xc69e, -6.6171875}, - {0xc6b0, -6.6875}, - {0xc6ca, -6.7890625}, - {0xc71e, -7.1171875}, - {0xc721, -7.12890625}, - {0xc73b, -7.23046875}, - {0xc7d4, -7.828125}, - {0xc831, -8.3828125}, - {0xc89a, -9.203125}, - {0xc8be, -9.484375}, - {0xc8dc, -9.71875}, - {0xc8e4, -9.78125}, - {0xc8fa, -9.953125}, - {0xc8fe, -9.984375}, - {0xc969, -10.8203125}, - {0xca0f, -12.1171875}, - {0xca1a, -12.203125}, - {0xca6f, -12.8671875}, - {0xca7b, -12.9609375}, - {0xca8f, -13.1171875}, - {0xcaca, -13.578125}, - {0xcafd, -13.9765625}, - {0xcb05, -14.0390625}, - {0xcb6b, -14.8359375}, - {0xcbaf, -15.3671875}, - {0xcbb4, -15.40625}, - {0xcbdf, -15.7421875}, - {0xcc2d, -16.703125}, - {0xcc74, -17.8125}, - {0xccac, -18.6875}, - {0xcd11, -20.265625}, - {0xce04, -24.0625}, - {0xce0f, -24.234375}, - {0xceaf, -26.734375}, - {0xceb8, -26.875}, - {0xcf36, -28.84375}, - {0xcfad, -30.703125}, - {0xd019, -32.78125}, - {0xd08d, -36.40625}, - {0xd115, -40.65625}, - {0xd119, -40.78125}, - {0xd128, -41.25}, - {0xd1a4, -45.125}, - {0xd1b7, -45.71875}, - {0xd1b8, -45.75}, - {0xd203, -48.09375}, - {0xd20a, -48.3125}, - {0xd28b, -52.34375}, - {0xd2ac, -53.375}, - {0xd2ae, -53.4375}, - {0xd2c5, -54.15625}, - {0xd2f2, -55.5625}, - {0xd326, -57.1875}, - {0xd337, -57.71875}, - {0xd343, -58.09375}, - {0xd34e, -58.4375}, - {0xd40c, -64.75}, - {0xd43b, -67.6875}, - {0xd45a, -69.625}, - {0xd464, -70.25}, - {0xd4c3, -76.1875}, - {0xd505, -80.3125}, - {0xd52d, -82.8125}, - {0xd5cf, -92.9375}, - {0xd5f0, -95}, - {0xd607, -96.4375}, - {0xd635, -99.3125}, - {0xd63d, -99.8125}, - {0xd644, -100.25}, - {0xd658, -101.5}, - {0xd789, -120.5625}, - {0xd863, -140.375}, - {0xd866, -140.75}, - {0xd884, -144.5}, - {0xd88d, -145.625}, - {0xd89b, -147.375}, - {0xd8da, -155.25}, - {0xd93b, -167.375}, - {0xd982, -176.25}, - {0xd995, -178.625}, - {0xd99d, -179.625}, - {0xd9cf, -185.875}, - {0xdaaf, -213.875}, - {0xdabd, -215.625}, - {0xdb54, -234.5}, - {0xdc10, -260}, - {0xdca1, -296.25}, - {0xdd0a, -322.5}, - {0xdd56, -341.5}, - {0xddcf, -371.75}, - {0xde04, -385}, - {0xde0d, -387.25}, - {0xde3d, -399.25}, - {0xde4f, -403.75}, - {0xde66, -409.5}, - {0xdeae, -427.5}, - {0xdf52, -468.5}, - {0xdf63, -472.75}, - {0xdf6a, -474.5}, - {0xdf77, -477.75}, - {0xdf7b, -478.75}, - {0xdfc5, -497.25}, - {0xdfcf, -499.75}, - {0xdfd2, -500.5}, - {0xdfd8, -502}, - {0xdfe1, -504.25}, - {0xe022, -529}, - {0xe046, -547}, - {0xe092, -585}, - {0xe0b0, -600}, - {0xe0be, -607}, - {0xe0f4, -634}, - {0xe11b, -653.5}, - {0xe19c, -718}, - {0xe213, -777.5}, - {0xe232, -793}, - {0xe25b, -813.5}, - {0xe262, -817}, - {0xe279, -828.5}, - {0xe2cc, -870}, - {0xe2da, -877}, - {0xe326, -915}, - {0xe330, -920}, - {0xe3c3, -993.5}, - {0xe3cc, -998}, - {0xe566, -1382}, - {0xe57e, -1406}, - {0xe5c8, -1480}, - {0xe609, -1545}, - {0xe628, -1576}, - {0xe663, -1635}, - {0xe6ac, -1708}, - {0xe710, -1808}, - {0xe77f, -1919}, - {0xe7e7, -2023}, - {0xe868, -2256}, - {0xe885, -2314}, - {0xe8ea, -2516}, - {0xe919, -2610}, - {0xe92c, -2648}, - {0xea60, -3264}, - {0xeac1, -3458}, - {0xeacb, -3478}, - {0xeb22, -3652}, - {0xeb2c, -3672}, - {0xeb59, -3762}, - {0xeba5, -3914}, - {0xec53, -4428}, - {0xec97, -4700}, - {0xed16, -5208}, - {0xed4a, -5416}, - {0xed69, -5540}, - {0xee14, -6224}, - {0xee59, -6500}, - {0xee8a, -6696}, - {0xee93, -6732}, - {0xeed7, -7004}, - {0xef0b, -7212}, - {0xef59, -7524}, - {0xef61, -7556}, - {0xef67, -7580}, - {0xefb6, -7896}, - {0xf03a, -8656}, - {0xf04e, -8816}, - {0xf05f, -8952}, - {0xf09f, -9464}, - {0xf0c0, -9728}, - {0xf173, -11160}, - {0xf1d7, -11960}, - {0xf225, -12584}, - {0xf2ca, -13904}, - {0xf2d8, -14016}, - {0xf2e5, -14120}, - {0xf317, -14520}, - {0xf35d, -15080}, - {0xf3bd, -15848}, - {0xf3d3, -16024}, - {0xf3e6, -16176}, - {0xf3fb, -16344}, - {0xf477, -18288}, - {0xf4e0, -19968}, - {0xf4e5, -20048}, - {0xf50b, -20656}, - {0xf5a2, -23072}, - {0xf5c1, -23568}, - {0xf634, -25408}, - {0xf651, -25872}, - {0xf68a, -26784}, - {0xf69c, -27072}, - {0xf6ce, -27872}, - {0xf816, -33472}, - {0xf849, -35104}, - {0xf869, -36128}, - {0xf878, -36608}, - {0xf8cf, -39392}, - {0xf90a, -41280}, - {0xf916, -41664}, - {0xf91e, -41920}, - {0xf9c1, -47136}, - {0xfa0a, -49472}, - {0xfa11, -49696}, - {0xfa1d, -50080}, - {0xfa51, -51744}, - {0xfa86, -53440}, - {0xfaac, -54656}, - {0xfb95, -62112}, - {0xfbd1, -64032}, - {0xfbe0, -64512}, - {0xfbf5, -65184}, - {0xfc00, -std::numeric_limits::infinity()}, - {0xfca5, std::numeric_limits::quiet_NaN()}, - {0xfcb9, std::numeric_limits::quiet_NaN()}, - {0xfcc6, std::numeric_limits::quiet_NaN()}, - {0xfd72, std::numeric_limits::quiet_NaN()}, - {0xfd77, std::numeric_limits::quiet_NaN()}, - {0xfda3, std::numeric_limits::quiet_NaN()}, - {0xfe3e, std::numeric_limits::quiet_NaN()}, - {0xfe89, std::numeric_limits::quiet_NaN()}, - {0xfe91, std::numeric_limits::quiet_NaN()}, - {0xfe93, std::numeric_limits::quiet_NaN()}, - {0xfed1, std::numeric_limits::quiet_NaN()}, - {0xff7a, std::numeric_limits::quiet_NaN()}, - {0xffa3, std::numeric_limits::quiet_NaN()}, -}; +const std::map& half_lut() +{ + static const std::map result = { + {0x0000, 0}, + {0x0058, 0.0000052452087402}, + {0x0079, 0.0000072121620178}, + {0x0097, 0.0000090003013611}, + {0x009e, 0.0000094175338745}, + {0x0125, 0.0000174641609192}, + {0x0167, 0.0000213980674744}, + {0x0196, 0.0000241994857788}, + {0x01c4, 0.0000269412994385}, + {0x01c8, 0.0000271797180176}, + {0x0236, 0.0000337362289429}, + {0x029f, 0.0000399947166443}, + {0x02bf, 0.0000419020652771}, + {0x02d6, 0.0000432729721069}, + {0x03a6, 0.0000556707382202}, + {0x03b7, 0.0000566840171814}, + {0x03d4, 0.0000584125518799}, + {0x03d8, 0.000058650970459}, + {0x03ed, 0.0000599026679993}, + {0x0427, 0.0000633597373962}, + {0x0430, 0.0000638961791992}, + {0x0435, 0.0000641942024231}, + {0x0454, 0.0000660419464111}, + {0x047a, 0.0000683069229126}, + {0x04b6, 0.0000718832015991}, + {0x056a, 0.0000826120376587}, + {0x056f, 0.0000829100608826}, + {0x0584, 0.0000841617584229}, + {0x05a1, 0.0000858902931213}, + {0x05a4, 0.0000860691070557}, + {0x05b8, 0.0000872611999512}, + {0x05bc, 0.0000874996185303}, + {0x0635, 0.0000947117805481}, + {0x0641, 0.0000954270362854}, + {0x0686, 0.0000995397567749}, + {0x0694, 0.0001003742218018}, + {0x06db, 0.0001046061515808}, + {0x0725, 0.0001090168952942}, + {0x0777, 0.0001139044761658}, + {0x07b2, 0.0001174211502075}, + {0x0812, 0.0001242160797119}, + {0x082e, 0.0001275539398193}, + {0x0859, 0.00013267993927}, + {0x0895, 0.0001398324966431}, + {0x08af, 0.0001429319381714}, + {0x08fc, 0.0001521110534668}, + {0x092e, 0.0001580715179443}, + {0x0971, 0.0001660585403442}, + {0x0991, 0.0001698732376099}, + {0x09ca, 0.0001766681671143}, + {0x0a63, 0.0001949071884155}, + {0x0a8e, 0.0002000331878662}, + {0x0a93, 0.000200629234314}, + {0x0b2a, 0.0002186298370361}, + {0x0b3a, 0.0002205371856689}, + {0x0b3c, 0.000220775604248}, + {0x0b4e, 0.00022292137146}, + {0x0bae, 0.0002343654632568}, + {0x0bff, 0.0002440214157104}, + {0x0c08, 0.0002460479736328}, + {0x0c56, 0.0002646446228027}, + {0x0c61, 0.0002672672271729}, + {0x0c70, 0.0002708435058594}, + {0x0c7c, 0.0002737045288086}, + {0x0cd8, 0.0002956390380859}, + {0x0cdd, 0.0002968311309814}, + {0x0d05, 0.0003063678741455}, + {0x0d61, 0.0003283023834229}, + {0x0d85, 0.0003368854522705}, + {0x0d8c, 0.0003385543823242}, + {0x0d90, 0.0003395080566406}, + {0x0d9e, 0.000342845916748}, + {0x0da5, 0.0003445148468018}, + {0x0dda, 0.0003571510314941}, + {0x0dde, 0.0003581047058105}, + {0x0df6, 0.000363826751709}, + {0x0eec, 0.000422477722168}, + {0x0f1c, 0.0004339218139648}, + {0x0f99, 0.0004637241363525}, + {0x0fac, 0.0004682540893555}, + {0x0fb0, 0.0004692077636719}, + {0x0ff5, 0.0004856586456299}, + {0x107f, 0.0005488395690918}, + {0x1096, 0.0005598068237305}, + {0x10c8, 0.0005836486816406}, + {0x10e9, 0.0005993843078613}, + {0x110a, 0.000615119934082}, + {0x118a, 0.000676155090332}, + {0x11b5, 0.0006966590881348}, + {0x1293, 0.0008025169372559}, + {0x133f, 0.0008845329284668}, + {0x1342, 0.0008859634399414}, + {0x1372, 0.0009088516235352}, + {0x13cf, 0.000953197479248}, + {0x140c, 0.0009880065917969}, + {0x1437, 0.0010290145874023}, + {0x14a3, 0.0011320114135742}, + {0x14a6, 0.0011348724365234}, + {0x14b2, 0.0011463165283203}, + {0x14ba, 0.0011539459228516}, + {0x14d9, 0.0011835098266602}, + {0x14da, 0.0011844635009766}, + {0x14e7, 0.0011968612670898}, + {0x14fe, 0.0012187957763672}, + {0x1521, 0.0012521743774414}, + {0x153d, 0.0012788772583008}, + {0x15ad, 0.0013856887817383}, + {0x15fd, 0.0014619827270508}, + {0x1649, 0.0015344619750977}, + {0x1658, 0.0015487670898438}, + {0x168a, 0.0015964508056641}, + {0x169d, 0.0016145706176758}, + {0x16b3, 0.0016355514526367}, + {0x16c9, 0.0016565322875977}, + {0x16d1, 0.0016641616821289}, + {0x16e0, 0.001678466796875}, + {0x170a, 0.0017185211181641}, + {0x176d, 0.0018129348754883}, + {0x185b, 0.0021266937255859}, + {0x185e, 0.0021324157714844}, + {0x187e, 0.0021934509277344}, + {0x18ca, 0.0023384094238281}, + {0x18e9, 0.0023975372314453}, + {0x1901, 0.0024433135986328}, + {0x191e, 0.0024986267089844}, + {0x1963, 0.0026302337646484}, + {0x199f, 0.0027446746826172}, + {0x19b2, 0.0027809143066406}, + {0x19d4, 0.0028457641601562}, + {0x1a31, 0.0030231475830078}, + {0x1a4a, 0.0030708312988281}, + {0x1a7a, 0.0031623840332031}, + {0x1ace, 0.0033226013183594}, + {0x1b03, 0.0034236907958984}, + {0x1b22, 0.0034828186035156}, + {0x1d49, 0.0051612854003906}, + {0x1d5a, 0.0052261352539062}, + {0x1d6c, 0.0052947998046875}, + {0x1e02, 0.0058670043945312}, + {0x1e19, 0.0059547424316406}, + {0x1e4c, 0.0061492919921875}, + {0x1eb3, 0.0065422058105469}, + {0x1f32, 0.0070266723632812}, + {0x1f36, 0.0070419311523438}, + {0x1f41, 0.0070838928222656}, + {0x1f7a, 0.0073013305664062}, + {0x1f8d, 0.0073738098144531}, + {0x200b, 0.0078964233398438}, + {0x205f, 0.0085372924804688}, + {0x2060, 0.008544921875}, + {0x2067, 0.0085983276367188}, + {0x20e2, 0.0095367431640625}, + {0x2164, 0.010528564453125}, + {0x22a4, 0.012969970703125}, + {0x22b4, 0.013092041015625}, + {0x22f2, 0.0135650634765625}, + {0x230c, 0.013763427734375}, + {0x2314, 0.013824462890625}, + {0x2341, 0.0141677856445312}, + {0x2356, 0.0143280029296875}, + {0x236e, 0.0145111083984375}, + {0x2371, 0.0145339965820312}, + {0x23cd, 0.0152359008789062}, + {0x2405, 0.0157012939453125}, + {0x24a2, 0.018096923828125}, + {0x24ba, 0.018463134765625}, + {0x24e7, 0.0191497802734375}, + {0x266c, 0.02508544921875}, + {0x26a2, 0.025909423828125}, + {0x26cc, 0.02655029296875}, + {0x26f0, 0.027099609375}, + {0x271e, 0.027801513671875}, + {0x2798, 0.0296630859375}, + {0x287d, 0.035064697265625}, + {0x28a2, 0.03619384765625}, + {0x28ca, 0.03741455078125}, + {0x2933, 0.040618896484375}, + {0x298d, 0.043365478515625}, + {0x299e, 0.04388427734375}, + {0x29c0, 0.044921875}, + {0x29c2, 0.04498291015625}, + {0x29cf, 0.045379638671875}, + {0x29fa, 0.04669189453125}, + {0x2a06, 0.04705810546875}, + {0x2aa5, 0.051910400390625}, + {0x2bcb, 0.060882568359375}, + {0x2c18, 0.06396484375}, + {0x2c65, 0.06866455078125}, + {0x2c66, 0.0687255859375}, + {0x2c93, 0.07147216796875}, + {0x2d24, 0.080322265625}, + {0x2d35, 0.08135986328125}, + {0x2d4c, 0.082763671875}, + {0x2db7, 0.08929443359375}, + {0x2dec, 0.092529296875}, + {0x2e31, 0.09674072265625}, + {0x2ec9, 0.10601806640625}, + {0x2f85, 0.11749267578125}, + {0x2f94, 0.118408203125}, + {0x302b, 0.1302490234375}, + {0x3094, 0.14306640625}, + {0x3096, 0.143310546875}, + {0x30ae, 0.146240234375}, + {0x30b9, 0.1475830078125}, + {0x310c, 0.15771484375}, + {0x31bd, 0.1793212890625}, + {0x3213, 0.1898193359375}, + {0x325b, 0.1986083984375}, + {0x32aa, 0.208251953125}, + {0x32c0, 0.2109375}, + {0x32d7, 0.2137451171875}, + {0x3391, 0.2364501953125}, + {0x340d, 0.253173828125}, + {0x343d, 0.264892578125}, + {0x3566, 0.33740234375}, + {0x35e6, 0.36865234375}, + {0x35f4, 0.3720703125}, + {0x363b, 0.389404296875}, + {0x363e, 0.39013671875}, + {0x3650, 0.39453125}, + {0x3698, 0.412109375}, + {0x36e7, 0.431396484375}, + {0x36fe, 0.43701171875}, + {0x374a, 0.45556640625}, + {0x3760, 0.4609375}, + {0x3761, 0.461181640625}, + {0x379e, 0.47607421875}, + {0x37cc, 0.4873046875}, + {0x37fd, 0.499267578125}, + {0x3828, 0.51953125}, + {0x3841, 0.53173828125}, + {0x3877, 0.55810546875}, + {0x38a4, 0.580078125}, + {0x38d3, 0.60302734375}, + {0x39b2, 0.7119140625}, + {0x3a60, 0.796875}, + {0x3aa3, 0.82958984375}, + {0x3aa6, 0.8310546875}, + {0x3ac9, 0.84814453125}, + {0x3acf, 0.85107421875}, + {0x3b14, 0.884765625}, + {0x3b42, 0.9072265625}, + {0x3b5c, 0.919921875}, + {0x3bde, 0.9833984375}, + {0x3c67, 1.1005859375}, + {0x3cb5, 1.1767578125}, + {0x3cca, 1.197265625}, + {0x3cdd, 1.2158203125}, + {0x3cfc, 1.24609375}, + {0x3d1f, 1.2802734375}, + {0x3e0c, 1.51171875}, + {0x3e1c, 1.52734375}, + {0x3e5b, 1.5888671875}, + {0x3e7f, 1.6240234375}, + {0x3eae, 1.669921875}, + {0x3efe, 1.748046875}, + {0x3f3e, 1.810546875}, + {0x3f9d, 1.9033203125}, + {0x400a, 2.01953125}, + {0x4070, 2.21875}, + {0x40a0, 2.3125}, + {0x40ce, 2.40234375}, + {0x40e6, 2.44921875}, + {0x410e, 2.52734375}, + {0x4129, 2.580078125}, + {0x4144, 2.6328125}, + {0x41a4, 2.8203125}, + {0x41f3, 2.974609375}, + {0x42f1, 3.470703125}, + {0x438f, 3.779296875}, + {0x43b0, 3.84375}, + {0x43c3, 3.880859375}, + {0x43de, 3.93359375}, + {0x4483, 4.51171875}, + {0x44f8, 4.96875}, + {0x4505, 5.01953125}, + {0x45dd, 5.86328125}, + {0x45f3, 5.94921875}, + {0x460e, 6.0546875}, + {0x46ce, 6.8046875}, + {0x4704, 7.015625}, + {0x471a, 7.1015625}, + {0x475e, 7.3671875}, + {0x4761, 7.37890625}, + {0x479f, 7.62109375}, + {0x47ca, 7.7890625}, + {0x47db, 7.85546875}, + {0x47fc, 7.984375}, + {0x481e, 8.234375}, + {0x4839, 8.4453125}, + {0x483d, 8.4765625}, + {0x48ac, 9.34375}, + {0x48da, 9.703125}, + {0x4919, 10.1953125}, + {0x4950, 10.625}, + {0x4987, 11.0546875}, + {0x49bb, 11.4609375}, + {0x4a14, 12.15625}, + {0x4a92, 13.140625}, + {0x4b25, 14.2890625}, + {0x4b81, 15.0078125}, + {0x4b99, 15.1953125}, + {0x4bbe, 15.484375}, + {0x4bf8, 15.9375}, + {0x4c1f, 16.484375}, + {0x4c49, 17.140625}, + {0x4d21, 20.515625}, + {0x4d4a, 21.15625}, + {0x4d51, 21.265625}, + {0x4de2, 23.53125}, + {0x4e05, 24.078125}, + {0x4ea3, 26.546875}, + {0x4eb0, 26.75}, + {0x4f0e, 28.21875}, + {0x4f4a, 29.15625}, + {0x4f6b, 29.671875}, + {0x4fa6, 30.59375}, + {0x4fae, 30.71875}, + {0x4ff6, 31.84375}, + {0x503c, 33.875}, + {0x50e4, 39.125}, + {0x514e, 42.4375}, + {0x516b, 43.34375}, + {0x51d3, 46.59375}, + {0x5213, 48.59375}, + {0x526e, 51.4375}, + {0x52a6, 53.1875}, + {0x52b4, 53.625}, + {0x52b6, 53.6875}, + {0x52bc, 53.875}, + {0x5300, 56}, + {0x5389, 60.28125}, + {0x5406, 64.375}, + {0x5498, 73.5}, + {0x54bd, 75.8125}, + {0x54cf, 76.9375}, + {0x5502, 80.125}, + {0x558e, 88.875}, + {0x5597, 89.4375}, + {0x55eb, 94.6875}, + {0x55f6, 95.375}, + {0x5629, 98.5625}, + {0x562b, 98.6875}, + {0x5635, 99.3125}, + {0x564e, 100.875}, + {0x5671, 103.0625}, + {0x5681, 104.0625}, + {0x56d1, 109.0625}, + {0x571c, 113.75}, + {0x5756, 117.375}, + {0x5790, 121}, + {0x57fd, 127.8125}, + {0x582d, 133.625}, + {0x5869, 141.125}, + {0x58ab, 149.375}, + {0x58ad, 149.625}, + {0x58c9, 153.125}, + {0x58f7, 158.875}, + {0x5904, 160.5}, + {0x59c2, 184.25}, + {0x59e6, 188.75}, + {0x5a88, 209}, + {0x5ada, 219.25}, + {0x5aef, 221.875}, + {0x5af5, 222.625}, + {0x5b7f, 239.875}, + {0x5ba4, 244.5}, + {0x5c08, 258}, + {0x5cbf, 303.75}, + {0x5d4d, 339.25}, + {0x5dc2, 368.5}, + {0x5dc4, 369}, + {0x5e31, 396.25}, + {0x5e38, 398}, + {0x5e7c, 415}, + {0x5e8d, 419.25}, + {0x5ead, 427.25}, + {0x5eb4, 429}, + {0x5ec0, 432}, + {0x5eef, 443.75}, + {0x5f04, 449}, + {0x5f41, 464.25}, + {0x5f58, 470}, + {0x5f61, 472.25}, + {0x5f77, 477.75}, + {0x5f7b, 478.75}, + {0x6029, 532.5}, + {0x6046, 547}, + {0x6055, 554.5}, + {0x60a8, 596}, + {0x60d7, 619.5}, + {0x6139, 668.5}, + {0x6167, 691.5}, + {0x61b5, 730.5}, + {0x61c0, 736}, + {0x61e6, 755}, + {0x625b, 813.5}, + {0x62c4, 866}, + {0x62fd, 894.5}, + {0x62fe, 895}, + {0x6332, 921}, + {0x636a, 949}, + {0x6374, 954}, + {0x6376, 955}, + {0x639f, 975.5}, + {0x63d6, 1003}, + {0x6417, 1047}, + {0x642e, 1070}, + {0x6431, 1073}, + {0x644f, 1103}, + {0x6459, 1113}, + {0x645b, 1115}, + {0x6480, 1152}, + {0x648d, 1165}, + {0x649f, 1183}, + {0x64bb, 1211}, + {0x6516, 1302}, + {0x6571, 1393}, + {0x6585, 1413}, + {0x65aa, 1450}, + {0x660c, 1548}, + {0x6694, 1684}, + {0x66d0, 1744}, + {0x6721, 1825}, + {0x672d, 1837}, + {0x6734, 1844}, + {0x6766, 1894}, + {0x6773, 1907}, + {0x677d, 1917}, + {0x679a, 1946}, + {0x690f, 2590}, + {0x6934, 2664}, + {0x6955, 2730}, + {0x697d, 2810}, + {0x698e, 2844}, + {0x6a3a, 3188}, + {0x6a63, 3270}, + {0x6a67, 3278}, + {0x6a7c, 3320}, + {0x6a87, 3342}, + {0x6b07, 3598}, + {0x6b11, 3618}, + {0x6b36, 3692}, + {0x6b3c, 3704}, + {0x6b75, 3818}, + {0x6b88, 3856}, + {0x6be6, 4044}, + {0x6bee, 4060}, + {0x6c62, 4488}, + {0x6c8b, 4652}, + {0x6d30, 5312}, + {0x6d48, 5408}, + {0x6ddd, 6004}, + {0x6de9, 6052}, + {0x6e39, 6372}, + {0x6e7e, 6648}, + {0x6ea5, 6804}, + {0x6ec5, 6932}, + {0x6ee1, 7044}, + {0x6ef1, 7108}, + {0x6fa2, 7816}, + {0x6fbc, 7920}, + {0x704c, 8800}, + {0x7083, 9240}, + {0x7108, 10304}, + {0x7115, 10408}, + {0x7128, 10560}, + {0x71af, 11640}, + {0x7222, 12560}, + {0x7228, 12608}, + {0x72a5, 13608}, + {0x72e0, 14080}, + {0x72e6, 14128}, + {0x731e, 14576}, + {0x7377, 15288}, + {0x741d, 16848}, + {0x7423, 16944}, + {0x7424, 16960}, + {0x7466, 18016}, + {0x74b0, 19200}, + {0x74ce, 19680}, + {0x74f0, 20224}, + {0x754b, 21680}, + {0x7575, 22352}, + {0x7594, 22848}, + {0x75b1, 23312}, + {0x7614, 24896}, + {0x7618, 24960}, + {0x7631, 25360}, + {0x7660, 26112}, + {0x76c8, 27776}, + {0x7773, 30512}, + {0x77af, 31472}, + {0x77b9, 31632}, + {0x77de, 32224}, + {0x7844, 34944}, + {0x78d2, 39488}, + {0x7924, 42112}, + {0x793b, 42848}, + {0x79db, 47968}, + {0x7a0f, 49632}, + {0x7a1a, 49984}, + {0x7a6c, 52608}, + {0x7a99, 54048}, + {0x7ada, 56128}, + {0x7b0f, 57824}, + {0x7b15, 58016}, + {0x7b41, 59424}, + {0x7b51, 59936}, + {0x7b9c, 62336}, + {0x7ba3, 62560}, + {0x7c00, std::numeric_limits::infinity()}, + {0x7c05, std::numeric_limits::quiet_NaN()}, + {0x7c0e, std::numeric_limits::quiet_NaN()}, + {0x7c3e, std::numeric_limits::quiet_NaN()}, + {0x7c4e, std::numeric_limits::quiet_NaN()}, + {0x7c55, std::numeric_limits::quiet_NaN()}, + {0x7c58, std::numeric_limits::quiet_NaN()}, + {0x7c66, std::numeric_limits::quiet_NaN()}, + {0x7cc9, std::numeric_limits::quiet_NaN()}, + {0x7cd8, std::numeric_limits::quiet_NaN()}, + {0x7d2d, std::numeric_limits::quiet_NaN()}, + {0x7d60, std::numeric_limits::quiet_NaN()}, + {0x7d79, std::numeric_limits::quiet_NaN()}, + {0x7dc7, std::numeric_limits::quiet_NaN()}, + {0x7dcf, std::numeric_limits::quiet_NaN()}, + {0x7dd8, std::numeric_limits::quiet_NaN()}, + {0x7dfb, std::numeric_limits::quiet_NaN()}, + {0x7e0f, std::numeric_limits::quiet_NaN()}, + {0x7e56, std::numeric_limits::quiet_NaN()}, + {0x7e89, std::numeric_limits::quiet_NaN()}, + {0x7e9c, std::numeric_limits::quiet_NaN()}, + {0x7eb2, std::numeric_limits::quiet_NaN()}, + {0x7ec3, std::numeric_limits::quiet_NaN()}, + {0x7ef9, std::numeric_limits::quiet_NaN()}, + {0x7f36, std::numeric_limits::quiet_NaN()}, + {0x8040, -0.0000038146972656}, + {0x8101, -0.0000153183937073}, + {0x813d, -0.0000188946723938}, + {0x81a8, -0.0000252723693848}, + {0x81bc, -0.0000264644622803}, + {0x81c2, -0.0000268220901489}, + {0x8259, -0.00003582239151}, + {0x8330, -0.0000486373901367}, + {0x8366, -0.0000518560409546}, + {0x8392, -0.0000544786453247}, + {0x83e4, -0.0000593662261963}, + {0x83ee, -0.000059962272644}, + {0x8402, -0.0000611543655396}, + {0x845e, -0.0000666379928589}, + {0x84ac, -0.0000712871551514}, + {0x84b1, -0.0000715851783752}, + {0x84fb, -0.0000759959220886}, + {0x8546, -0.0000804662704468}, + {0x856f, -0.0000829100608826}, + {0x85b5, -0.0000870823860168}, + {0x8638, -0.0000948905944824}, + {0x8656, -0.0000966787338257}, + {0x86b9, -0.0001025795936584}, + {0x86ba, -0.0001026391983032}, + {0x86fe, -0.0001066923141479}, + {0x8731, -0.0001097321510315}, + {0x8740, -0.0001106262207031}, + {0x8793, -0.0001155734062195}, + {0x87bd, -0.0001180768013}, + {0x87f1, -0.0001211762428284}, + {0x87f4, -0.0001213550567627}, + {0x8809, -0.000123143196106}, + {0x882a, -0.0001270771026611}, + {0x8848, -0.0001306533813477}, + {0x8852, -0.0001318454742432}, + {0x8874, -0.0001358985900879}, + {0x8892, -0.0001394748687744}, + {0x88a7, -0.000141978263855}, + {0x88c8, -0.0001459121704102}, + {0x8927, -0.0001572370529175}, + {0x892a, -0.0001575946807861}, + {0x8989, -0.0001689195632935}, + {0x89b9, -0.0001746416091919}, + {0x8b18, -0.0002164840698242}, + {0x8b4b, -0.0002225637435913}, + {0x8b62, -0.000225305557251}, + {0x8b7f, -0.0002287626266479}, + {0x8bca, -0.0002377033233643}, + {0x8bcf, -0.000238299369812}, + {0x8bff, -0.0002440214157104}, + {0x8c0b, -0.0002467632293701}, + {0x8c55, -0.0002644062042236}, + {0x8c63, -0.0002677440643311}, + {0x8d53, -0.0003249645233154}, + {0x8dba, -0.0003495216369629}, + {0x8e03, -0.0003669261932373}, + {0x8e82, -0.0003972053527832}, + {0x8e9c, -0.0004034042358398}, + {0x8faa, -0.0004677772521973}, + {0x902f, -0.0005106925964355}, + {0x9051, -0.0005269050598145}, + {0x9066, -0.0005369186401367}, + {0x907e, -0.0005483627319336}, + {0x9080, -0.00054931640625}, + {0x908e, -0.0005559921264648}, + {0x9102, -0.0006113052368164}, + {0x91eb, -0.0007224082946777}, + {0x9215, -0.0007424354553223}, + {0x9252, -0.0007715225219727}, + {0x9294, -0.0008029937744141}, + {0x9297, -0.0008044242858887}, + {0x933d, -0.0008835792541504}, + {0x936f, -0.0009074211120605}, + {0x93aa, -0.0009355545043945}, + {0x93f2, -0.0009698867797852}, + {0x941d, -0.0010042190551758}, + {0x945a, -0.0010623931884766}, + {0x94ad, -0.0011415481567383}, + {0x94d2, -0.0011768341064453}, + {0x951c, -0.0012474060058594}, + {0x9520, -0.001251220703125}, + {0x952f, -0.0012655258178711}, + {0x953f, -0.0012807846069336}, + {0x9549, -0.0012903213500977}, + {0x95c6, -0.0014095306396484}, + {0x9602, -0.0014667510986328}, + {0x969b, -0.001612663269043}, + {0x96fa, -0.0017032623291016}, + {0x977d, -0.0018281936645508}, + {0x97c3, -0.0018949508666992}, + {0x97c6, -0.0018978118896484}, + {0x97db, -0.001917839050293}, + {0x97f9, -0.0019464492797852}, + {0x983f, -0.0020732879638672}, + {0x984e, -0.0021018981933594}, + {0x985a, -0.0021247863769531}, + {0x988c, -0.0022201538085938}, + {0x990d, -0.0024662017822266}, + {0x9958, -0.0026092529296875}, + {0x9971, -0.0026569366455078}, + {0x9a4e, -0.0030784606933594}, + {0x9a8f, -0.0032024383544922}, + {0x9abe, -0.0032920837402344}, + {0x9ace, -0.0033226013183594}, + {0x9b1e, -0.0034751892089844}, + {0x9b3e, -0.0035362243652344}, + {0x9b77, -0.0036449432373047}, + {0x9b89, -0.0036792755126953}, + {0x9b90, -0.003692626953125}, + {0x9bec, -0.0038681030273438}, + {0x9c03, -0.0039176940917969}, + {0x9c75, -0.0043525695800781}, + {0x9d6c, -0.0052947998046875}, + {0x9d74, -0.0053253173828125}, + {0x9da7, -0.0055198669433594}, + {0x9e73, -0.0062980651855469}, + {0x9e94, -0.0064239501953125}, + {0x9f17, -0.0069236755371094}, + {0x9f3a, -0.0070571899414062}, + {0x9f6c, -0.0072479248046875}, + {0x9f89, -0.0073585510253906}, + {0x9fbd, -0.0075569152832031}, + {0xa003, -0.0078353881835938}, + {0xa014, -0.007965087890625}, + {0xa019, -0.0080032348632812}, + {0xa01d, -0.0080337524414062}, + {0xa090, -0.0089111328125}, + {0xa1cf, -0.0113449096679688}, + {0xa1dd, -0.0114517211914062}, + {0xa249, -0.0122756958007812}, + {0xa26d, -0.0125503540039062}, + {0xa288, -0.01275634765625}, + {0xa2fb, -0.0136337280273438}, + {0xa390, -0.0147705078125}, + {0xa3b3, -0.0150375366210938}, + {0xa3ed, -0.0154800415039062}, + {0xa434, -0.01641845703125}, + {0xa476, -0.017425537109375}, + {0xa571, -0.0212554931640625}, + {0xa57d, -0.0214385986328125}, + {0xa597, -0.0218353271484375}, + {0xa5d1, -0.0227203369140625}, + {0xa5f9, -0.0233306884765625}, + {0xa680, -0.025390625}, + {0xa6e3, -0.0269012451171875}, + {0xa6f0, -0.027099609375}, + {0xa72d, -0.0280303955078125}, + {0xa77e, -0.029266357421875}, + {0xa7d0, -0.030517578125}, + {0xa7ee, -0.030975341796875}, + {0xa7f3, -0.0310516357421875}, + {0xa80c, -0.0316162109375}, + {0xa827, -0.032440185546875}, + {0xa89f, -0.036102294921875}, + {0xa8a0, -0.0361328125}, + {0xa8a5, -0.036285400390625}, + {0xa948, -0.041259765625}, + {0xaa0c, -0.0472412109375}, + {0xaa16, -0.04754638671875}, + {0xaa9a, -0.05157470703125}, + {0xaaeb, -0.054046630859375}, + {0xab5c, -0.0574951171875}, + {0xac7e, -0.0701904296875}, + {0xad33, -0.08123779296875}, + {0xad37, -0.08148193359375}, + {0xad90, -0.0869140625}, + {0xada0, -0.087890625}, + {0xade5, -0.09210205078125}, + {0xadf8, -0.09326171875}, + {0xae02, -0.0938720703125}, + {0xae04, -0.093994140625}, + {0xae4f, -0.09857177734375}, + {0xae63, -0.09979248046875}, + {0xaebe, -0.1053466796875}, + {0xaee1, -0.10748291015625}, + {0xaef9, -0.10894775390625}, + {0xaf0b, -0.11004638671875}, + {0xaf78, -0.11669921875}, + {0xaf7d, -0.11700439453125}, + {0xaf7f, -0.11712646484375}, + {0xaf8c, -0.117919921875}, + {0xafcb, -0.12176513671875}, + {0xb06b, -0.1380615234375}, + {0xb07b, -0.1400146484375}, + {0xb088, -0.1416015625}, + {0xb0b2, -0.146728515625}, + {0xb0ed, -0.1539306640625}, + {0xb0f9, -0.1553955078125}, + {0xb16c, -0.16943359375}, + {0xb189, -0.1729736328125}, + {0xb1c5, -0.1802978515625}, + {0xb1f7, -0.1864013671875}, + {0xb22d, -0.1929931640625}, + {0xb23c, -0.19482421875}, + {0xb258, -0.1982421875}, + {0xb2c7, -0.2117919921875}, + {0xb2de, -0.214599609375}, + {0xb2e1, -0.2149658203125}, + {0xb317, -0.2215576171875}, + {0xb31d, -0.2222900390625}, + {0xb3ef, -0.2479248046875}, + {0xb3f8, -0.2490234375}, + {0xb45a, -0.27197265625}, + {0xb548, -0.330078125}, + {0xb5d8, -0.365234375}, + {0xb64e, -0.39404296875}, + {0xb69f, -0.413818359375}, + {0xb6e6, -0.43115234375}, + {0xb6ed, -0.432861328125}, + {0xb6f7, -0.435302734375}, + {0xb79a, -0.47509765625}, + {0xb7b6, -0.48193359375}, + {0xb7ee, -0.49560546875}, + {0xb856, -0.5419921875}, + {0xb8c0, -0.59375}, + {0xb96f, -0.67919921875}, + {0xb9a5, -0.70556640625}, + {0xba1e, -0.7646484375}, + {0xba2d, -0.77197265625}, + {0xba48, -0.78515625}, + {0xba65, -0.79931640625}, + {0xbaaf, -0.83544921875}, + {0xbab0, -0.8359375}, + {0xbb12, -0.8837890625}, + {0xbb35, -0.90087890625}, + {0xbb47, -0.90966796875}, + {0xbb97, -0.94873046875}, + {0xbba3, -0.95458984375}, + {0xbbcb, -0.97412109375}, + {0xbbe8, -0.98828125}, + {0xbbee, -0.9912109375}, + {0xbd03, -1.2529296875}, + {0xbd4b, -1.3232421875}, + {0xbd4c, -1.32421875}, + {0xbd8a, -1.384765625}, + {0xbdb6, -1.427734375}, + {0xbde1, -1.4697265625}, + {0xbe04, -1.50390625}, + {0xbe50, -1.578125}, + {0xbe54, -1.58203125}, + {0xbe6a, -1.603515625}, + {0xbf31, -1.7978515625}, + {0xbf87, -1.8818359375}, + {0xbfa2, -1.908203125}, + {0xc016, -2.04296875}, + {0xc074, -2.2265625}, + {0xc0ca, -2.39453125}, + {0xc100, -2.5}, + {0xc1b7, -2.857421875}, + {0xc1b9, -2.861328125}, + {0xc1d3, -2.912109375}, + {0xc23f, -3.123046875}, + {0xc2d5, -3.416015625}, + {0xc32f, -3.591796875}, + {0xc3e3, -3.943359375}, + {0xc412, -4.0703125}, + {0xc49a, -4.6015625}, + {0xc4ca, -4.7890625}, + {0xc4cf, -4.80859375}, + {0xc523, -5.13671875}, + {0xc55d, -5.36328125}, + {0xc5aa, -5.6640625}, + {0xc604, -6.015625}, + {0xc61b, -6.10546875}, + {0xc642, -6.2578125}, + {0xc68b, -6.54296875}, + {0xc69e, -6.6171875}, + {0xc6b0, -6.6875}, + {0xc6ca, -6.7890625}, + {0xc71e, -7.1171875}, + {0xc721, -7.12890625}, + {0xc73b, -7.23046875}, + {0xc7d4, -7.828125}, + {0xc831, -8.3828125}, + {0xc89a, -9.203125}, + {0xc8be, -9.484375}, + {0xc8dc, -9.71875}, + {0xc8e4, -9.78125}, + {0xc8fa, -9.953125}, + {0xc8fe, -9.984375}, + {0xc969, -10.8203125}, + {0xca0f, -12.1171875}, + {0xca1a, -12.203125}, + {0xca6f, -12.8671875}, + {0xca7b, -12.9609375}, + {0xca8f, -13.1171875}, + {0xcaca, -13.578125}, + {0xcafd, -13.9765625}, + {0xcb05, -14.0390625}, + {0xcb6b, -14.8359375}, + {0xcbaf, -15.3671875}, + {0xcbb4, -15.40625}, + {0xcbdf, -15.7421875}, + {0xcc2d, -16.703125}, + {0xcc74, -17.8125}, + {0xccac, -18.6875}, + {0xcd11, -20.265625}, + {0xce04, -24.0625}, + {0xce0f, -24.234375}, + {0xceaf, -26.734375}, + {0xceb8, -26.875}, + {0xcf36, -28.84375}, + {0xcfad, -30.703125}, + {0xd019, -32.78125}, + {0xd08d, -36.40625}, + {0xd115, -40.65625}, + {0xd119, -40.78125}, + {0xd128, -41.25}, + {0xd1a4, -45.125}, + {0xd1b7, -45.71875}, + {0xd1b8, -45.75}, + {0xd203, -48.09375}, + {0xd20a, -48.3125}, + {0xd28b, -52.34375}, + {0xd2ac, -53.375}, + {0xd2ae, -53.4375}, + {0xd2c5, -54.15625}, + {0xd2f2, -55.5625}, + {0xd326, -57.1875}, + {0xd337, -57.71875}, + {0xd343, -58.09375}, + {0xd34e, -58.4375}, + {0xd40c, -64.75}, + {0xd43b, -67.6875}, + {0xd45a, -69.625}, + {0xd464, -70.25}, + {0xd4c3, -76.1875}, + {0xd505, -80.3125}, + {0xd52d, -82.8125}, + {0xd5cf, -92.9375}, + {0xd5f0, -95}, + {0xd607, -96.4375}, + {0xd635, -99.3125}, + {0xd63d, -99.8125}, + {0xd644, -100.25}, + {0xd658, -101.5}, + {0xd789, -120.5625}, + {0xd863, -140.375}, + {0xd866, -140.75}, + {0xd884, -144.5}, + {0xd88d, -145.625}, + {0xd89b, -147.375}, + {0xd8da, -155.25}, + {0xd93b, -167.375}, + {0xd982, -176.25}, + {0xd995, -178.625}, + {0xd99d, -179.625}, + {0xd9cf, -185.875}, + {0xdaaf, -213.875}, + {0xdabd, -215.625}, + {0xdb54, -234.5}, + {0xdc10, -260}, + {0xdca1, -296.25}, + {0xdd0a, -322.5}, + {0xdd56, -341.5}, + {0xddcf, -371.75}, + {0xde04, -385}, + {0xde0d, -387.25}, + {0xde3d, -399.25}, + {0xde4f, -403.75}, + {0xde66, -409.5}, + {0xdeae, -427.5}, + {0xdf52, -468.5}, + {0xdf63, -472.75}, + {0xdf6a, -474.5}, + {0xdf77, -477.75}, + {0xdf7b, -478.75}, + {0xdfc5, -497.25}, + {0xdfcf, -499.75}, + {0xdfd2, -500.5}, + {0xdfd8, -502}, + {0xdfe1, -504.25}, + {0xe022, -529}, + {0xe046, -547}, + {0xe092, -585}, + {0xe0b0, -600}, + {0xe0be, -607}, + {0xe0f4, -634}, + {0xe11b, -653.5}, + {0xe19c, -718}, + {0xe213, -777.5}, + {0xe232, -793}, + {0xe25b, -813.5}, + {0xe262, -817}, + {0xe279, -828.5}, + {0xe2cc, -870}, + {0xe2da, -877}, + {0xe326, -915}, + {0xe330, -920}, + {0xe3c3, -993.5}, + {0xe3cc, -998}, + {0xe566, -1382}, + {0xe57e, -1406}, + {0xe5c8, -1480}, + {0xe609, -1545}, + {0xe628, -1576}, + {0xe663, -1635}, + {0xe6ac, -1708}, + {0xe710, -1808}, + {0xe77f, -1919}, + {0xe7e7, -2023}, + {0xe868, -2256}, + {0xe885, -2314}, + {0xe8ea, -2516}, + {0xe919, -2610}, + {0xe92c, -2648}, + {0xea60, -3264}, + {0xeac1, -3458}, + {0xeacb, -3478}, + {0xeb22, -3652}, + {0xeb2c, -3672}, + {0xeb59, -3762}, + {0xeba5, -3914}, + {0xec53, -4428}, + {0xec97, -4700}, + {0xed16, -5208}, + {0xed4a, -5416}, + {0xed69, -5540}, + {0xee14, -6224}, + {0xee59, -6500}, + {0xee8a, -6696}, + {0xee93, -6732}, + {0xeed7, -7004}, + {0xef0b, -7212}, + {0xef59, -7524}, + {0xef61, -7556}, + {0xef67, -7580}, + {0xefb6, -7896}, + {0xf03a, -8656}, + {0xf04e, -8816}, + {0xf05f, -8952}, + {0xf09f, -9464}, + {0xf0c0, -9728}, + {0xf173, -11160}, + {0xf1d7, -11960}, + {0xf225, -12584}, + {0xf2ca, -13904}, + {0xf2d8, -14016}, + {0xf2e5, -14120}, + {0xf317, -14520}, + {0xf35d, -15080}, + {0xf3bd, -15848}, + {0xf3d3, -16024}, + {0xf3e6, -16176}, + {0xf3fb, -16344}, + {0xf477, -18288}, + {0xf4e0, -19968}, + {0xf4e5, -20048}, + {0xf50b, -20656}, + {0xf5a2, -23072}, + {0xf5c1, -23568}, + {0xf634, -25408}, + {0xf651, -25872}, + {0xf68a, -26784}, + {0xf69c, -27072}, + {0xf6ce, -27872}, + {0xf816, -33472}, + {0xf849, -35104}, + {0xf869, -36128}, + {0xf878, -36608}, + {0xf8cf, -39392}, + {0xf90a, -41280}, + {0xf916, -41664}, + {0xf91e, -41920}, + {0xf9c1, -47136}, + {0xfa0a, -49472}, + {0xfa11, -49696}, + {0xfa1d, -50080}, + {0xfa51, -51744}, + {0xfa86, -53440}, + {0xfaac, -54656}, + {0xfb95, -62112}, + {0xfbd1, -64032}, + {0xfbe0, -64512}, + {0xfbf5, -65184}, + {0xfc00, -std::numeric_limits::infinity()}, + {0xfca5, std::numeric_limits::quiet_NaN()}, + {0xfcb9, std::numeric_limits::quiet_NaN()}, + {0xfcc6, std::numeric_limits::quiet_NaN()}, + {0xfd72, std::numeric_limits::quiet_NaN()}, + {0xfd77, std::numeric_limits::quiet_NaN()}, + {0xfda3, std::numeric_limits::quiet_NaN()}, + {0xfe3e, std::numeric_limits::quiet_NaN()}, + {0xfe89, std::numeric_limits::quiet_NaN()}, + {0xfe91, std::numeric_limits::quiet_NaN()}, + {0xfe93, std::numeric_limits::quiet_NaN()}, + {0xfed1, std::numeric_limits::quiet_NaN()}, + {0xff7a, std::numeric_limits::quiet_NaN()}, + {0xffa3, std::numeric_limits::quiet_NaN()}, + }; + return result; +} TEST_CASE(check_half_values) { - for(auto [x, f] : half_lut) + for(auto [x, f] : half_lut()) { auto h = migraphx::bit_cast(x); if(std::isnan(f)) From b129bd52055ad3215f85b524b4ad97ba7efa65e5 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Mon, 4 Nov 2024 22:56:04 +0000 Subject: [PATCH 49/58] format --- src/include/migraphx/generic_float.hpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index d3dab157afb..7cf1162c363 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -386,12 +386,14 @@ class numeric_limits> // NOLINT(cert-dcl58-cpp) }; template -struct common_type, T> : std::common_type // NOLINT(cert-dcl58-cpp) +struct common_type, T> + : std::common_type // NOLINT(cert-dcl58-cpp) { }; template -struct common_type> : std::common_type // NOLINT(cert-dcl58-cpp) +struct common_type> + : std::common_type // NOLINT(cert-dcl58-cpp) { }; @@ -416,7 +418,8 @@ struct common_type> : std::common_type -struct common_type, migraphx::generic_float> // NOLINT(cert-dcl58-cpp) +struct common_type, + migraphx::generic_float> // NOLINT(cert-dcl58-cpp) { using type = migraphx::generic_float; }; From 04632660fcee7f4b6185d6fade00b6e67197b9dd Mon Sep 17 00:00:00 2001 From: richagadgil Date: Tue, 5 Nov 2024 17:41:29 +0000 Subject: [PATCH 50/58] change tidy warnings --- src/include/migraphx/generic_float.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 7cf1162c363..577f1dc68f3 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -386,14 +386,14 @@ class numeric_limits> // NOLINT(cert-dcl58-cpp) }; template -struct common_type, T> - : std::common_type // NOLINT(cert-dcl58-cpp) +struct common_type, T> // NOLINT(cert-dcl58-cpp) + : std::common_type { }; template -struct common_type> - : std::common_type // NOLINT(cert-dcl58-cpp) +struct common_type> // NOLINT(cert-dcl58-cpp) + : std::common_type { }; @@ -418,8 +418,8 @@ struct common_type> // {}; template -struct common_type, - migraphx::generic_float> // NOLINT(cert-dcl58-cpp) +struct common_type, // NOLINT(cert-dcl58-cpp) + migraphx::generic_float> { using type = migraphx::generic_float; }; From 2e3bd2537828bbca7f872f1c5d948315d018a009 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Tue, 5 Nov 2024 21:44:12 +0000 Subject: [PATCH 51/58] tidy --- test/half.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/half.cpp b/test/half.cpp index 04abedcac52..6b0a5f330a4 100644 --- a/test/half.cpp +++ b/test/half.cpp @@ -51,7 +51,7 @@ TEST_CASE(check_numeric_limits) CHECK(bit_equal(std::numeric_limits::signaling_NaN(), uint16_t{0x7d00})); } -const std::map& half_lut() +const std::map& half_lut() // NOLINT(readability-function-size) { static const std::map result = { {0x0000, 0}, From c02f3e3ef7a168096d6f613c53ad414a5ef4d6f5 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Wed, 6 Nov 2024 00:53:40 +0000 Subject: [PATCH 52/58] windows build fix --- src/include/migraphx/generic_float.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 577f1dc68f3..892341be4de 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -64,8 +64,9 @@ struct float32_parts constexpr float32_parts get_parts(float f) { return migraphx::bit_cast(f); } +#pragma pack(push, 1) template -struct __attribute__((packed, may_alias)) generic_float +struct __attribute__((may_alias)) generic_float { unsigned int mantissa : MantissaSize; unsigned int exponent : ExponentSize; @@ -333,6 +334,7 @@ struct __attribute__((packed, may_alias)) generic_float return temp; } }; +#pragma pack(pop) } // namespace MIGRAPHX_INLINE_NS } // namespace migraphx From 2db6e41a316982a4a223b32bf317e05391c427c7 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Wed, 6 Nov 2024 01:33:11 +0000 Subject: [PATCH 53/58] windows build --- src/include/migraphx/generic_float.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 892341be4de..76a998aeace 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -66,7 +66,7 @@ constexpr float32_parts get_parts(float f) { return migraphx::bit_cast -struct __attribute__((may_alias)) generic_float +struct alignas(1) __attribute__((may_alias)) generic_float { unsigned int mantissa : MantissaSize; unsigned int exponent : ExponentSize; From b19551478dd84eb151ea3ca68485c233770a64ac Mon Sep 17 00:00:00 2001 From: richagadgil Date: Wed, 6 Nov 2024 18:25:49 +0000 Subject: [PATCH 54/58] replace w gnu flag --- src/include/migraphx/generic_float.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 76a998aeace..1492add3e9c 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -64,9 +64,8 @@ struct float32_parts constexpr float32_parts get_parts(float f) { return migraphx::bit_cast(f); } -#pragma pack(push, 1) template -struct alignas(1) __attribute__((may_alias)) generic_float +struct [[gnu::packed, gnu::may_alias]] generic_float { unsigned int mantissa : MantissaSize; unsigned int exponent : ExponentSize; @@ -334,7 +333,6 @@ struct alignas(1) __attribute__((may_alias)) generic_float return temp; } }; -#pragma pack(pop) } // namespace MIGRAPHX_INLINE_NS } // namespace migraphx From 5754c1cc755738dbffc45e1e2e6f806492ab61ef Mon Sep 17 00:00:00 2001 From: richagadgil Date: Wed, 6 Nov 2024 19:32:30 +0000 Subject: [PATCH 55/58] align --- src/include/migraphx/generic_float.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 1492add3e9c..6ae0ab373ef 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -65,7 +65,7 @@ struct float32_parts constexpr float32_parts get_parts(float f) { return migraphx::bit_cast(f); } template -struct [[gnu::packed, gnu::may_alias]] generic_float +struct alignas((MantissaSize + ExponentSize + 1) / 8) [[gnu::packed, gnu::may_alias]] generic_float { unsigned int mantissa : MantissaSize; unsigned int exponent : ExponentSize; From 34554fb925a97fb5ac5a1c4f5d6adbe6e5d66869 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Wed, 6 Nov 2024 20:01:05 +0000 Subject: [PATCH 56/58] cmake --- src/CMakeLists.txt | 1 + src/include/migraphx/generic_float.hpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 909c0f6bc26..532ea9b777b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -122,6 +122,7 @@ add_library(migraphx if(WIN32) # Due to compilation crashing, we need to use type-erased matchers on Windows. target_compile_definitions(migraphx PUBLIC MIGRAPHX_USE_TYPE_ERASED_MATCHERS=1) + target_link_libraries(migraphx PUBLIC "-mno-ms-bitfields") endif() configure_file(version.h.in include/migraphx/version.h) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 6ae0ab373ef..577f1dc68f3 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -65,7 +65,7 @@ struct float32_parts constexpr float32_parts get_parts(float f) { return migraphx::bit_cast(f); } template -struct alignas((MantissaSize + ExponentSize + 1) / 8) [[gnu::packed, gnu::may_alias]] generic_float +struct __attribute__((packed, may_alias)) generic_float { unsigned int mantissa : MantissaSize; unsigned int exponent : ExponentSize; From 56cdd29d49416d3ff3c178e29943b976a6e0660e Mon Sep 17 00:00:00 2001 From: richagadgil Date: Wed, 6 Nov 2024 21:32:49 +0000 Subject: [PATCH 57/58] readd mvsc --- src/include/migraphx/generic_float.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/include/migraphx/generic_float.hpp b/src/include/migraphx/generic_float.hpp index 577f1dc68f3..ffa2c72e53e 100644 --- a/src/include/migraphx/generic_float.hpp +++ b/src/include/migraphx/generic_float.hpp @@ -64,6 +64,9 @@ struct float32_parts constexpr float32_parts get_parts(float f) { return migraphx::bit_cast(f); } +#ifdef _MSC_VER +#pragma pack(push, 1) +#endif template struct __attribute__((packed, may_alias)) generic_float { @@ -333,6 +336,9 @@ struct __attribute__((packed, may_alias)) generic_float return temp; } }; +#ifdef _MSC_VER +#pragma pack(pop) +#endif } // namespace MIGRAPHX_INLINE_NS } // namespace migraphx From 9ae05aeda4285aae4fb9ddb36dd74a603e089a56 Mon Sep 17 00:00:00 2001 From: richagadgil Date: Wed, 6 Nov 2024 22:23:43 +0000 Subject: [PATCH 58/58] redo compile options --- src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 532ea9b777b..dcde836b00e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -122,7 +122,7 @@ add_library(migraphx if(WIN32) # Due to compilation crashing, we need to use type-erased matchers on Windows. target_compile_definitions(migraphx PUBLIC MIGRAPHX_USE_TYPE_ERASED_MATCHERS=1) - target_link_libraries(migraphx PUBLIC "-mno-ms-bitfields") + target_compile_options(migraphx PUBLIC "-mno-ms-bitfields") endif() configure_file(version.h.in include/migraphx/version.h)