From f3de8aa24afa055d00c7d35ea2a518259f201b65 Mon Sep 17 00:00:00 2001 From: Chih-Min Chao Date: Thu, 6 Jun 2019 01:55:30 -0700 Subject: [PATCH 1/8] rvv: sf: handle signaling NaN for fmax/fmin --- build/Linux-386-GCC/Makefile | 1 + build/Linux-386-SSE2-GCC/Makefile | 1 + build/Linux-ARM-VFPv2-GCC/Makefile | 1 + build/Linux-RISCV64-GCC/Makefile | 1 + build/Linux-x86_64-GCC/Makefile | 1 + build/Win32-MinGW/Makefile | 1 + build/Win32-SSE2-MinGW/Makefile | 1 + build/Win64-MinGW-w64/Makefile | 1 + build/template-FAST_INT64/Makefile | 1 + build/template-not-FAST_INT64/Makefile | 1 + source/fall_maxmin.c | 81 ++++++++++++++++++++++++++ source/include/softfloat.h | 6 ++ 12 files changed, 97 insertions(+) create mode 100644 source/fall_maxmin.c diff --git a/build/Linux-386-GCC/Makefile b/build/Linux-386-GCC/Makefile index faeb3972..e677e83f 100644 --- a/build/Linux-386-GCC/Makefile +++ b/build/Linux-386-GCC/Makefile @@ -228,6 +228,7 @@ OBJS_OTHERS = \ f64_to_ui64_r_minMag$(OBJ) \ f64_to_i32_r_minMag$(OBJ) \ f64_to_i64_r_minMag$(OBJ) \ + fall_maxmin$(OBJ) \ f64_to_f16$(OBJ) \ f64_to_f32$(OBJ) \ f64_to_extF80M$(OBJ) \ diff --git a/build/Linux-386-SSE2-GCC/Makefile b/build/Linux-386-SSE2-GCC/Makefile index ced977b6..57adc362 100644 --- a/build/Linux-386-SSE2-GCC/Makefile +++ b/build/Linux-386-SSE2-GCC/Makefile @@ -228,6 +228,7 @@ OBJS_OTHERS = \ f64_to_ui64_r_minMag$(OBJ) \ f64_to_i32_r_minMag$(OBJ) \ f64_to_i64_r_minMag$(OBJ) \ + fall_maxmin$(OBJ) \ f64_to_f16$(OBJ) \ f64_to_f32$(OBJ) \ f64_to_extF80M$(OBJ) \ diff --git a/build/Linux-ARM-VFPv2-GCC/Makefile b/build/Linux-ARM-VFPv2-GCC/Makefile index a1e7c830..71940058 100644 --- a/build/Linux-ARM-VFPv2-GCC/Makefile +++ b/build/Linux-ARM-VFPv2-GCC/Makefile @@ -226,6 +226,7 @@ OBJS_OTHERS = \ f64_to_ui64_r_minMag$(OBJ) \ f64_to_i32_r_minMag$(OBJ) \ f64_to_i64_r_minMag$(OBJ) \ + fall_maxmin$(OBJ) \ f64_to_f16$(OBJ) \ f64_to_f32$(OBJ) \ f64_to_extF80M$(OBJ) \ diff --git a/build/Linux-RISCV64-GCC/Makefile 
b/build/Linux-RISCV64-GCC/Makefile index b9408c6b..616abfe0 100644 --- a/build/Linux-RISCV64-GCC/Makefile +++ b/build/Linux-RISCV64-GCC/Makefile @@ -247,6 +247,7 @@ OBJS_OTHERS = \ f64_to_ui64_r_minMag$(OBJ) \ f64_to_i32_r_minMag$(OBJ) \ f64_to_i64_r_minMag$(OBJ) \ + fall_maxmin$(OBJ) \ f64_to_f16$(OBJ) \ f64_to_f32$(OBJ) \ f64_to_extF80$(OBJ) \ diff --git a/build/Linux-x86_64-GCC/Makefile b/build/Linux-x86_64-GCC/Makefile index 72f251a5..24f60524 100644 --- a/build/Linux-x86_64-GCC/Makefile +++ b/build/Linux-x86_64-GCC/Makefile @@ -245,6 +245,7 @@ OBJS_OTHERS = \ f64_to_ui64_r_minMag$(OBJ) \ f64_to_i32_r_minMag$(OBJ) \ f64_to_i64_r_minMag$(OBJ) \ + fall_maxmin$(OBJ) \ f64_to_f16$(OBJ) \ f64_to_f32$(OBJ) \ f64_to_extF80$(OBJ) \ diff --git a/build/Win32-MinGW/Makefile b/build/Win32-MinGW/Makefile index faeb3972..e677e83f 100644 --- a/build/Win32-MinGW/Makefile +++ b/build/Win32-MinGW/Makefile @@ -228,6 +228,7 @@ OBJS_OTHERS = \ f64_to_ui64_r_minMag$(OBJ) \ f64_to_i32_r_minMag$(OBJ) \ f64_to_i64_r_minMag$(OBJ) \ + fall_maxmin$(OBJ) \ f64_to_f16$(OBJ) \ f64_to_f32$(OBJ) \ f64_to_extF80M$(OBJ) \ diff --git a/build/Win32-SSE2-MinGW/Makefile b/build/Win32-SSE2-MinGW/Makefile index ced977b6..57adc362 100644 --- a/build/Win32-SSE2-MinGW/Makefile +++ b/build/Win32-SSE2-MinGW/Makefile @@ -228,6 +228,7 @@ OBJS_OTHERS = \ f64_to_ui64_r_minMag$(OBJ) \ f64_to_i32_r_minMag$(OBJ) \ f64_to_i64_r_minMag$(OBJ) \ + fall_maxmin$(OBJ) \ f64_to_f16$(OBJ) \ f64_to_f32$(OBJ) \ f64_to_extF80M$(OBJ) \ diff --git a/build/Win64-MinGW-w64/Makefile b/build/Win64-MinGW-w64/Makefile index cc5bc0c5..f13ab0cf 100644 --- a/build/Win64-MinGW-w64/Makefile +++ b/build/Win64-MinGW-w64/Makefile @@ -238,6 +238,7 @@ OBJS_OTHERS = \ f64_to_ui64_r_minMag$(OBJ) \ f64_to_i32_r_minMag$(OBJ) \ f64_to_i64_r_minMag$(OBJ) \ + fall_maxmin$(OBJ) \ f64_to_f16$(OBJ) \ f64_to_f32$(OBJ) \ f64_to_extF80$(OBJ) \ diff --git a/build/template-FAST_INT64/Makefile b/build/template-FAST_INT64/Makefile index e04c2160..8227854c 
100644 --- a/build/template-FAST_INT64/Makefile +++ b/build/template-FAST_INT64/Makefile @@ -244,6 +244,7 @@ OBJS_OTHERS = \ f64_to_ui64_r_minMag$(OBJ) \ f64_to_i32_r_minMag$(OBJ) \ f64_to_i64_r_minMag$(OBJ) \ + fall_maxmin$(OBJ) \ f64_to_f16$(OBJ) \ f64_to_f32$(OBJ) \ f64_to_extF80$(OBJ) \ diff --git a/build/template-not-FAST_INT64/Makefile b/build/template-not-FAST_INT64/Makefile index 48b2cd6b..776b4ec6 100644 --- a/build/template-not-FAST_INT64/Makefile +++ b/build/template-not-FAST_INT64/Makefile @@ -228,6 +228,7 @@ OBJS_OTHERS = \ f64_to_ui64_r_minMag$(OBJ) \ f64_to_i32_r_minMag$(OBJ) \ f64_to_i64_r_minMag$(OBJ) \ + fall_maxmin$(OBJ) \ f64_to_f16$(OBJ) \ f64_to_f32$(OBJ) \ f64_to_extF80M$(OBJ) \ diff --git a/source/fall_maxmin.c b/source/fall_maxmin.c new file mode 100644 index 00000000..55ab2e36 --- /dev/null +++ b/source/fall_maxmin.c @@ -0,0 +1,81 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3e, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include +#include +#include +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" +/* COMPARE_MAX(a, b, bits) expands to the definition of f<bits>_max. The comparison is evaluated before the both-NaN early return so that the invalid flag the quiet compare helpers raise for a signaling NaN operand is still recorded when the default NaN is returned. When exactly one operand is NaN the other operand is returned. When the operands compare equal, a is returned only if b has its sign bit set, so the maximum of a positive and a negative zero is the positive zero. */ +#define COMPARE_MAX(a, b, bits) \ +float ## bits ## _t f ## bits ## _max( float ## bits ## _t a, float ## bits ## _t b ) \ +{ \ + bool greater = f ## bits ## _lt_quiet(b, a) || \ + (f ## bits ## _eq(b, a) && signF ## bits ## UI(b.v)); \ + \ + if (isNaNF ## bits ## UI(a.v) && isNaNF ## bits ## UI(b.v)) { \ + union ui ## bits ## _f ## bits ui; \ + ui.ui = defaultNaNF ## bits ## UI; \ + return ui.f; \ + } \ + \ + return greater || isNaNF ## bits ## UI((b).v) ? a : b; \ +} +/* COMPARE_MIN(a, b, bits) expands to the definition of f<bits>_min, the mirror image of f<bits>_max: the comparison again runs before the both-NaN early return so a signaling NaN operand still raises invalid, a lone NaN operand is ignored in favour of the other operand, and when the operands compare equal a is returned only if a has its sign bit set, so the minimum of a positive and a negative zero is the negative zero. */ +#define COMPARE_MIN(a, b, bits) \ +float ## bits ## _t f ## bits ## _min( float ## bits ## _t a, float ## bits ## _t b ) \ +{ \ + bool less = f ## bits ## _lt_quiet(a, b) || \ + (f ## bits ## _eq(a, b) && signF ## bits ## UI(a.v)); \ + \ + if (isNaNF ## bits ## UI(a.v) && isNaNF ## bits ## UI(b.v)) { \ + union ui ## bits ## _f ## bits ui; \ + ui.ui = defaultNaNF ## bits ## UI; \ + return ui.f; \ + } \ + \ + return less || isNaNF ## bits ## UI((b).v) ? 
a : b; \ +} + +COMPARE_MAX(a, b, 16) +COMPARE_MAX(a, b, 32) +COMPARE_MAX(a, b, 64) + +COMPARE_MIN(a, b, 16) +COMPARE_MIN(a, b, 32) +COMPARE_MIN(a, b, 64) diff --git a/source/include/softfloat.h b/source/include/softfloat.h index bf5014bf..7dbf771c 100644 --- a/source/include/softfloat.h +++ b/source/include/softfloat.h @@ -156,6 +156,8 @@ void f16_to_f128M( float16_t, float128_t * ); float16_t f16_roundToInt( float16_t, uint_fast8_t, bool ); float16_t f16_add( float16_t, float16_t ); float16_t f16_sub( float16_t, float16_t ); +float16_t f16_max( float16_t, float16_t ); +float16_t f16_min( float16_t, float16_t ); float16_t f16_mul( float16_t, float16_t ); float16_t f16_mulAdd( float16_t, float16_t, float16_t ); float16_t f16_div( float16_t, float16_t ); @@ -198,6 +200,8 @@ void f32_to_f128M( float32_t, float128_t * ); float32_t f32_roundToInt( float32_t, uint_fast8_t, bool ); float32_t f32_add( float32_t, float32_t ); float32_t f32_sub( float32_t, float32_t ); +float32_t f32_max( float32_t, float32_t ); +float32_t f32_min( float32_t, float32_t ); float32_t f32_mul( float32_t, float32_t ); float32_t f32_mulAdd( float32_t, float32_t, float32_t ); float32_t f32_div( float32_t, float32_t ); @@ -233,6 +237,8 @@ void f64_to_f128M( float64_t, float128_t * ); float64_t f64_roundToInt( float64_t, uint_fast8_t, bool ); float64_t f64_add( float64_t, float64_t ); float64_t f64_sub( float64_t, float64_t ); +float64_t f64_max( float64_t, float64_t ); +float64_t f64_min( float64_t, float64_t ); float64_t f64_mul( float64_t, float64_t ); float64_t f64_mulAdd( float64_t, float64_t, float64_t ); float64_t f64_div( float64_t, float64_t ); From 6cc86428c3d8255cd7804e3a328106820e4c060a Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Tue, 24 Mar 2020 19:39:36 -0700 Subject: [PATCH 2/8] sf: fp16: add missing APIs --- build/Linux-386-GCC/Makefile | 5 ++ build/Linux-386-SSE2-GCC/Makefile | 5 ++ build/Linux-ARM-VFPv2-GCC/Makefile | 5 ++ build/Linux-RISCV64-GCC/Makefile | 5 ++ 
build/Linux-x86_64-GCC/Makefile | 5 ++ build/Win32-MinGW/Makefile | 5 ++ build/Win32-SSE2-MinGW/Makefile | 5 ++ build/Win64-MinGW-w64/Makefile | 5 ++ build/template-FAST_INT64/Makefile | 5 ++ build/template-not-FAST_INT64/Makefile | 5 ++ source/8086-SSE/specialize.h | 7 +++ source/8086/specialize.h | 7 +++ source/ARM-VFPv2-defaultNaN/specialize.h | 7 +++ source/ARM-VFPv2/specialize.h | 7 +++ source/RISCV/specialize.h | 7 +++ source/f16_classify.c | 71 ++++++++++++++++++++++++ source/f16_to_i16.c | 55 ++++++++++++++++++ source/f16_to_ui16.c | 52 +++++++++++++++++ source/f32_to_i16.c | 55 ++++++++++++++++++ source/f32_to_ui16.c | 51 +++++++++++++++++ source/include/softfloat.h | 5 ++ 21 files changed, 374 insertions(+) create mode 100755 source/f16_classify.c create mode 100644 source/f16_to_i16.c create mode 100644 source/f16_to_ui16.c create mode 100644 source/f32_to_i16.c create mode 100644 source/f32_to_ui16.c diff --git a/build/Linux-386-GCC/Makefile b/build/Linux-386-GCC/Makefile index e677e83f..7015b570 100644 --- a/build/Linux-386-GCC/Makefile +++ b/build/Linux-386-GCC/Makefile @@ -174,12 +174,15 @@ OBJS_OTHERS = \ f16_to_ui64_r_minMag$(OBJ) \ f16_to_i32_r_minMag$(OBJ) \ f16_to_i64_r_minMag$(OBJ) \ + f16_to_ui16$(OBJ) \ f16_to_f32$(OBJ) \ f16_to_f64$(OBJ) \ + f16_to_i16$(OBJ) \ f16_to_extF80M$(OBJ) \ f16_to_f128M$(OBJ) \ f16_roundToInt$(OBJ) \ f16_add$(OBJ) \ + f16_classify$(OBJ) \ f16_sub$(OBJ) \ f16_mul$(OBJ) \ f16_mulAdd$(OBJ) \ @@ -201,8 +204,10 @@ OBJS_OTHERS = \ f32_to_ui64_r_minMag$(OBJ) \ f32_to_i32_r_minMag$(OBJ) \ f32_to_i64_r_minMag$(OBJ) \ + f32_to_ui16$(OBJ) \ f32_to_f16$(OBJ) \ f32_to_f64$(OBJ) \ + f32_to_i16$(OBJ) \ f32_to_extF80M$(OBJ) \ f32_to_f128M$(OBJ) \ f32_roundToInt$(OBJ) \ diff --git a/build/Linux-386-SSE2-GCC/Makefile b/build/Linux-386-SSE2-GCC/Makefile index 57adc362..c8fab35c 100644 --- a/build/Linux-386-SSE2-GCC/Makefile +++ b/build/Linux-386-SSE2-GCC/Makefile @@ -174,12 +174,15 @@ OBJS_OTHERS = \ f16_to_ui64_r_minMag$(OBJ) \ 
f16_to_i32_r_minMag$(OBJ) \ f16_to_i64_r_minMag$(OBJ) \ + f16_to_ui16$(OBJ) \ f16_to_f32$(OBJ) \ f16_to_f64$(OBJ) \ + f16_to_i16$(OBJ) \ f16_to_extF80M$(OBJ) \ f16_to_f128M$(OBJ) \ f16_roundToInt$(OBJ) \ f16_add$(OBJ) \ + f16_classify$(OBJ) \ f16_sub$(OBJ) \ f16_mul$(OBJ) \ f16_mulAdd$(OBJ) \ @@ -201,8 +204,10 @@ OBJS_OTHERS = \ f32_to_ui64_r_minMag$(OBJ) \ f32_to_i32_r_minMag$(OBJ) \ f32_to_i64_r_minMag$(OBJ) \ + f32_to_ui16$(OBJ) \ f32_to_f16$(OBJ) \ f32_to_f64$(OBJ) \ + f32_to_i16$(OBJ) \ f32_to_extF80M$(OBJ) \ f32_to_f128M$(OBJ) \ f32_roundToInt$(OBJ) \ diff --git a/build/Linux-ARM-VFPv2-GCC/Makefile b/build/Linux-ARM-VFPv2-GCC/Makefile index 71940058..dc1b430a 100644 --- a/build/Linux-ARM-VFPv2-GCC/Makefile +++ b/build/Linux-ARM-VFPv2-GCC/Makefile @@ -172,12 +172,15 @@ OBJS_OTHERS = \ f16_to_ui64_r_minMag$(OBJ) \ f16_to_i32_r_minMag$(OBJ) \ f16_to_i64_r_minMag$(OBJ) \ + f16_to_ui16$(OBJ) \ f16_to_f32$(OBJ) \ f16_to_f64$(OBJ) \ + f16_to_i16$(OBJ) \ f16_to_extF80M$(OBJ) \ f16_to_f128M$(OBJ) \ f16_roundToInt$(OBJ) \ f16_add$(OBJ) \ + f16_classify$(OBJ) \ f16_sub$(OBJ) \ f16_mul$(OBJ) \ f16_mulAdd$(OBJ) \ @@ -199,8 +202,10 @@ OBJS_OTHERS = \ f32_to_ui64_r_minMag$(OBJ) \ f32_to_i32_r_minMag$(OBJ) \ f32_to_i64_r_minMag$(OBJ) \ + f32_to_ui16$(OBJ) \ f32_to_f16$(OBJ) \ f32_to_f64$(OBJ) \ + f32_to_i16$(OBJ) \ f32_to_extF80M$(OBJ) \ f32_to_f128M$(OBJ) \ f32_roundToInt$(OBJ) \ diff --git a/build/Linux-RISCV64-GCC/Makefile b/build/Linux-RISCV64-GCC/Makefile index 616abfe0..3503b7cb 100644 --- a/build/Linux-RISCV64-GCC/Makefile +++ b/build/Linux-RISCV64-GCC/Makefile @@ -188,14 +188,17 @@ OBJS_OTHERS = \ f16_to_ui64_r_minMag$(OBJ) \ f16_to_i32_r_minMag$(OBJ) \ f16_to_i64_r_minMag$(OBJ) \ + f16_to_ui16$(OBJ) \ f16_to_f32$(OBJ) \ f16_to_f64$(OBJ) \ + f16_to_i16$(OBJ) \ f16_to_extF80$(OBJ) \ f16_to_extF80M$(OBJ) \ f16_to_f128$(OBJ) \ f16_to_f128M$(OBJ) \ f16_roundToInt$(OBJ) \ f16_add$(OBJ) \ + f16_classify$(OBJ) \ f16_sub$(OBJ) \ f16_mul$(OBJ) \ f16_mulAdd$(OBJ) \ @@ -217,9 
+220,11 @@ OBJS_OTHERS = \ f32_to_ui64_r_minMag$(OBJ) \ f32_to_i32_r_minMag$(OBJ) \ f32_to_i64_r_minMag$(OBJ) \ + f32_to_ui16$(OBJ) \ f32_to_bf16$(OBJ) \ f32_to_f16$(OBJ) \ f32_to_f64$(OBJ) \ + f32_to_i16$(OBJ) \ f32_to_extF80$(OBJ) \ f32_to_extF80M$(OBJ) \ f32_to_f128$(OBJ) \ diff --git a/build/Linux-x86_64-GCC/Makefile b/build/Linux-x86_64-GCC/Makefile index 24f60524..90b1e825 100644 --- a/build/Linux-x86_64-GCC/Makefile +++ b/build/Linux-x86_64-GCC/Makefile @@ -186,14 +186,17 @@ OBJS_OTHERS = \ f16_to_ui64_r_minMag$(OBJ) \ f16_to_i32_r_minMag$(OBJ) \ f16_to_i64_r_minMag$(OBJ) \ + f16_to_ui16$(OBJ) \ f16_to_f32$(OBJ) \ f16_to_f64$(OBJ) \ + f16_to_i16$(OBJ) \ f16_to_extF80$(OBJ) \ f16_to_extF80M$(OBJ) \ f16_to_f128$(OBJ) \ f16_to_f128M$(OBJ) \ f16_roundToInt$(OBJ) \ f16_add$(OBJ) \ + f16_classify$(OBJ) \ f16_sub$(OBJ) \ f16_mul$(OBJ) \ f16_mulAdd$(OBJ) \ @@ -215,9 +218,11 @@ OBJS_OTHERS = \ f32_to_ui64_r_minMag$(OBJ) \ f32_to_i32_r_minMag$(OBJ) \ f32_to_i64_r_minMag$(OBJ) \ + f32_to_ui16$(OBJ) \ f32_to_bf16$(OBJ) \ f32_to_f16$(OBJ) \ f32_to_f64$(OBJ) \ + f32_to_i16$(OBJ) \ f32_to_extF80$(OBJ) \ f32_to_extF80M$(OBJ) \ f32_to_f128$(OBJ) \ diff --git a/build/Win32-MinGW/Makefile b/build/Win32-MinGW/Makefile index e677e83f..7015b570 100644 --- a/build/Win32-MinGW/Makefile +++ b/build/Win32-MinGW/Makefile @@ -174,12 +174,15 @@ OBJS_OTHERS = \ f16_to_ui64_r_minMag$(OBJ) \ f16_to_i32_r_minMag$(OBJ) \ f16_to_i64_r_minMag$(OBJ) \ + f16_to_ui16$(OBJ) \ f16_to_f32$(OBJ) \ f16_to_f64$(OBJ) \ + f16_to_i16$(OBJ) \ f16_to_extF80M$(OBJ) \ f16_to_f128M$(OBJ) \ f16_roundToInt$(OBJ) \ f16_add$(OBJ) \ + f16_classify$(OBJ) \ f16_sub$(OBJ) \ f16_mul$(OBJ) \ f16_mulAdd$(OBJ) \ @@ -201,8 +204,10 @@ OBJS_OTHERS = \ f32_to_ui64_r_minMag$(OBJ) \ f32_to_i32_r_minMag$(OBJ) \ f32_to_i64_r_minMag$(OBJ) \ + f32_to_ui16$(OBJ) \ f32_to_f16$(OBJ) \ f32_to_f64$(OBJ) \ + f32_to_i16$(OBJ) \ f32_to_extF80M$(OBJ) \ f32_to_f128M$(OBJ) \ f32_roundToInt$(OBJ) \ diff --git a/build/Win32-SSE2-MinGW/Makefile 
b/build/Win32-SSE2-MinGW/Makefile index 57adc362..c8fab35c 100644 --- a/build/Win32-SSE2-MinGW/Makefile +++ b/build/Win32-SSE2-MinGW/Makefile @@ -174,12 +174,15 @@ OBJS_OTHERS = \ f16_to_ui64_r_minMag$(OBJ) \ f16_to_i32_r_minMag$(OBJ) \ f16_to_i64_r_minMag$(OBJ) \ + f16_to_ui16$(OBJ) \ f16_to_f32$(OBJ) \ f16_to_f64$(OBJ) \ + f16_to_i16$(OBJ) \ f16_to_extF80M$(OBJ) \ f16_to_f128M$(OBJ) \ f16_roundToInt$(OBJ) \ f16_add$(OBJ) \ + f16_classify$(OBJ) \ f16_sub$(OBJ) \ f16_mul$(OBJ) \ f16_mulAdd$(OBJ) \ @@ -201,8 +204,10 @@ OBJS_OTHERS = \ f32_to_ui64_r_minMag$(OBJ) \ f32_to_i32_r_minMag$(OBJ) \ f32_to_i64_r_minMag$(OBJ) \ + f32_to_ui16$(OBJ) \ f32_to_f16$(OBJ) \ f32_to_f64$(OBJ) \ + f32_to_i16$(OBJ) \ f32_to_extF80M$(OBJ) \ f32_to_f128M$(OBJ) \ f32_roundToInt$(OBJ) \ diff --git a/build/Win64-MinGW-w64/Makefile b/build/Win64-MinGW-w64/Makefile index f13ab0cf..ebf18f36 100644 --- a/build/Win64-MinGW-w64/Makefile +++ b/build/Win64-MinGW-w64/Makefile @@ -180,14 +180,17 @@ OBJS_OTHERS = \ f16_to_ui64_r_minMag$(OBJ) \ f16_to_i32_r_minMag$(OBJ) \ f16_to_i64_r_minMag$(OBJ) \ + f16_to_ui16$(OBJ) \ f16_to_f32$(OBJ) \ f16_to_f64$(OBJ) \ + f16_to_i16$(OBJ) \ f16_to_extF80$(OBJ) \ f16_to_extF80M$(OBJ) \ f16_to_f128$(OBJ) \ f16_to_f128M$(OBJ) \ f16_roundToInt$(OBJ) \ f16_add$(OBJ) \ + f16_classify$(OBJ) \ f16_sub$(OBJ) \ f16_mul$(OBJ) \ f16_mulAdd$(OBJ) \ @@ -209,8 +212,10 @@ OBJS_OTHERS = \ f32_to_ui64_r_minMag$(OBJ) \ f32_to_i32_r_minMag$(OBJ) \ f32_to_i64_r_minMag$(OBJ) \ + f32_to_ui16$(OBJ) \ f32_to_f16$(OBJ) \ f32_to_f64$(OBJ) \ + f32_to_i16$(OBJ) \ f32_to_extF80$(OBJ) \ f32_to_extF80M$(OBJ) \ f32_to_f128$(OBJ) \ diff --git a/build/template-FAST_INT64/Makefile b/build/template-FAST_INT64/Makefile index 8227854c..2d29a72c 100644 --- a/build/template-FAST_INT64/Makefile +++ b/build/template-FAST_INT64/Makefile @@ -185,14 +185,17 @@ OBJS_OTHERS = \ f16_to_ui64_r_minMag$(OBJ) \ f16_to_i32_r_minMag$(OBJ) \ f16_to_i64_r_minMag$(OBJ) \ + f16_to_ui16$(OBJ) \ f16_to_f32$(OBJ) \ 
f16_to_f64$(OBJ) \ + f16_to_i16$(OBJ) \ f16_to_extF80$(OBJ) \ f16_to_extF80M$(OBJ) \ f16_to_f128$(OBJ) \ f16_to_f128M$(OBJ) \ f16_roundToInt$(OBJ) \ f16_add$(OBJ) \ + f16_classify$(OBJ) \ f16_sub$(OBJ) \ f16_mul$(OBJ) \ f16_mulAdd$(OBJ) \ @@ -214,9 +217,11 @@ OBJS_OTHERS = \ f32_to_ui64_r_minMag$(OBJ) \ f32_to_i32_r_minMag$(OBJ) \ f32_to_i64_r_minMag$(OBJ) \ + f32_to_ui16$(OBJ) \ f32_to_bf16$(OBJ) \ f32_to_f16$(OBJ) \ f32_to_f64$(OBJ) \ + f32_to_i16$(OBJ) \ f32_to_extF80$(OBJ) \ f32_to_extF80M$(OBJ) \ f32_to_f128$(OBJ) \ diff --git a/build/template-not-FAST_INT64/Makefile b/build/template-not-FAST_INT64/Makefile index 776b4ec6..028da40a 100644 --- a/build/template-not-FAST_INT64/Makefile +++ b/build/template-not-FAST_INT64/Makefile @@ -174,12 +174,15 @@ OBJS_OTHERS = \ f16_to_ui64_r_minMag$(OBJ) \ f16_to_i32_r_minMag$(OBJ) \ f16_to_i64_r_minMag$(OBJ) \ + f16_to_ui16$(OBJ) \ f16_to_f32$(OBJ) \ f16_to_f64$(OBJ) \ + f16_to_i16$(OBJ) \ f16_to_extF80M$(OBJ) \ f16_to_f128M$(OBJ) \ f16_roundToInt$(OBJ) \ f16_add$(OBJ) \ + f16_classify$(OBJ) \ f16_sub$(OBJ) \ f16_mul$(OBJ) \ f16_mulAdd$(OBJ) \ @@ -201,8 +204,10 @@ OBJS_OTHERS = \ f32_to_ui64_r_minMag$(OBJ) \ f32_to_i32_r_minMag$(OBJ) \ f32_to_i64_r_minMag$(OBJ) \ + f32_to_ui16$(OBJ) \ f32_to_f16$(OBJ) \ f32_to_f64$(OBJ) \ + f32_to_i16$(OBJ) \ f32_to_extF80M$(OBJ) \ f32_to_f128M$(OBJ) \ f32_roundToInt$(OBJ) \ diff --git a/source/8086-SSE/specialize.h b/source/8086-SSE/specialize.h index 8ed2e75c..3a562592 100644 --- a/source/8086-SSE/specialize.h +++ b/source/8086-SSE/specialize.h @@ -51,6 +51,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | The values to return on conversions to 32-bit integer formats that raise an | invalid exception. 
*----------------------------------------------------------------------------*/ +#define ui16_fromPosOverflow 0xFFFF +#define ui16_fromNegOverflow 0 +#define ui16_fromNaN 0xFFFF +#define i16_fromPosOverflow 0x7FFF +#define i16_fromNegOverflow (-0x7FFF - 1) +#define i16_fromNaN 0x7FFF + #define ui32_fromPosOverflow 0xFFFFFFFF #define ui32_fromNegOverflow 0xFFFFFFFF #define ui32_fromNaN 0xFFFFFFFF diff --git a/source/8086/specialize.h b/source/8086/specialize.h index a9166e17..d7ca1dee 100644 --- a/source/8086/specialize.h +++ b/source/8086/specialize.h @@ -51,6 +51,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | The values to return on conversions to 32-bit integer formats that raise an | invalid exception. *----------------------------------------------------------------------------*/ +#define ui16_fromPosOverflow 0xFFFF +#define ui16_fromNegOverflow 0 +#define ui16_fromNaN 0xFFFF +#define i16_fromPosOverflow 0x7FFF +#define i16_fromNegOverflow (-0x7FFF - 1) +#define i16_fromNaN 0x7FFF + #define ui32_fromPosOverflow 0xFFFFFFFF #define ui32_fromNegOverflow 0xFFFFFFFF #define ui32_fromNaN 0xFFFFFFFF diff --git a/source/ARM-VFPv2-defaultNaN/specialize.h b/source/ARM-VFPv2-defaultNaN/specialize.h index e4ea15d1..63dbd534 100644 --- a/source/ARM-VFPv2-defaultNaN/specialize.h +++ b/source/ARM-VFPv2-defaultNaN/specialize.h @@ -51,6 +51,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | The values to return on conversions to 32-bit integer formats that raise an | invalid exception. 
*----------------------------------------------------------------------------*/ +#define ui16_fromPosOverflow 0xFFFF +#define ui16_fromNegOverflow 0 +#define ui16_fromNaN 0xFFFF +#define i16_fromPosOverflow 0x7FFF +#define i16_fromNegOverflow (-0x7FFF - 1) +#define i16_fromNaN 0x7FFF + #define ui32_fromPosOverflow 0xFFFFFFFF #define ui32_fromNegOverflow 0 #define ui32_fromNaN 0 diff --git a/source/ARM-VFPv2/specialize.h b/source/ARM-VFPv2/specialize.h index 10b0b357..6e2351c0 100644 --- a/source/ARM-VFPv2/specialize.h +++ b/source/ARM-VFPv2/specialize.h @@ -51,6 +51,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | The values to return on conversions to 32-bit integer formats that raise an | invalid exception. *----------------------------------------------------------------------------*/ +#define ui16_fromPosOverflow 0xFFFF +#define ui16_fromNegOverflow 0 +#define ui16_fromNaN 0xFFFF +#define i16_fromPosOverflow 0x7FFF +#define i16_fromNegOverflow (-0x7FFF - 1) +#define i16_fromNaN 0x7FFF + #define ui32_fromPosOverflow 0xFFFFFFFF #define ui32_fromNegOverflow 0 #define ui32_fromNaN 0 diff --git a/source/RISCV/specialize.h b/source/RISCV/specialize.h index cb95900e..0beb2e89 100644 --- a/source/RISCV/specialize.h +++ b/source/RISCV/specialize.h @@ -51,6 +51,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | The values to return on conversions to 32-bit integer formats that raise an | invalid exception. 
*----------------------------------------------------------------------------*/ +#define ui16_fromPosOverflow 0xFFFF +#define ui16_fromNegOverflow 0 +#define ui16_fromNaN 0xFFFF +#define i16_fromPosOverflow 0x7FFF +#define i16_fromNegOverflow (-0x7FFF - 1) +#define i16_fromNaN 0x7FFF + #define ui32_fromPosOverflow 0xFFFFFFFF #define ui32_fromNegOverflow 0 #define ui32_fromNaN 0xFFFFFFFF diff --git a/source/f16_classify.c b/source/f16_classify.c new file mode 100755 index 00000000..6d2bbb73 --- /dev/null +++ b/source/f16_classify.c @@ -0,0 +1,71 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3e, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include +#include +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" +/* Classify the half-precision value a from the sign, exponent and fraction fields of its encoding and return the class as a bit mask: bit 0 negative infinity, bit 1 negative normal, bit 2 negative subnormal, bit 3 negative zero, bit 4 positive zero, bit 5 positive subnormal, bit 6 positive normal, bit 7 positive infinity, bit 8 signaling NaN, bit 9 quiet NaN (the two NaN bits follow isNaNF16UI and softfloat_isSigNaNF16UI). NOTE(review): the bit order looks like the RISC-V FCLASS result layout; confirm against the ISA manual. */ +uint_fast16_t f16_classify( float16_t a ) +{ + union ui16_f16 uA; + uint_fast16_t uiA; + + uA.f = a; + uiA = uA.ui; + + uint_fast16_t infOrNaN = expF16UI( uiA ) == 0x1F; /* exponent field all ones */ + uint_fast16_t subnormalOrZero = expF16UI( uiA ) == 0; /* exponent field zero */ + bool sign = signF16UI( uiA ); + bool fracZero = fracF16UI( uiA ) == 0; + bool isNaN = isNaNF16UI( uiA ); + bool isSNaN = softfloat_isSigNaNF16UI( uiA ); + + return + ( sign && infOrNaN && fracZero ) << 0 | /* negative infinity */ + ( sign && !infOrNaN && !subnormalOrZero ) << 1 | /* negative normal */ + ( sign && subnormalOrZero && !fracZero ) << 2 | /* negative subnormal */ + ( sign && subnormalOrZero && fracZero ) << 3 | /* negative zero */ + ( !sign && infOrNaN && fracZero ) << 7 | /* positive infinity */ + ( !sign && !infOrNaN && !subnormalOrZero ) << 6 | /* positive normal */ + ( !sign && subnormalOrZero && !fracZero ) << 5 | /* positive subnormal */ + ( !sign && subnormalOrZero && fracZero ) << 4 | /* positive zero */ + ( isNaN && isSNaN ) << 8 | /* signaling NaN */ + ( isNaN && !isSNaN ) << 9; /* NaN that is not signaling */ +} + diff --git a/source/f16_to_i16.c b/source/f16_to_i16.c new file mode 100644 index 00000000..05726754 --- /dev/null +++ b/source/f16_to_i16.c @@ -0,0 +1,55 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3e, by John R. Hauser. 
+ +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include +#include "specialize.h" +#include "softfloat.h" +/* Convert a to a 16-bit signed integer: the value is converted with f16_to_i32 using the caller's roundingMode and exact, then range-checked. A result above INT16_MAX returns i16_fromPosOverflow and one below INT16_MIN returns i16_fromNegOverflow, in both cases after setting softfloat_flag_invalid; otherwise the 32-bit result is returned unchanged. NOTE(review): i16_fromNaN is not referenced here, so a NaN input yields whichever overflow value matches the sign of what f16_to_i32 returned for it; confirm that is intended for every target. */ +int_fast16_t f16_to_i16( float16_t a, uint_fast8_t roundingMode, bool exact ) +{ + int_fast32_t sig32 = f16_to_i32(a, roundingMode, exact); + + if (sig32 > INT16_MAX) { + softfloat_exceptionFlags |= softfloat_flag_invalid; + return i16_fromPosOverflow; + } else if (sig32 < INT16_MIN) { + softfloat_exceptionFlags |= softfloat_flag_invalid; + return i16_fromNegOverflow; + } else { + return sig32; + } +} + diff --git a/source/f16_to_ui16.c b/source/f16_to_ui16.c new file mode 100644 index 00000000..b84b018a --- /dev/null +++ b/source/f16_to_ui16.c @@ -0,0 +1,52 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3e, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include +#include "specialize.h" +#include "softfloat.h" +/* Convert a to a 16-bit unsigned integer: the value is converted with f16_to_ui32 using the caller's roundingMode and exact, then range-checked. A result above UINT16_MAX returns ui16_fromPosOverflow after setting softfloat_flag_invalid; otherwise the 32-bit result is returned unchanged. NOTE(review): ui16_fromNegOverflow and ui16_fromNaN are not referenced here, so negative and NaN inputs yield whatever f16_to_ui32 returned for them, passed through the same range check; confirm that is intended for every target. */ +uint_fast16_t f16_to_ui16( float16_t a, uint_fast8_t roundingMode, bool exact ) +{ + uint_fast32_t sig32 = f16_to_ui32(a, roundingMode, exact); + + if (sig32 > UINT16_MAX) { + softfloat_exceptionFlags |= softfloat_flag_invalid; + return ui16_fromPosOverflow; + } else { + return sig32; + } +} + diff --git a/source/f32_to_i16.c b/source/f32_to_i16.c new file mode 100644 index 00000000..8f5f111e --- /dev/null +++ b/source/f32_to_i16.c @@ -0,0 +1,55 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3e, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. 
Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include +#include "specialize.h" +#include "softfloat.h" +/* Convert a to a 16-bit signed integer by converting with f32_to_i32 (using the caller's roundingMode and exact) and range-checking the result. A result outside INT16_MIN..INT16_MAX returns i16_fromNegOverflow or i16_fromPosOverflow and raises invalid only: the exception flags are put back to their value from before the 32-bit conversion, so an inexact raised while rounding a value that then turns out not to fit is not reported together with invalid. NOTE(review): i16_fromNaN is not referenced here; a NaN input yields whichever overflow value matches the sign of what f32_to_i32 returned for it. */ +int_fast16_t f32_to_i16( float32_t a, uint_fast8_t roundingMode, bool exact ) +{ + uint_fast8_t savedFlags = softfloat_exceptionFlags; /* flags before the helper can add inexact */ + int_fast32_t sig32 = f32_to_i32(a, roundingMode, exact); + if (sig32 > INT16_MAX) { + softfloat_exceptionFlags = savedFlags | softfloat_flag_invalid; + return i16_fromPosOverflow; + } else if (sig32 < INT16_MIN) { + softfloat_exceptionFlags = savedFlags | softfloat_flag_invalid; + return i16_fromNegOverflow; + } else { + return sig32; + } +} + diff --git a/source/f32_to_ui16.c b/source/f32_to_ui16.c new file mode 100644 index 00000000..05c98a72 --- /dev/null +++ b/source/f32_to_ui16.c @@ -0,0 +1,51 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3e, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+=============================================================================*/
+/* f32_to_ui16: converts through f32_to_ui32; a result above UINT16_MAX sets softfloat_flag_invalid and yields ui16_fromPosOverflow. */
+#include <stdint.h>
+#include "specialize.h"
+#include "softfloat.h"
+
+uint_fast16_t f32_to_ui16( float32_t a, uint_fast8_t roundingMode, bool exact )
+{
+    uint_fast32_t sig32 = f32_to_ui32(a, roundingMode, exact);
+
+    if (sig32 > UINT16_MAX) {
+        softfloat_exceptionFlags |= softfloat_flag_invalid;
+        return ui16_fromPosOverflow;
+    } else {
+        return sig32;
+    }
+}
diff --git a/source/include/softfloat.h b/source/include/softfloat.h
index 7dbf771c..7b5238e3 100644
--- a/source/include/softfloat.h
+++ b/source/include/softfloat.h
@@ -137,8 +137,10 @@ void i64_to_f128M( int64_t, float128_t * );
 /*----------------------------------------------------------------------------
 | 16-bit (half-precision) floating-point operations.
 *----------------------------------------------------------------------------*/
+uint_fast16_t f16_to_ui16( float16_t, uint_fast8_t, bool );
 uint_fast32_t f16_to_ui32( float16_t, uint_fast8_t, bool );
 uint_fast64_t f16_to_ui64( float16_t, uint_fast8_t, bool );
+int_fast16_t f16_to_i16( float16_t, uint_fast8_t, bool );
 int_fast32_t f16_to_i32( float16_t, uint_fast8_t, bool );
 int_fast64_t f16_to_i64( float16_t, uint_fast8_t, bool );
 uint_fast32_t f16_to_ui32_r_minMag( float16_t, bool );
@@ -170,6 +172,7 @@ bool f16_eq_signaling( float16_t, float16_t );
 bool f16_le_quiet( float16_t, float16_t );
 bool f16_lt_quiet( float16_t, float16_t );
 bool f16_isSignalingNaN( float16_t );
+uint_fast16_t f16_classify( float16_t );
 
 /*----------------------------------------------------------------------------
 | 16-bit (brain float 16) floating-point operations.
@@ -181,8 +184,10 @@ bool bf16_isSignalingNaN( bfloat16_t );
 /*----------------------------------------------------------------------------
 | 32-bit (single-precision) floating-point operations.
*----------------------------------------------------------------------------*/ +uint_fast16_t f32_to_ui16( float32_t, uint_fast8_t, bool ); uint_fast32_t f32_to_ui32( float32_t, uint_fast8_t, bool ); uint_fast64_t f32_to_ui64( float32_t, uint_fast8_t, bool ); +int_fast16_t f32_to_i16( float32_t, uint_fast8_t, bool ); int_fast32_t f32_to_i32( float32_t, uint_fast8_t, bool ); int_fast64_t f32_to_i64( float32_t, uint_fast8_t, bool ); uint_fast32_t f32_to_ui32_r_minMag( float32_t, bool ); From 1d452c6b1eb280e955c9e66baf19aef4526abd02 Mon Sep 17 00:00:00 2001 From: Chih-Min Chao Date: Wed, 27 May 2020 00:32:42 -0700 Subject: [PATCH 3/8] sf: add f16_to_[u]i8 APIs --- build/Linux-386-GCC/Makefile | 2 + build/Linux-386-SSE2-GCC/Makefile | 2 + build/Linux-ARM-VFPv2-GCC/Makefile | 2 + build/Linux-RISCV64-GCC/Makefile | 2 + build/Linux-x86_64-GCC/Makefile | 2 + build/Win32-MinGW/Makefile | 2 + build/Win32-SSE2-MinGW/Makefile | 2 + build/Win64-MinGW-w64/Makefile | 2 + build/template-FAST_INT64/Makefile | 2 + build/template-not-FAST_INT64/Makefile | 2 + source/8086-SSE/specialize.h | 7 +++ source/8086/specialize.h | 7 +++ source/ARM-VFPv2-defaultNaN/specialize.h | 7 +++ source/ARM-VFPv2/specialize.h | 7 +++ source/RISCV/specialize.h | 7 +++ source/f16_to_i16.c | 6 ++- source/f16_to_i8.c | 57 ++++++++++++++++++++++++ source/f16_to_ui16.c | 4 +- source/f16_to_ui8.c | 54 ++++++++++++++++++++++ source/f32_to_i16.c | 6 ++- source/f32_to_ui16.c | 4 +- source/include/softfloat.h | 2 + 22 files changed, 182 insertions(+), 6 deletions(-) create mode 100644 source/f16_to_i8.c create mode 100644 source/f16_to_ui8.c diff --git a/build/Linux-386-GCC/Makefile b/build/Linux-386-GCC/Makefile index 7015b570..1c56f450 100644 --- a/build/Linux-386-GCC/Makefile +++ b/build/Linux-386-GCC/Makefile @@ -174,9 +174,11 @@ OBJS_OTHERS = \ f16_to_ui64_r_minMag$(OBJ) \ f16_to_i32_r_minMag$(OBJ) \ f16_to_i64_r_minMag$(OBJ) \ + f16_to_ui8$(OBJ) \ f16_to_ui16$(OBJ) \ f16_to_f32$(OBJ) \ f16_to_f64$(OBJ) \ + 
f16_to_i8$(OBJ) \ f16_to_i16$(OBJ) \ f16_to_extF80M$(OBJ) \ f16_to_f128M$(OBJ) \ diff --git a/build/Linux-386-SSE2-GCC/Makefile b/build/Linux-386-SSE2-GCC/Makefile index c8fab35c..6542a2be 100644 --- a/build/Linux-386-SSE2-GCC/Makefile +++ b/build/Linux-386-SSE2-GCC/Makefile @@ -174,9 +174,11 @@ OBJS_OTHERS = \ f16_to_ui64_r_minMag$(OBJ) \ f16_to_i32_r_minMag$(OBJ) \ f16_to_i64_r_minMag$(OBJ) \ + f16_to_ui8$(OBJ) \ f16_to_ui16$(OBJ) \ f16_to_f32$(OBJ) \ f16_to_f64$(OBJ) \ + f16_to_i8$(OBJ) \ f16_to_i16$(OBJ) \ f16_to_extF80M$(OBJ) \ f16_to_f128M$(OBJ) \ diff --git a/build/Linux-ARM-VFPv2-GCC/Makefile b/build/Linux-ARM-VFPv2-GCC/Makefile index dc1b430a..f0c5c39e 100644 --- a/build/Linux-ARM-VFPv2-GCC/Makefile +++ b/build/Linux-ARM-VFPv2-GCC/Makefile @@ -172,9 +172,11 @@ OBJS_OTHERS = \ f16_to_ui64_r_minMag$(OBJ) \ f16_to_i32_r_minMag$(OBJ) \ f16_to_i64_r_minMag$(OBJ) \ + f16_to_ui8$(OBJ) \ f16_to_ui16$(OBJ) \ f16_to_f32$(OBJ) \ f16_to_f64$(OBJ) \ + f16_to_i8$(OBJ) \ f16_to_i16$(OBJ) \ f16_to_extF80M$(OBJ) \ f16_to_f128M$(OBJ) \ diff --git a/build/Linux-RISCV64-GCC/Makefile b/build/Linux-RISCV64-GCC/Makefile index 3503b7cb..2d3b3f13 100644 --- a/build/Linux-RISCV64-GCC/Makefile +++ b/build/Linux-RISCV64-GCC/Makefile @@ -188,9 +188,11 @@ OBJS_OTHERS = \ f16_to_ui64_r_minMag$(OBJ) \ f16_to_i32_r_minMag$(OBJ) \ f16_to_i64_r_minMag$(OBJ) \ + f16_to_ui8$(OBJ) \ f16_to_ui16$(OBJ) \ f16_to_f32$(OBJ) \ f16_to_f64$(OBJ) \ + f16_to_i8$(OBJ) \ f16_to_i16$(OBJ) \ f16_to_extF80$(OBJ) \ f16_to_extF80M$(OBJ) \ diff --git a/build/Linux-x86_64-GCC/Makefile b/build/Linux-x86_64-GCC/Makefile index 90b1e825..b153075f 100644 --- a/build/Linux-x86_64-GCC/Makefile +++ b/build/Linux-x86_64-GCC/Makefile @@ -186,9 +186,11 @@ OBJS_OTHERS = \ f16_to_ui64_r_minMag$(OBJ) \ f16_to_i32_r_minMag$(OBJ) \ f16_to_i64_r_minMag$(OBJ) \ + f16_to_ui8$(OBJ) \ f16_to_ui16$(OBJ) \ f16_to_f32$(OBJ) \ f16_to_f64$(OBJ) \ + f16_to_i8$(OBJ) \ f16_to_i16$(OBJ) \ f16_to_extF80$(OBJ) \ f16_to_extF80M$(OBJ) \ diff 
--git a/build/Win32-MinGW/Makefile b/build/Win32-MinGW/Makefile index 7015b570..1c56f450 100644 --- a/build/Win32-MinGW/Makefile +++ b/build/Win32-MinGW/Makefile @@ -174,9 +174,11 @@ OBJS_OTHERS = \ f16_to_ui64_r_minMag$(OBJ) \ f16_to_i32_r_minMag$(OBJ) \ f16_to_i64_r_minMag$(OBJ) \ + f16_to_ui8$(OBJ) \ f16_to_ui16$(OBJ) \ f16_to_f32$(OBJ) \ f16_to_f64$(OBJ) \ + f16_to_i8$(OBJ) \ f16_to_i16$(OBJ) \ f16_to_extF80M$(OBJ) \ f16_to_f128M$(OBJ) \ diff --git a/build/Win32-SSE2-MinGW/Makefile b/build/Win32-SSE2-MinGW/Makefile index c8fab35c..6542a2be 100644 --- a/build/Win32-SSE2-MinGW/Makefile +++ b/build/Win32-SSE2-MinGW/Makefile @@ -174,9 +174,11 @@ OBJS_OTHERS = \ f16_to_ui64_r_minMag$(OBJ) \ f16_to_i32_r_minMag$(OBJ) \ f16_to_i64_r_minMag$(OBJ) \ + f16_to_ui8$(OBJ) \ f16_to_ui16$(OBJ) \ f16_to_f32$(OBJ) \ f16_to_f64$(OBJ) \ + f16_to_i8$(OBJ) \ f16_to_i16$(OBJ) \ f16_to_extF80M$(OBJ) \ f16_to_f128M$(OBJ) \ diff --git a/build/Win64-MinGW-w64/Makefile b/build/Win64-MinGW-w64/Makefile index ebf18f36..cf906f51 100644 --- a/build/Win64-MinGW-w64/Makefile +++ b/build/Win64-MinGW-w64/Makefile @@ -180,9 +180,11 @@ OBJS_OTHERS = \ f16_to_ui64_r_minMag$(OBJ) \ f16_to_i32_r_minMag$(OBJ) \ f16_to_i64_r_minMag$(OBJ) \ + f16_to_ui8$(OBJ) \ f16_to_ui16$(OBJ) \ f16_to_f32$(OBJ) \ f16_to_f64$(OBJ) \ + f16_to_i8$(OBJ) \ f16_to_i16$(OBJ) \ f16_to_extF80$(OBJ) \ f16_to_extF80M$(OBJ) \ diff --git a/build/template-FAST_INT64/Makefile b/build/template-FAST_INT64/Makefile index 2d29a72c..37abe21a 100644 --- a/build/template-FAST_INT64/Makefile +++ b/build/template-FAST_INT64/Makefile @@ -185,9 +185,11 @@ OBJS_OTHERS = \ f16_to_ui64_r_minMag$(OBJ) \ f16_to_i32_r_minMag$(OBJ) \ f16_to_i64_r_minMag$(OBJ) \ + f16_to_ui8$(OBJ) \ f16_to_ui16$(OBJ) \ f16_to_f32$(OBJ) \ f16_to_f64$(OBJ) \ + f16_to_i8$(OBJ) \ f16_to_i16$(OBJ) \ f16_to_extF80$(OBJ) \ f16_to_extF80M$(OBJ) \ diff --git a/build/template-not-FAST_INT64/Makefile b/build/template-not-FAST_INT64/Makefile index 028da40a..78239a5f 100644 --- 
a/build/template-not-FAST_INT64/Makefile +++ b/build/template-not-FAST_INT64/Makefile @@ -174,9 +174,11 @@ OBJS_OTHERS = \ f16_to_ui64_r_minMag$(OBJ) \ f16_to_i32_r_minMag$(OBJ) \ f16_to_i64_r_minMag$(OBJ) \ + f16_to_ui8$(OBJ) \ f16_to_ui16$(OBJ) \ f16_to_f32$(OBJ) \ f16_to_f64$(OBJ) \ + f16_to_i8$(OBJ) \ f16_to_i16$(OBJ) \ f16_to_extF80M$(OBJ) \ f16_to_f128M$(OBJ) \ diff --git a/source/8086-SSE/specialize.h b/source/8086-SSE/specialize.h index 3a562592..2f538b07 100644 --- a/source/8086-SSE/specialize.h +++ b/source/8086-SSE/specialize.h @@ -51,6 +51,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | The values to return on conversions to 32-bit integer formats that raise an | invalid exception. *----------------------------------------------------------------------------*/ +#define ui8_fromPosOverflow 0xFF +#define ui8_fromNegOverflow 0 +#define ui8_fromNaN 0xFF +#define i8_fromPosOverflow 0x7F +#define i8_fromNegOverflow (-0x7F - 1) +#define i8_fromNaN 0x7F + #define ui16_fromPosOverflow 0xFFFF #define ui16_fromNegOverflow 0 #define ui16_fromNaN 0xFFFF diff --git a/source/8086/specialize.h b/source/8086/specialize.h index d7ca1dee..0fd44993 100644 --- a/source/8086/specialize.h +++ b/source/8086/specialize.h @@ -51,6 +51,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | The values to return on conversions to 32-bit integer formats that raise an | invalid exception. 
*----------------------------------------------------------------------------*/ +#define ui8_fromPosOverflow 0xFF +#define ui8_fromNegOverflow 0 +#define ui8_fromNaN 0xFF +#define i8_fromPosOverflow 0x7F +#define i8_fromNegOverflow (-0x7F - 1) +#define i8_fromNaN 0x7F + #define ui16_fromPosOverflow 0xFFFF #define ui16_fromNegOverflow 0 #define ui16_fromNaN 0xFFFF diff --git a/source/ARM-VFPv2-defaultNaN/specialize.h b/source/ARM-VFPv2-defaultNaN/specialize.h index 63dbd534..974fed12 100644 --- a/source/ARM-VFPv2-defaultNaN/specialize.h +++ b/source/ARM-VFPv2-defaultNaN/specialize.h @@ -51,6 +51,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | The values to return on conversions to 32-bit integer formats that raise an | invalid exception. *----------------------------------------------------------------------------*/ +#define ui8_fromPosOverflow 0xFF +#define ui8_fromNegOverflow 0 +#define ui8_fromNaN 0xFF +#define i8_fromPosOverflow 0x7F +#define i8_fromNegOverflow (-0x7F - 1) +#define i8_fromNaN 0x7F + #define ui16_fromPosOverflow 0xFFFF #define ui16_fromNegOverflow 0 #define ui16_fromNaN 0xFFFF diff --git a/source/ARM-VFPv2/specialize.h b/source/ARM-VFPv2/specialize.h index 6e2351c0..d7b5bfba 100644 --- a/source/ARM-VFPv2/specialize.h +++ b/source/ARM-VFPv2/specialize.h @@ -51,6 +51,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | The values to return on conversions to 32-bit integer formats that raise an | invalid exception. 
*----------------------------------------------------------------------------*/ +#define ui8_fromPosOverflow 0xFF +#define ui8_fromNegOverflow 0 +#define ui8_fromNaN 0xFF +#define i8_fromPosOverflow 0x7F +#define i8_fromNegOverflow (-0x7F - 1) +#define i8_fromNaN 0x7F + #define ui16_fromPosOverflow 0xFFFF #define ui16_fromNegOverflow 0 #define ui16_fromNaN 0xFFFF diff --git a/source/RISCV/specialize.h b/source/RISCV/specialize.h index 0beb2e89..ca62b0d2 100644 --- a/source/RISCV/specialize.h +++ b/source/RISCV/specialize.h @@ -51,6 +51,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | The values to return on conversions to 32-bit integer formats that raise an | invalid exception. *----------------------------------------------------------------------------*/ +#define ui8_fromPosOverflow 0xFF +#define ui8_fromNegOverflow 0 +#define ui8_fromNaN 0xFF +#define i8_fromPosOverflow 0x7F +#define i8_fromNegOverflow (-0x7F - 1) +#define i8_fromNaN 0x7F + #define ui16_fromPosOverflow 0xFFFF #define ui16_fromNegOverflow 0 #define ui16_fromNaN 0xFFFF diff --git a/source/f16_to_i16.c b/source/f16_to_i16.c index 05726754..3cd0e481 100644 --- a/source/f16_to_i16.c +++ b/source/f16_to_i16.c @@ -40,13 +40,15 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
int_fast16_t f16_to_i16( float16_t a, uint_fast8_t roundingMode, bool exact ) { + uint_fast8_t old_flags = softfloat_exceptionFlags; + int_fast32_t sig32 = f16_to_i32(a, roundingMode, exact); if (sig32 > INT16_MAX) { - softfloat_exceptionFlags |= softfloat_flag_invalid; + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; return i16_fromPosOverflow; } else if (sig32 < INT16_MIN) { - softfloat_exceptionFlags |= softfloat_flag_invalid; + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; return i16_fromNegOverflow; } else { return sig32; diff --git a/source/f16_to_i8.c b/source/f16_to_i8.c new file mode 100644 index 00000000..f9f8fe53 --- /dev/null +++ b/source/f16_to_i8.c @@ -0,0 +1,57 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3e, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "specialize.h"
+#include "softfloat.h"
+/* f16_to_i8: converts through f16_to_i32; a result outside [INT8_MIN, INT8_MAX] yields i8_fromNegOverflow / i8_fromPosOverflow with softfloat_flag_invalid. */
+int_fast8_t f16_to_i8( float16_t a, uint_fast8_t roundingMode, bool exact )
+{
+    uint_fast8_t old_flags = softfloat_exceptionFlags; /* snapshot taken before the wide conversion runs */
+
+    int_fast32_t sig32 = f16_to_i32(a, roundingMode, exact);
+
+    if (sig32 > INT8_MAX) {
+        softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; /* snapshot + invalid: flags raised by f16_to_i32 are dropped */
+        return i8_fromPosOverflow;
+    } else if (sig32 < INT8_MIN) {
+        softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; /* same treatment on the negative side */
+        return i8_fromNegOverflow;
+    } else {
+        return sig32; /* in range: flags are left exactly as f16_to_i32 set them */
+    }
+}
+
diff --git a/source/f16_to_ui16.c b/source/f16_to_ui16.c
index b84b018a..5ad30e7e 100644
--- a/source/f16_to_ui16.c
+++ b/source/f16_to_ui16.c
@@ -40,10 +40,12 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
uint_fast16_t f16_to_ui16( float16_t a, uint_fast8_t roundingMode, bool exact ) { + uint_fast8_t old_flags = softfloat_exceptionFlags; + uint_fast32_t sig32 = f16_to_ui32(a, roundingMode, exact); if (sig32 > UINT16_MAX) { - softfloat_exceptionFlags |= softfloat_flag_invalid; + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; return ui16_fromPosOverflow; } else { return sig32; diff --git a/source/f16_to_ui8.c b/source/f16_to_ui8.c new file mode 100644 index 00000000..091a9ec9 --- /dev/null +++ b/source/f16_to_ui8.c @@ -0,0 +1,54 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3e, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "specialize.h"
+#include "softfloat.h"
+/* f16_to_ui8: converts through f16_to_ui32; a result above UINT8_MAX yields ui8_fromPosOverflow with softfloat_flag_invalid. */
+uint_fast8_t f16_to_ui8( float16_t a, uint_fast8_t roundingMode, bool exact )
+{
+    uint_fast8_t old_flags = softfloat_exceptionFlags; /* snapshot taken before the wide conversion runs */
+
+    uint_fast32_t sig32 = f16_to_ui32(a, roundingMode, exact);
+
+    if (sig32 > UINT8_MAX) {
+        softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; /* snapshot + invalid: flags raised by f16_to_ui32 are dropped */
+        return ui8_fromPosOverflow;
+    } else {
+        return sig32; /* in range: flags are left exactly as f16_to_ui32 set them */
+    }
+}
+
diff --git a/source/f32_to_i16.c b/source/f32_to_i16.c
index 8f5f111e..852e2598 100644
--- a/source/f32_to_i16.c
+++ b/source/f32_to_i16.c
@@ -40,13 +40,15 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
int_fast16_t f32_to_i16( float32_t a, uint_fast8_t roundingMode, bool exact ) { + uint_fast8_t old_flags = softfloat_exceptionFlags; + int_fast32_t sig32 = f32_to_i32(a, roundingMode, exact); if (sig32 > INT16_MAX) { - softfloat_exceptionFlags |= softfloat_flag_invalid; + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; return i16_fromPosOverflow; } else if (sig32 < INT16_MIN) { - softfloat_exceptionFlags |= softfloat_flag_invalid; + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; return i16_fromNegOverflow; } else { return sig32; diff --git a/source/f32_to_ui16.c b/source/f32_to_ui16.c index 05c98a72..3f8fc3c6 100644 --- a/source/f32_to_ui16.c +++ b/source/f32_to_ui16.c @@ -40,10 +40,12 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. uint_fast16_t f32_to_ui16( float32_t a, uint_fast8_t roundingMode, bool exact ) { + uint_fast8_t old_flags = softfloat_exceptionFlags; + uint_fast32_t sig32 = f32_to_ui32(a, roundingMode, exact); if (sig32 > UINT16_MAX) { - softfloat_exceptionFlags |= softfloat_flag_invalid; + softfloat_exceptionFlags = old_flags | softfloat_flag_invalid; return ui16_fromPosOverflow; } else { return sig32; diff --git a/source/include/softfloat.h b/source/include/softfloat.h index 7b5238e3..f4e70ead 100644 --- a/source/include/softfloat.h +++ b/source/include/softfloat.h @@ -137,9 +137,11 @@ void i64_to_f128M( int64_t, float128_t * ); /*---------------------------------------------------------------------------- | 16-bit (half-precision) floating-point operations. 
*----------------------------------------------------------------------------*/ +uint_fast8_t f16_to_ui8( float16_t, uint_fast8_t, bool ); uint_fast16_t f16_to_ui16( float16_t, uint_fast8_t, bool ); uint_fast32_t f16_to_ui32( float16_t, uint_fast8_t, bool ); uint_fast64_t f16_to_ui64( float16_t, uint_fast8_t, bool ); +int_fast8_t f16_to_i8( float16_t, uint_fast8_t, bool ); int_fast16_t f16_to_i16( float16_t, uint_fast8_t, bool ); int_fast32_t f16_to_i32( float16_t, uint_fast8_t, bool ); int_fast64_t f16_to_i64( float16_t, uint_fast8_t, bool ); From ae2df41823121082e6d1a0f371f22a10150fc78e Mon Sep 17 00:00:00 2001 From: Chih-Min Chao Date: Tue, 25 Aug 2020 21:24:25 -0700 Subject: [PATCH 4/8] softfloat: add reciprocal api --- build/Linux-386-GCC/Makefile | 1 + build/Linux-386-SSE2-GCC/Makefile | 1 + build/Linux-ARM-VFPv2-GCC/Makefile | 1 + build/Linux-RISCV64-GCC/Makefile | 1 + build/Linux-x86_64-GCC/Makefile | 1 + build/Win32-MinGW/Makefile | 1 + build/Win32-SSE2-MinGW/Makefile | 1 + build/Win64-MinGW-w64/Makefile | 1 + build/template-FAST_INT64/Makefile | 1 + build/template-not-FAST_INT64/Makefile | 1 + source/fall_reciprocal.c | 392 +++++++++++++++++++++++++ source/include/softfloat.h | 8 + 12 files changed, 410 insertions(+) create mode 100644 source/fall_reciprocal.c diff --git a/build/Linux-386-GCC/Makefile b/build/Linux-386-GCC/Makefile index 1c56f450..05c9f8f9 100644 --- a/build/Linux-386-GCC/Makefile +++ b/build/Linux-386-GCC/Makefile @@ -236,6 +236,7 @@ OBJS_OTHERS = \ f64_to_i32_r_minMag$(OBJ) \ f64_to_i64_r_minMag$(OBJ) \ fall_maxmin$(OBJ) \ + fall_reciprocal$(OBJ) \ f64_to_f16$(OBJ) \ f64_to_f32$(OBJ) \ f64_to_extF80M$(OBJ) \ diff --git a/build/Linux-386-SSE2-GCC/Makefile b/build/Linux-386-SSE2-GCC/Makefile index 6542a2be..67ad74aa 100644 --- a/build/Linux-386-SSE2-GCC/Makefile +++ b/build/Linux-386-SSE2-GCC/Makefile @@ -236,6 +236,7 @@ OBJS_OTHERS = \ f64_to_i32_r_minMag$(OBJ) \ f64_to_i64_r_minMag$(OBJ) \ fall_maxmin$(OBJ) \ + fall_reciprocal$(OBJ) \ 
f64_to_f16$(OBJ) \ f64_to_f32$(OBJ) \ f64_to_extF80M$(OBJ) \ diff --git a/build/Linux-ARM-VFPv2-GCC/Makefile b/build/Linux-ARM-VFPv2-GCC/Makefile index f0c5c39e..827aa5ab 100644 --- a/build/Linux-ARM-VFPv2-GCC/Makefile +++ b/build/Linux-ARM-VFPv2-GCC/Makefile @@ -234,6 +234,7 @@ OBJS_OTHERS = \ f64_to_i32_r_minMag$(OBJ) \ f64_to_i64_r_minMag$(OBJ) \ fall_maxmin$(OBJ) \ + fall_reciprocal$(OBJ) \ f64_to_f16$(OBJ) \ f64_to_f32$(OBJ) \ f64_to_extF80M$(OBJ) \ diff --git a/build/Linux-RISCV64-GCC/Makefile b/build/Linux-RISCV64-GCC/Makefile index 2d3b3f13..a6bcccde 100644 --- a/build/Linux-RISCV64-GCC/Makefile +++ b/build/Linux-RISCV64-GCC/Makefile @@ -255,6 +255,7 @@ OBJS_OTHERS = \ f64_to_i32_r_minMag$(OBJ) \ f64_to_i64_r_minMag$(OBJ) \ fall_maxmin$(OBJ) \ + fall_reciprocal$(OBJ) \ f64_to_f16$(OBJ) \ f64_to_f32$(OBJ) \ f64_to_extF80$(OBJ) \ diff --git a/build/Linux-x86_64-GCC/Makefile b/build/Linux-x86_64-GCC/Makefile index b153075f..381e0c1e 100644 --- a/build/Linux-x86_64-GCC/Makefile +++ b/build/Linux-x86_64-GCC/Makefile @@ -253,6 +253,7 @@ OBJS_OTHERS = \ f64_to_i32_r_minMag$(OBJ) \ f64_to_i64_r_minMag$(OBJ) \ fall_maxmin$(OBJ) \ + fall_reciprocal$(OBJ) \ f64_to_f16$(OBJ) \ f64_to_f32$(OBJ) \ f64_to_extF80$(OBJ) \ diff --git a/build/Win32-MinGW/Makefile b/build/Win32-MinGW/Makefile index 1c56f450..05c9f8f9 100644 --- a/build/Win32-MinGW/Makefile +++ b/build/Win32-MinGW/Makefile @@ -236,6 +236,7 @@ OBJS_OTHERS = \ f64_to_i32_r_minMag$(OBJ) \ f64_to_i64_r_minMag$(OBJ) \ fall_maxmin$(OBJ) \ + fall_reciprocal$(OBJ) \ f64_to_f16$(OBJ) \ f64_to_f32$(OBJ) \ f64_to_extF80M$(OBJ) \ diff --git a/build/Win32-SSE2-MinGW/Makefile b/build/Win32-SSE2-MinGW/Makefile index 6542a2be..67ad74aa 100644 --- a/build/Win32-SSE2-MinGW/Makefile +++ b/build/Win32-SSE2-MinGW/Makefile @@ -236,6 +236,7 @@ OBJS_OTHERS = \ f64_to_i32_r_minMag$(OBJ) \ f64_to_i64_r_minMag$(OBJ) \ fall_maxmin$(OBJ) \ + fall_reciprocal$(OBJ) \ f64_to_f16$(OBJ) \ f64_to_f32$(OBJ) \ f64_to_extF80M$(OBJ) \ diff --git 
a/build/Win64-MinGW-w64/Makefile b/build/Win64-MinGW-w64/Makefile index cf906f51..609a43c6 100644 --- a/build/Win64-MinGW-w64/Makefile +++ b/build/Win64-MinGW-w64/Makefile @@ -246,6 +246,7 @@ OBJS_OTHERS = \ f64_to_i32_r_minMag$(OBJ) \ f64_to_i64_r_minMag$(OBJ) \ fall_maxmin$(OBJ) \ + fall_reciprocal$(OBJ) \ f64_to_f16$(OBJ) \ f64_to_f32$(OBJ) \ f64_to_extF80$(OBJ) \ diff --git a/build/template-FAST_INT64/Makefile b/build/template-FAST_INT64/Makefile index 37abe21a..53eeed27 100644 --- a/build/template-FAST_INT64/Makefile +++ b/build/template-FAST_INT64/Makefile @@ -252,6 +252,7 @@ OBJS_OTHERS = \ f64_to_i32_r_minMag$(OBJ) \ f64_to_i64_r_minMag$(OBJ) \ fall_maxmin$(OBJ) \ + fall_reciprocal$(OBJ) \ f64_to_f16$(OBJ) \ f64_to_f32$(OBJ) \ f64_to_extF80$(OBJ) \ diff --git a/build/template-not-FAST_INT64/Makefile b/build/template-not-FAST_INT64/Makefile index 78239a5f..ea35b4ab 100644 --- a/build/template-not-FAST_INT64/Makefile +++ b/build/template-not-FAST_INT64/Makefile @@ -236,6 +236,7 @@ OBJS_OTHERS = \ f64_to_i32_r_minMag$(OBJ) \ f64_to_i64_r_minMag$(OBJ) \ fall_maxmin$(OBJ) \ + fall_reciprocal$(OBJ) \ f64_to_f16$(OBJ) \ f64_to_f32$(OBJ) \ f64_to_extF80M$(OBJ) \ diff --git a/source/fall_reciprocal.c b/source/fall_reciprocal.c new file mode 100644 index 00000000..28e7530e --- /dev/null +++ b/source/fall_reciprocal.c @@ -0,0 +1,392 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3e, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <assert.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+// extract64: returns the len-bit field of val whose lowest bit is bit pos (asserts the field fits in 64 bits).
+static inline uint64_t extract64(uint64_t val, int pos, int len)
+{
+    assert(pos >= 0 && len > 0 && len <= 64 - pos);
+    return (val >> pos) & (~UINT64_C(0) >> (64 - len));
+}
+// make_mask64: returns len low one-bits shifted left by pos; bits pushed past bit 63 are lost.
+static inline uint64_t make_mask64(int pos, int len)
+{
+    assert(pos >= 0 && len > 0 && pos < 64 && len <= 64);
+    return (UINT64_MAX >> (64 - len)) << pos;
+}
+// rsqrte7: table-driven 7-bit estimate on a raw encoding with e exponent bits and s significand bits (reciprocal square root, judging by the name); sub marks a subnormal input.
+//user needs to truncate output to required length
+static inline uint64_t rsqrte7(uint64_t val, int e, int s, bool sub) {
+    uint64_t exp = extract64(val, s, e); // e-bit exponent field
+    uint64_t sig = extract64(val, 0, s); // low s significand bits
+    uint64_t sign = extract64(val, s + e, 1); // single bit just above the exponent
+    const int p = 7; // index width: table holds 2^p entries
+
+    static const uint8_t table[] = {
+        52, 51, 50, 48, 47, 46, 44, 43,
+        42, 41, 40, 39, 38, 36, 35, 34,
+        33, 32, 31, 30, 30, 29, 28, 27,
+        26, 25, 24, 23, 23, 22, 21, 20,
+        19, 19, 18, 17, 16, 16, 15, 14,
+        14, 13, 12, 12, 11, 10, 10, 9,
+        9, 8, 7, 7, 6, 6, 5, 4,
+        4, 3, 3, 2, 2, 1, 1, 0,
+        127, 125, 123, 121, 119, 118, 116, 114,
+        113, 111, 109, 108, 106, 105, 103, 102,
+        100, 99, 97, 96, 95, 93, 92, 91,
+        90, 88, 87, 86, 85, 84, 83, 82,
+        80, 79, 78, 77, 76, 75, 74, 73,
+        72, 71, 70, 70, 69, 68, 67, 66,
+        65, 64, 63, 63, 62, 61, 60, 59,
+        59, 58, 57, 56, 56, 55, 54, 53};
+
+    if (sub) { // normalize: shift until bit s-1 of sig is set
+        while (extract64(sig, s - 1, 1) == 0)
+            exp--, sig <<= 1; // exp is unsigned, so it wraps below zero here
+
+        sig = (sig << 1) & make_mask64(0 ,s); // shift the leading one out of the s-bit field
+    }
+
+    int idx = ((exp & 1) << (p-1)) | (sig >> (s-p+1)); // exponent parity bit, then the top p-1 significand bits
+    uint64_t out_sig = (uint64_t)(table[idx]) << (s-p); // table value lands in the top p significand bits
+    uint64_t out_exp = (3 * make_mask64(0, e - 1) + ~exp) / 2; // (3*(2^(e-1)-1) - 1 - exp) / 2, computed modulo 2^64
+
+    return (sign << (s+e)) | (out_exp << s) | out_sig;
+}
+// f16_rsqrte7: dispatches on f16_classify(in): negative non-zero and sNaN -> invalid flag + default NaN; qNaN -> default NaN; -0/+0 -> 0xfc00/0x7c00 with the infinite flag; +inf -> 0; other positives -> rsqrte7.
+float16_t f16_rsqrte7(float16_t in)
+{
+    union ui16_f16 uA;
+
+    uA.f = in;
+    unsigned int ret = f16_classify(in);
+    bool sub = false;
+    switch(ret) {
+        case 0x001: // -inf
+        case 0x002: // -normal
+        case 0x004: // -subnormal
+        case 0x100: // sNaN
+            softfloat_exceptionFlags |= softfloat_flag_invalid; // fallthrough
+        case 0x200: //qNaN
+            uA.ui = defaultNaNF16UI;
+            break;
+        case 0x008: // -0
+            uA.ui = 0xfc00;
+            softfloat_exceptionFlags |= softfloat_flag_infinite;
+            break;
+        case 0x010: // +0
+            uA.ui = 0x7c00;
+            softfloat_exceptionFlags |= softfloat_flag_infinite;
+            break;
+        case 0x080: //+inf
+            uA.ui = 0x0;
+            break;
+        case 0x020: //+ sub
+            sub = true; // fallthrough
+        default: // +num
+            uA.ui = rsqrte7(uA.ui, 5, 10, sub);
+            break;
+    }
+
+    return uA.f;
+}
+// f32_rsqrte7: same dispatch as the 16-bit variant, using f32_classify and rsqrte7 with e=8, s=23.
+float32_t f32_rsqrte7(float32_t in)
+{
+    union ui32_f32 uA;
+
+    uA.f = in;
+    unsigned int ret = f32_classify(in);
+    bool sub = false;
+    switch(ret) {
+        case 0x001: // -inf
+        case 0x002: // -normal
+        case 0x004: // -subnormal
+        case 0x100: // sNaN
+            softfloat_exceptionFlags |= softfloat_flag_invalid; // fallthrough
+        case 0x200: //qNaN
+            uA.ui = defaultNaNF32UI;
+            break;
+        case 0x008: // -0
+            uA.ui = 0xff800000;
+            softfloat_exceptionFlags |= softfloat_flag_infinite;
+            break;
+        case 0x010: // +0
+            uA.ui = 0x7f800000;
+            softfloat_exceptionFlags |= softfloat_flag_infinite;
+            break;
+        case 0x080: //+inf
+            uA.ui = 0x0;
+            break;
+        case 0x020: //+ sub
+            sub = true; // fallthrough
+        default: // +num
+            uA.ui = rsqrte7(uA.ui, 8, 23, sub);
+            break;
+    }
+
+    return uA.f;
+}
+// f64_rsqrte7: same dispatch as the 16-bit variant, using f64_classify and rsqrte7 with e=11, s=52.
+float64_t f64_rsqrte7(float64_t in)
+{
+    union ui64_f64 uA;
+
+    uA.f = in;
+    unsigned int ret = f64_classify(in);
+    bool sub = false;
+    switch(ret) {
+        case 0x001: // -inf
+        case 0x002: // -normal
+        case 0x004: // -subnormal
+        case 0x100: // sNaN
+            softfloat_exceptionFlags |= softfloat_flag_invalid; // fallthrough
+        case 0x200: //qNaN
+            uA.ui = defaultNaNF64UI;
+            break;
+        case 0x008: // -0
+            uA.ui = 0xfff0000000000000ul;
+            softfloat_exceptionFlags |= softfloat_flag_infinite;
+            break;
+        case 0x010: // +0
+            uA.ui = 0x7ff0000000000000ul;
+            softfloat_exceptionFlags |= softfloat_flag_infinite;
+            break;
+        case 0x080: //+inf
+            uA.ui = 0x0;
+            break;
+        case 0x020: //+ sub
+            sub = true; // fallthrough
+        default: // +num
+            uA.ui = rsqrte7(uA.ui, 11, 52, sub);
+            break;
+    }
+
+    return uA.f;
+}
+
+//user needs to truncate output to required length +static inline uint64_t recip7(uint64_t val, int e, int s, int rm, bool sub, + bool *round_abnormal) +{ + uint64_t exp = extract64(val, s, e); + uint64_t sig = extract64(val, 0, s); + uint64_t sign = extract64(val, s + e, 1); + const int p = 7; + + static const uint8_t table[] = { + 127, 125, 123, 121, 119, 117, 116, 114, + 112, 110, 109, 107, 105, 104, 102, 100, + 99, 97, 96, 94, 93, 91, 90, 88, + 87, 85, 84, 83, 81, 80, 79, 77, + 76, 75, 74, 72, 71, 70, 69, 68, + 66, 65, 64, 63, 62, 61, 60, 59, + 58, 57, 56, 55, 54, 53, 52, 51, + 50, 49, 48, 47, 46, 45, 44, 43, + 42, 41, 40, 40, 39, 38, 37, 36, + 35, 35, 34, 33, 32, 31, 31, 30, + 29, 28, 28, 27, 26, 25, 25, 24, + 23, 23, 22, 21, 21, 20, 19, 19, + 18, 17, 17, 16, 15, 15, 14, 14, + 13, 12, 12, 11, 11, 10, 9, 9, + 8, 8, 7, 7, 6, 5, 5, 4, + 4, 3, 3, 2, 2, 1, 1, 0}; + + if (sub) { + while (extract64(sig, s - 1, 1) == 0) + exp--, sig <<= 1; + + sig = (sig << 1) & make_mask64(0 ,s); + + if (exp != 0 && exp != UINT64_MAX) { + *round_abnormal = true; + if (rm == 1 || + (rm == 2 && !sign) || + (rm == 3 && sign)) + return ((sign << (s+e)) | make_mask64(s, e)) - 1; + else + return (sign << (s+e)) | make_mask64(s, e); + } + } + + int idx = sig >> (s-p); + uint64_t out_sig = (uint64_t)(table[idx]) << (s-p); + uint64_t out_exp = 2 * make_mask64(0, e - 1) + ~exp; + if (out_exp == 0 || out_exp == UINT64_MAX) { + out_sig = (out_sig >> 1) | make_mask64(s - 1, 1); + if (out_exp == UINT64_MAX) { + out_sig >>= 1; + out_exp = 0; + } + } + + return (sign << (s+e)) | (out_exp << s) | out_sig; +} + +float16_t f16_recip7(float16_t in) +{ + union ui16_f16 uA; + + uA.f = in; + unsigned int ret = f16_classify(in); + bool sub = false; + bool round_abnormal = false; + switch(ret) { + case 0x001: // -inf + uA.ui = 0x8000; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x008: // -0 + uA.ui = 0xfc00; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 
+ uA.ui = 0x7c00; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + case 0x200: //qNaN + uA.ui = defaultNaNF16UI; + break; + case 0x004: // -subnormal + case 0x020: //+ sub + sub = true; + default: // +- normal + uA.ui = recip7(uA.ui, 5, 10, + softfloat_roundingMode, sub, &round_abnormal); + if (round_abnormal) + softfloat_exceptionFlags |= softfloat_flag_inexact | + softfloat_flag_overflow; + break; + } + + return uA.f; +} + +float32_t f32_recip7(float32_t in) +{ + union ui32_f32 uA; + + uA.f = in; + unsigned int ret = f32_classify(in); + bool sub = false; + bool round_abnormal = false; + switch(ret) { + case 0x001: // -inf + uA.ui = 0x80000000; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x008: // -0 + uA.ui = 0xff800000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7f800000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + case 0x200: //qNaN + uA.ui = defaultNaNF32UI; + break; + case 0x004: // -subnormal + case 0x020: //+ sub + sub = true; + default: // +- normal + uA.ui = recip7(uA.ui, 8, 23, + softfloat_roundingMode, sub, &round_abnormal); + if (round_abnormal) + softfloat_exceptionFlags |= softfloat_flag_inexact | + softfloat_flag_overflow; + break; + } + + return uA.f; +} + +float64_t f64_recip7(float64_t in) +{ + union ui64_f64 uA; + + uA.f = in; + unsigned int ret = f64_classify(in); + bool sub = false; + bool round_abnormal = false; + switch(ret) { + case 0x001: // -inf + uA.ui = 0x8000000000000000; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x008: // -0 + uA.ui = 0xfff0000000000000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7ff0000000000000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x100: // sNaN + 
softfloat_exceptionFlags |= softfloat_flag_invalid; + case 0x200: //qNaN + uA.ui = defaultNaNF64UI; + break; + case 0x004: // -subnormal + case 0x020: //+ sub + sub = true; + default: // +- normal + uA.ui = recip7(uA.ui, 11, 52, + softfloat_roundingMode, sub, &round_abnormal); + if (round_abnormal) + softfloat_exceptionFlags |= softfloat_flag_inexact | + softfloat_flag_overflow; + break; + } + + return uA.f; +} diff --git a/source/include/softfloat.h b/source/include/softfloat.h index f4e70ead..73bfcaba 100644 --- a/source/include/softfloat.h +++ b/source/include/softfloat.h @@ -175,6 +175,8 @@ bool f16_le_quiet( float16_t, float16_t ); bool f16_lt_quiet( float16_t, float16_t ); bool f16_isSignalingNaN( float16_t ); uint_fast16_t f16_classify( float16_t ); +float16_t f16_rsqrte7( float16_t ); +float16_t f16_recip7( float16_t ); /*---------------------------------------------------------------------------- | 16-bit (brain float 16) floating-point operations. @@ -221,6 +223,9 @@ bool f32_eq_signaling( float32_t, float32_t ); bool f32_le_quiet( float32_t, float32_t ); bool f32_lt_quiet( float32_t, float32_t ); bool f32_isSignalingNaN( float32_t ); +uint_fast16_t f32_classify( float32_t ); +float32_t f32_rsqrte7( float32_t ); +float32_t f32_recip7( float32_t ); /*---------------------------------------------------------------------------- | 64-bit (double-precision) floating-point operations. @@ -258,6 +263,9 @@ bool f64_eq_signaling( float64_t, float64_t ); bool f64_le_quiet( float64_t, float64_t ); bool f64_lt_quiet( float64_t, float64_t ); bool f64_isSignalingNaN( float64_t ); +uint_fast16_t f64_classify( float64_t ); +float64_t f64_rsqrte7( float64_t ); +float64_t f64_recip7( float64_t ); /*---------------------------------------------------------------------------- | Rounding precision for 80-bit extended double-precision floating-point. 
From 20486de4da39c8d87966c530f6ff67bfff1b942e Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 22 Sep 2022 17:31:47 -0700 Subject: [PATCH 5/8] Suppress unused-paramter warnings in softfloat --- source/ARM-VFPv2-defaultNaN/specialize.h | 6 ++++++ source/RISCV/specialize.h | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/source/ARM-VFPv2-defaultNaN/specialize.h b/source/ARM-VFPv2-defaultNaN/specialize.h index 974fed12..f39a4518 100644 --- a/source/ARM-VFPv2-defaultNaN/specialize.h +++ b/source/ARM-VFPv2-defaultNaN/specialize.h @@ -234,6 +234,9 @@ INLINE struct uint128 softfloat_commonNaNToExtF80UI( const struct commonNaN *aPtr ) { struct uint128 uiZ; + + (void) aPtr; + uiZ.v64 = defaultNaNExtF80UI64; uiZ.v0 = defaultNaNExtF80UI0; return uiZ; @@ -291,6 +294,9 @@ INLINE struct uint128 softfloat_commonNaNToF128UI( const struct commonNaN *aPtr ) { struct uint128 uiZ; + + (void) aPtr; + uiZ.v64 = defaultNaNF128UI64; uiZ.v0 = defaultNaNF128UI0; return uiZ; diff --git a/source/RISCV/specialize.h b/source/RISCV/specialize.h index ca62b0d2..c4dcf243 100644 --- a/source/RISCV/specialize.h +++ b/source/RISCV/specialize.h @@ -260,6 +260,9 @@ INLINE struct uint128 softfloat_commonNaNToExtF80UI( const struct commonNaN *aPtr ) { struct uint128 uiZ; + + (void) aPtr; + uiZ.v64 = defaultNaNExtF80UI64; uiZ.v0 = defaultNaNExtF80UI0; return uiZ; @@ -317,6 +320,9 @@ INLINE struct uint128 softfloat_commonNaNToF128UI( const struct commonNaN *aPtr ) { struct uint128 uiZ; + + (void) aPtr; + uiZ.v64 = defaultNaNF128UI64; uiZ.v0 = defaultNaNF128UI0; return uiZ; From e191bdb19078502b23e6297629b677cb4d0dde86 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 22 Sep 2022 18:35:58 -0700 Subject: [PATCH 6/8] Fix unused-variable warnings in softfloat --- source/ARM-VFPv2-defaultNaN/specialize.h | 8 ++++---- source/RISCV/specialize.h | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/source/ARM-VFPv2-defaultNaN/specialize.h 
b/source/ARM-VFPv2-defaultNaN/specialize.h index f39a4518..984f51a2 100644 --- a/source/ARM-VFPv2-defaultNaN/specialize.h +++ b/source/ARM-VFPv2-defaultNaN/specialize.h @@ -107,7 +107,7 @@ struct commonNaN { char _unused; }; | location pointed to by 'zPtr'. If the NaN is a signaling NaN, the invalid | exception is raised. *----------------------------------------------------------------------------*/ -#define softfloat_f16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x0200) ) softfloat_raiseFlags( softfloat_flag_invalid ) +#define softfloat_f16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x0200) ) (void) (zPtr), softfloat_raiseFlags( softfloat_flag_invalid ) /*---------------------------------------------------------------------------- | Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point @@ -142,7 +142,7 @@ uint_fast16_t | location pointed to by 'zPtr'. If the NaN is a signaling NaN, the invalid | exception is raised. *----------------------------------------------------------------------------*/ -#define softfloat_f32UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x00400000) ) softfloat_raiseFlags( softfloat_flag_invalid ) +#define softfloat_f32UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x00400000) ) (void) (zPtr), softfloat_raiseFlags( softfloat_flag_invalid ) /*---------------------------------------------------------------------------- | Converts the common NaN pointed to by 'aPtr' into a 32-bit floating-point @@ -177,7 +177,7 @@ uint_fast32_t | location pointed to by 'zPtr'. If the NaN is a signaling NaN, the invalid | exception is raised. *----------------------------------------------------------------------------*/ -#define softfloat_f64UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & UINT64_C( 0x0008000000000000 )) ) softfloat_raiseFlags( softfloat_flag_invalid ) +#define softfloat_f64UIToCommonNaN( uiA, zPtr ) if ( ! 
((uiA) & UINT64_C( 0x0008000000000000 )) ) (void) (zPtr), softfloat_raiseFlags( softfloat_flag_invalid ) /*---------------------------------------------------------------------------- | Converts the common NaN pointed to by 'aPtr' into a 64-bit floating-point @@ -283,7 +283,7 @@ struct uint128 | pointed to by 'zPtr'. If the NaN is a signaling NaN, the invalid exception | is raised. *----------------------------------------------------------------------------*/ -#define softfloat_f128UIToCommonNaN( uiA64, uiA0, zPtr ) if ( ! ((uiA64) & UINT64_C( 0x0000800000000000 )) ) softfloat_raiseFlags( softfloat_flag_invalid ) +#define softfloat_f128UIToCommonNaN( uiA64, uiA0, zPtr ) if ( ! ((uiA64) & UINT64_C( 0x0000800000000000 )) ) (void) (zPtr), softfloat_raiseFlags( softfloat_flag_invalid ) /*---------------------------------------------------------------------------- | Converts the common NaN pointed to by 'aPtr' into a 128-bit floating-point diff --git a/source/RISCV/specialize.h b/source/RISCV/specialize.h index c4dcf243..ebe4b8bb 100644 --- a/source/RISCV/specialize.h +++ b/source/RISCV/specialize.h @@ -114,7 +114,7 @@ struct commonNaN { char _unused; }; | location pointed to by 'zPtr'. If the NaN is a signaling NaN, the invalid | exception is raised. *----------------------------------------------------------------------------*/ -#define softfloat_f16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x0200) ) softfloat_raiseFlags( softfloat_flag_invalid ) +#define softfloat_f16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x0200) ) (void) (zPtr), softfloat_raiseFlags( softfloat_flag_invalid ) /*---------------------------------------------------------------------------- | Assuming 'uiA' has the bit pattern of a 16-bit BF16 floating-point NaN, converts @@ -168,7 +168,7 @@ uint_fast16_t | location pointed to by 'zPtr'. If the NaN is a signaling NaN, the invalid | exception is raised. 
*----------------------------------------------------------------------------*/ -#define softfloat_f32UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x00400000) ) softfloat_raiseFlags( softfloat_flag_invalid ) +#define softfloat_f32UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x00400000) ) (void) (zPtr), softfloat_raiseFlags( softfloat_flag_invalid ) /*---------------------------------------------------------------------------- | Converts the common NaN pointed to by 'aPtr' into a 32-bit floating-point @@ -203,7 +203,7 @@ uint_fast32_t | location pointed to by 'zPtr'. If the NaN is a signaling NaN, the invalid | exception is raised. *----------------------------------------------------------------------------*/ -#define softfloat_f64UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & UINT64_C( 0x0008000000000000 )) ) softfloat_raiseFlags( softfloat_flag_invalid ) +#define softfloat_f64UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & UINT64_C( 0x0008000000000000 )) ) (void) (zPtr), softfloat_raiseFlags( softfloat_flag_invalid ) /*---------------------------------------------------------------------------- | Converts the common NaN pointed to by 'aPtr' into a 64-bit floating-point @@ -309,7 +309,7 @@ struct uint128 | pointed to by 'zPtr'. If the NaN is a signaling NaN, the invalid exception | is raised. *----------------------------------------------------------------------------*/ -#define softfloat_f128UIToCommonNaN( uiA64, uiA0, zPtr ) if ( ! ((uiA64) & UINT64_C( 0x0000800000000000 )) ) softfloat_raiseFlags( softfloat_flag_invalid ) +#define softfloat_f128UIToCommonNaN( uiA64, uiA0, zPtr ) if ( ! 
((uiA64) & UINT64_C( 0x0000800000000000 )) ) (void) (zPtr), softfloat_raiseFlags( softfloat_flag_invalid ) /*---------------------------------------------------------------------------- | Converts the common NaN pointed to by 'aPtr' into a 128-bit floating-point From 25e2a6062d1ac5efd7945cc59c576c26c4f7342a Mon Sep 17 00:00:00 2001 From: Weiwei Li Date: Fri, 14 Apr 2023 22:35:12 +0800 Subject: [PATCH 7/8] Add convertion function between binary float16 and float32 in softfloat --- build/Linux-386-GCC/Makefile | 4 ++++ build/Linux-386-SSE2-GCC/Makefile | 4 ++++ build/Linux-ARM-VFPv2-GCC/Makefile | 4 ++++ build/Win32-MinGW/Makefile | 4 ++++ build/Win32-SSE2-MinGW/Makefile | 4 ++++ build/Win64-MinGW-w64/Makefile | 4 ++++ build/template-not-FAST_INT64/Makefile | 4 ++++ source/8086-SSE/specialize.h | 19 +++++++++++++++++++ source/8086/specialize.h | 19 +++++++++++++++++++ source/ARM-VFPv2-defaultNaN/specialize.h | 19 +++++++++++++++++++ source/ARM-VFPv2/specialize.h | 19 +++++++++++++++++++ 11 files changed, 104 insertions(+) diff --git a/build/Linux-386-GCC/Makefile b/build/Linux-386-GCC/Makefile index 05c9f8f9..fa16f2aa 100644 --- a/build/Linux-386-GCC/Makefile +++ b/build/Linux-386-GCC/Makefile @@ -112,6 +112,7 @@ OBJS_OTHERS = \ s_roundMToUI64$(OBJ) \ s_roundToI32$(OBJ) \ s_roundMToI64$(OBJ) \ + s_roundPackToBF16$(OBJ) \ s_normSubnormalF16Sig$(OBJ) \ s_roundPackToF16$(OBJ) \ s_normRoundPackToF16$(OBJ) \ @@ -166,6 +167,8 @@ OBJS_OTHERS = \ i64_to_f64$(OBJ) \ i64_to_extF80M$(OBJ) \ i64_to_f128M$(OBJ) \ + bf16_isSignalingNaN$(OBJ) \ + bf16_to_f32$(OBJ) \ f16_to_ui32$(OBJ) \ f16_to_ui64$(OBJ) \ f16_to_i32$(OBJ) \ @@ -207,6 +210,7 @@ OBJS_OTHERS = \ f32_to_i32_r_minMag$(OBJ) \ f32_to_i64_r_minMag$(OBJ) \ f32_to_ui16$(OBJ) \ + f32_to_bf16$(OBJ) \ f32_to_f16$(OBJ) \ f32_to_f64$(OBJ) \ f32_to_i16$(OBJ) \ diff --git a/build/Linux-386-SSE2-GCC/Makefile b/build/Linux-386-SSE2-GCC/Makefile index 67ad74aa..436501d1 100644 --- a/build/Linux-386-SSE2-GCC/Makefile +++ 
b/build/Linux-386-SSE2-GCC/Makefile @@ -112,6 +112,7 @@ OBJS_OTHERS = \ s_roundMToUI64$(OBJ) \ s_roundToI32$(OBJ) \ s_roundMToI64$(OBJ) \ + s_roundPackToBF16$(OBJ) \ s_normSubnormalF16Sig$(OBJ) \ s_roundPackToF16$(OBJ) \ s_normRoundPackToF16$(OBJ) \ @@ -166,6 +167,8 @@ OBJS_OTHERS = \ i64_to_f64$(OBJ) \ i64_to_extF80M$(OBJ) \ i64_to_f128M$(OBJ) \ + bf16_isSignalingNaN$(OBJ) \ + bf16_to_f32$(OBJ) \ f16_to_ui32$(OBJ) \ f16_to_ui64$(OBJ) \ f16_to_i32$(OBJ) \ @@ -207,6 +210,7 @@ OBJS_OTHERS = \ f32_to_i32_r_minMag$(OBJ) \ f32_to_i64_r_minMag$(OBJ) \ f32_to_ui16$(OBJ) \ + f32_to_bf16$(OBJ) \ f32_to_f16$(OBJ) \ f32_to_f64$(OBJ) \ f32_to_i16$(OBJ) \ diff --git a/build/Linux-ARM-VFPv2-GCC/Makefile b/build/Linux-ARM-VFPv2-GCC/Makefile index 827aa5ab..05ce42f1 100644 --- a/build/Linux-ARM-VFPv2-GCC/Makefile +++ b/build/Linux-ARM-VFPv2-GCC/Makefile @@ -110,6 +110,7 @@ OBJS_OTHERS = \ s_roundMToUI64$(OBJ) \ s_roundToI32$(OBJ) \ s_roundMToI64$(OBJ) \ + s_roundPackToBF16$(OBJ) \ s_normSubnormalF16Sig$(OBJ) \ s_roundPackToF16$(OBJ) \ s_normRoundPackToF16$(OBJ) \ @@ -164,6 +165,8 @@ OBJS_OTHERS = \ i64_to_f64$(OBJ) \ i64_to_extF80M$(OBJ) \ i64_to_f128M$(OBJ) \ + bf16_isSignalingNaN$(OBJ) \ + bf16_to_f32$(OBJ) \ f16_to_ui32$(OBJ) \ f16_to_ui64$(OBJ) \ f16_to_i32$(OBJ) \ @@ -205,6 +208,7 @@ OBJS_OTHERS = \ f32_to_i32_r_minMag$(OBJ) \ f32_to_i64_r_minMag$(OBJ) \ f32_to_ui16$(OBJ) \ + f32_to_bf16$(OBJ) \ f32_to_f16$(OBJ) \ f32_to_f64$(OBJ) \ f32_to_i16$(OBJ) \ diff --git a/build/Win32-MinGW/Makefile b/build/Win32-MinGW/Makefile index 05c9f8f9..fa16f2aa 100644 --- a/build/Win32-MinGW/Makefile +++ b/build/Win32-MinGW/Makefile @@ -112,6 +112,7 @@ OBJS_OTHERS = \ s_roundMToUI64$(OBJ) \ s_roundToI32$(OBJ) \ s_roundMToI64$(OBJ) \ + s_roundPackToBF16$(OBJ) \ s_normSubnormalF16Sig$(OBJ) \ s_roundPackToF16$(OBJ) \ s_normRoundPackToF16$(OBJ) \ @@ -166,6 +167,8 @@ OBJS_OTHERS = \ i64_to_f64$(OBJ) \ i64_to_extF80M$(OBJ) \ i64_to_f128M$(OBJ) \ + bf16_isSignalingNaN$(OBJ) \ + bf16_to_f32$(OBJ) \ 
f16_to_ui32$(OBJ) \ f16_to_ui64$(OBJ) \ f16_to_i32$(OBJ) \ @@ -207,6 +210,7 @@ OBJS_OTHERS = \ f32_to_i32_r_minMag$(OBJ) \ f32_to_i64_r_minMag$(OBJ) \ f32_to_ui16$(OBJ) \ + f32_to_bf16$(OBJ) \ f32_to_f16$(OBJ) \ f32_to_f64$(OBJ) \ f32_to_i16$(OBJ) \ diff --git a/build/Win32-SSE2-MinGW/Makefile b/build/Win32-SSE2-MinGW/Makefile index 67ad74aa..436501d1 100644 --- a/build/Win32-SSE2-MinGW/Makefile +++ b/build/Win32-SSE2-MinGW/Makefile @@ -112,6 +112,7 @@ OBJS_OTHERS = \ s_roundMToUI64$(OBJ) \ s_roundToI32$(OBJ) \ s_roundMToI64$(OBJ) \ + s_roundPackToBF16$(OBJ) \ s_normSubnormalF16Sig$(OBJ) \ s_roundPackToF16$(OBJ) \ s_normRoundPackToF16$(OBJ) \ @@ -166,6 +167,8 @@ OBJS_OTHERS = \ i64_to_f64$(OBJ) \ i64_to_extF80M$(OBJ) \ i64_to_f128M$(OBJ) \ + bf16_isSignalingNaN$(OBJ) \ + bf16_to_f32$(OBJ) \ f16_to_ui32$(OBJ) \ f16_to_ui64$(OBJ) \ f16_to_i32$(OBJ) \ @@ -207,6 +210,7 @@ OBJS_OTHERS = \ f32_to_i32_r_minMag$(OBJ) \ f32_to_i64_r_minMag$(OBJ) \ f32_to_ui16$(OBJ) \ + f32_to_bf16$(OBJ) \ f32_to_f16$(OBJ) \ f32_to_f64$(OBJ) \ f32_to_i16$(OBJ) \ diff --git a/build/Win64-MinGW-w64/Makefile b/build/Win64-MinGW-w64/Makefile index 609a43c6..113cbe84 100644 --- a/build/Win64-MinGW-w64/Makefile +++ b/build/Win64-MinGW-w64/Makefile @@ -114,6 +114,7 @@ OBJS_OTHERS = \ s_roundToUI64$(OBJ) \ s_roundToI32$(OBJ) \ s_roundToI64$(OBJ) \ + s_roundPackToBF16$(OBJ) \ s_normSubnormalF16Sig$(OBJ) \ s_roundPackToF16$(OBJ) \ s_normRoundPackToF16$(OBJ) \ @@ -172,6 +173,8 @@ OBJS_OTHERS = \ i64_to_extF80M$(OBJ) \ i64_to_f128$(OBJ) \ i64_to_f128M$(OBJ) \ + bf16_isSignalingNaN$(OBJ) \ + bf16_to_f32$(OBJ) \ f16_to_ui32$(OBJ) \ f16_to_ui64$(OBJ) \ f16_to_i32$(OBJ) \ @@ -215,6 +218,7 @@ OBJS_OTHERS = \ f32_to_i32_r_minMag$(OBJ) \ f32_to_i64_r_minMag$(OBJ) \ f32_to_ui16$(OBJ) \ + f32_to_bf16$(OBJ) \ f32_to_f16$(OBJ) \ f32_to_f64$(OBJ) \ f32_to_i16$(OBJ) \ diff --git a/build/template-not-FAST_INT64/Makefile b/build/template-not-FAST_INT64/Makefile index ea35b4ab..8b0bc3c6 100644 --- 
a/build/template-not-FAST_INT64/Makefile +++ b/build/template-not-FAST_INT64/Makefile @@ -112,6 +112,7 @@ OBJS_OTHERS = \ s_roundMToUI64$(OBJ) \ s_roundToI32$(OBJ) \ s_roundMToI64$(OBJ) \ + s_roundPackToBF16$(OBJ) \ s_normSubnormalF16Sig$(OBJ) \ s_roundPackToF16$(OBJ) \ s_normRoundPackToF16$(OBJ) \ @@ -166,6 +167,8 @@ OBJS_OTHERS = \ i64_to_f64$(OBJ) \ i64_to_extF80M$(OBJ) \ i64_to_f128M$(OBJ) \ + bf16_isSignalingNaN$(OBJ) \ + bf16_to_f32$(OBJ) \ f16_to_ui32$(OBJ) \ f16_to_ui64$(OBJ) \ f16_to_i32$(OBJ) \ @@ -207,6 +210,7 @@ OBJS_OTHERS = \ f32_to_i32_r_minMag$(OBJ) \ f32_to_i64_r_minMag$(OBJ) \ f32_to_ui16$(OBJ) \ + f32_to_bf16$(OBJ) \ f32_to_f16$(OBJ) \ f32_to_f64$(OBJ) \ f32_to_i16$(OBJ) \ diff --git a/source/8086-SSE/specialize.h b/source/8086-SSE/specialize.h index 2f538b07..64b9e534 100644 --- a/source/8086-SSE/specialize.h +++ b/source/8086-SSE/specialize.h @@ -101,6 +101,11 @@ struct commonNaN { *----------------------------------------------------------------------------*/ #define defaultNaNF16UI 0xFE00 +/*---------------------------------------------------------------------------- +| The bit pattern for a default generated binary 16-bit floating-point NaN. +*----------------------------------------------------------------------------*/ +#define defaultNaNBF16UI 0x7FC0 + /*---------------------------------------------------------------------------- | Returns true when 16-bit unsigned integer 'uiA' has the bit pattern of a | 16-bit floating-point signaling NaN. @@ -116,6 +121,20 @@ struct commonNaN { *----------------------------------------------------------------------------*/ void softfloat_f16UIToCommonNaN( uint_fast16_t uiA, struct commonNaN *zPtr ); +/*---------------------------------------------------------------------------- +| Assuming 'uiA' has the bit pattern of a binary 16-bit floating-point NaN, converts +| this NaN to the common NaN form, and stores the resulting common NaN at the +| location pointed to by 'zPtr'. 
If the NaN is a signaling NaN, the invalid +| exception is raised. +*----------------------------------------------------------------------------*/ +#define softfloat_bf16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x040) ) (void) (zPtr), softfloat_raiseFlags( softfloat_flag_invalid ) + +/*---------------------------------------------------------------------------- +| Converts the common NaN pointed to by 'aPtr' into a binary 16-bit floating-point +| NaN, and returns the bit pattern of this value as an unsigned integer. +*----------------------------------------------------------------------------*/ +#define softfloat_commonNaNToBF16UI( aPtr ) ((uint_fast16_t) defaultNaNBF16UI) + /*---------------------------------------------------------------------------- | Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point | NaN, and returns the bit pattern of this value as an unsigned integer. diff --git a/source/8086/specialize.h b/source/8086/specialize.h index 0fd44993..7f455491 100644 --- a/source/8086/specialize.h +++ b/source/8086/specialize.h @@ -101,6 +101,11 @@ struct commonNaN { *----------------------------------------------------------------------------*/ #define defaultNaNF16UI 0xFE00 +/*---------------------------------------------------------------------------- +| The bit pattern for a default generated binary 16-bit floating-point NaN. +*----------------------------------------------------------------------------*/ +#define defaultNaNBF16UI 0x7FC0 + /*---------------------------------------------------------------------------- | Returns true when 16-bit unsigned integer 'uiA' has the bit pattern of a | 16-bit floating-point signaling NaN. 
@@ -116,6 +121,20 @@ struct commonNaN { *----------------------------------------------------------------------------*/ void softfloat_f16UIToCommonNaN( uint_fast16_t uiA, struct commonNaN *zPtr ); +/*---------------------------------------------------------------------------- +| Assuming 'uiA' has the bit pattern of a binary 16-bit floating-point NaN, converts +| this NaN to the common NaN form, and stores the resulting common NaN at the +| location pointed to by 'zPtr'. If the NaN is a signaling NaN, the invalid +| exception is raised. +*----------------------------------------------------------------------------*/ +#define softfloat_bf16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x040) ) (void) (zPtr), softfloat_raiseFlags( softfloat_flag_invalid ) + +/*---------------------------------------------------------------------------- +| Converts the common NaN pointed to by 'aPtr' into a binary 16-bit floating-point +| NaN, and returns the bit pattern of this value as an unsigned integer. +*----------------------------------------------------------------------------*/ +#define softfloat_commonNaNToBF16UI( aPtr ) ((uint_fast16_t) defaultNaNBF16UI) + /*---------------------------------------------------------------------------- | Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point | NaN, and returns the bit pattern of this value as an unsigned integer. diff --git a/source/ARM-VFPv2-defaultNaN/specialize.h b/source/ARM-VFPv2-defaultNaN/specialize.h index 984f51a2..881b79f3 100644 --- a/source/ARM-VFPv2-defaultNaN/specialize.h +++ b/source/ARM-VFPv2-defaultNaN/specialize.h @@ -94,6 +94,11 @@ struct commonNaN { char _unused; }; *----------------------------------------------------------------------------*/ #define defaultNaNF16UI 0x7E00 +/*---------------------------------------------------------------------------- +| The bit pattern for a default generated binary 16-bit floating-point NaN. 
+*----------------------------------------------------------------------------*/ +#define defaultNaNBF16UI 0x7FC0 + /*---------------------------------------------------------------------------- | Returns true when 16-bit unsigned integer 'uiA' has the bit pattern of a | 16-bit floating-point signaling NaN. @@ -109,6 +114,20 @@ struct commonNaN { char _unused; }; *----------------------------------------------------------------------------*/ #define softfloat_f16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x0200) ) (void) (zPtr), softfloat_raiseFlags( softfloat_flag_invalid ) +/*---------------------------------------------------------------------------- +| Assuming 'uiA' has the bit pattern of a binary 16-bit floating-point NaN, converts +| this NaN to the common NaN form, and stores the resulting common NaN at the +| location pointed to by 'zPtr'. If the NaN is a signaling NaN, the invalid +| exception is raised. +*----------------------------------------------------------------------------*/ +#define softfloat_bf16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x040) ) (void) (zPtr), softfloat_raiseFlags( softfloat_flag_invalid ) + +/*---------------------------------------------------------------------------- +| Converts the common NaN pointed to by 'aPtr' into a binary 16-bit floating-point +| NaN, and returns the bit pattern of this value as an unsigned integer. +*----------------------------------------------------------------------------*/ +#define softfloat_commonNaNToBF16UI( aPtr ) ((uint_fast16_t) defaultNaNBF16UI) + /*---------------------------------------------------------------------------- | Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point | NaN, and returns the bit pattern of this value as an unsigned integer. 
diff --git a/source/ARM-VFPv2/specialize.h b/source/ARM-VFPv2/specialize.h index d7b5bfba..ed377ecf 100644 --- a/source/ARM-VFPv2/specialize.h +++ b/source/ARM-VFPv2/specialize.h @@ -101,6 +101,11 @@ struct commonNaN { *----------------------------------------------------------------------------*/ #define defaultNaNF16UI 0x7E00 +/*---------------------------------------------------------------------------- +| The bit pattern for a default generated binary 16-bit floating-point NaN. +*----------------------------------------------------------------------------*/ +#define defaultNaNBF16UI 0x7FC0 + /*---------------------------------------------------------------------------- | Returns true when 16-bit unsigned integer 'uiA' has the bit pattern of a | 16-bit floating-point signaling NaN. @@ -116,6 +121,20 @@ struct commonNaN { *----------------------------------------------------------------------------*/ void softfloat_f16UIToCommonNaN( uint_fast16_t uiA, struct commonNaN *zPtr ); +/*---------------------------------------------------------------------------- +| Assuming 'uiA' has the bit pattern of a binary 16-bit floating-point NaN, converts +| this NaN to the common NaN form, and stores the resulting common NaN at the +| location pointed to by 'zPtr'. If the NaN is a signaling NaN, the invalid +| exception is raised. +*----------------------------------------------------------------------------*/ +#define softfloat_bf16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x040) ) (void) (zPtr), softfloat_raiseFlags( softfloat_flag_invalid ) + +/*---------------------------------------------------------------------------- +| Converts the common NaN pointed to by 'aPtr' into a binary 16-bit floating-point +| NaN, and returns the bit pattern of this value as an unsigned integer. 
+*----------------------------------------------------------------------------*/ +#define softfloat_commonNaNToBF16UI( aPtr ) ((uint_fast16_t) defaultNaNBF16UI) + /*---------------------------------------------------------------------------- | Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point | NaN, and returns the bit pattern of this value as an unsigned integer. From 6ffd8a88fcd318d45a87796e2872f947b4acf763 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 24 Sep 2017 20:25:34 -0700 Subject: [PATCH 8/8] Add missing classify functions --- build/Linux-386-GCC/Makefile | 2 ++ build/Linux-386-SSE2-GCC/Makefile | 2 ++ build/Linux-ARM-VFPv2-GCC/Makefile | 2 ++ build/Linux-RISCV64-GCC/Makefile | 2 ++ build/Linux-x86_64-GCC/Makefile | 2 ++ build/Win32-MinGW/Makefile | 2 ++ build/Win32-SSE2-MinGW/Makefile | 2 ++ build/Win64-MinGW-w64/Makefile | 2 ++ build/template-FAST_INT64/Makefile | 2 ++ build/template-not-FAST_INT64/Makefile | 2 ++ source/f32_classify.c | 36 ++++++++++++++++++++++++++ source/f64_classify.c | 36 ++++++++++++++++++++++++++ 12 files changed, 92 insertions(+) create mode 100755 source/f32_classify.c create mode 100755 source/f64_classify.c diff --git a/build/Linux-386-GCC/Makefile b/build/Linux-386-GCC/Makefile index fa16f2aa..45f82fc2 100644 --- a/build/Linux-386-GCC/Makefile +++ b/build/Linux-386-GCC/Makefile @@ -188,6 +188,8 @@ OBJS_OTHERS = \ f16_roundToInt$(OBJ) \ f16_add$(OBJ) \ f16_classify$(OBJ) \ + f32_classify$(OBJ) \ + f64_classify$(OBJ) \ f16_sub$(OBJ) \ f16_mul$(OBJ) \ f16_mulAdd$(OBJ) \ diff --git a/build/Linux-386-SSE2-GCC/Makefile b/build/Linux-386-SSE2-GCC/Makefile index 436501d1..dcdbe68f 100644 --- a/build/Linux-386-SSE2-GCC/Makefile +++ b/build/Linux-386-SSE2-GCC/Makefile @@ -188,6 +188,8 @@ OBJS_OTHERS = \ f16_roundToInt$(OBJ) \ f16_add$(OBJ) \ f16_classify$(OBJ) \ + f32_classify$(OBJ) \ + f64_classify$(OBJ) \ f16_sub$(OBJ) \ f16_mul$(OBJ) \ f16_mulAdd$(OBJ) \ diff --git 
a/build/Linux-ARM-VFPv2-GCC/Makefile b/build/Linux-ARM-VFPv2-GCC/Makefile index 05ce42f1..0011f498 100644 --- a/build/Linux-ARM-VFPv2-GCC/Makefile +++ b/build/Linux-ARM-VFPv2-GCC/Makefile @@ -186,6 +186,8 @@ OBJS_OTHERS = \ f16_roundToInt$(OBJ) \ f16_add$(OBJ) \ f16_classify$(OBJ) \ + f32_classify$(OBJ) \ + f64_classify$(OBJ) \ f16_sub$(OBJ) \ f16_mul$(OBJ) \ f16_mulAdd$(OBJ) \ diff --git a/build/Linux-RISCV64-GCC/Makefile b/build/Linux-RISCV64-GCC/Makefile index a6bcccde..03810a31 100644 --- a/build/Linux-RISCV64-GCC/Makefile +++ b/build/Linux-RISCV64-GCC/Makefile @@ -201,6 +201,8 @@ OBJS_OTHERS = \ f16_roundToInt$(OBJ) \ f16_add$(OBJ) \ f16_classify$(OBJ) \ + f32_classify$(OBJ) \ + f64_classify$(OBJ) \ f16_sub$(OBJ) \ f16_mul$(OBJ) \ f16_mulAdd$(OBJ) \ diff --git a/build/Linux-x86_64-GCC/Makefile b/build/Linux-x86_64-GCC/Makefile index 381e0c1e..fac27ae0 100644 --- a/build/Linux-x86_64-GCC/Makefile +++ b/build/Linux-x86_64-GCC/Makefile @@ -199,6 +199,8 @@ OBJS_OTHERS = \ f16_roundToInt$(OBJ) \ f16_add$(OBJ) \ f16_classify$(OBJ) \ + f32_classify$(OBJ) \ + f64_classify$(OBJ) \ f16_sub$(OBJ) \ f16_mul$(OBJ) \ f16_mulAdd$(OBJ) \ diff --git a/build/Win32-MinGW/Makefile b/build/Win32-MinGW/Makefile index fa16f2aa..45f82fc2 100644 --- a/build/Win32-MinGW/Makefile +++ b/build/Win32-MinGW/Makefile @@ -188,6 +188,8 @@ OBJS_OTHERS = \ f16_roundToInt$(OBJ) \ f16_add$(OBJ) \ f16_classify$(OBJ) \ + f32_classify$(OBJ) \ + f64_classify$(OBJ) \ f16_sub$(OBJ) \ f16_mul$(OBJ) \ f16_mulAdd$(OBJ) \ diff --git a/build/Win32-SSE2-MinGW/Makefile b/build/Win32-SSE2-MinGW/Makefile index 436501d1..dcdbe68f 100644 --- a/build/Win32-SSE2-MinGW/Makefile +++ b/build/Win32-SSE2-MinGW/Makefile @@ -188,6 +188,8 @@ OBJS_OTHERS = \ f16_roundToInt$(OBJ) \ f16_add$(OBJ) \ f16_classify$(OBJ) \ + f32_classify$(OBJ) \ + f64_classify$(OBJ) \ f16_sub$(OBJ) \ f16_mul$(OBJ) \ f16_mulAdd$(OBJ) \ diff --git a/build/Win64-MinGW-w64/Makefile b/build/Win64-MinGW-w64/Makefile index 113cbe84..cf7f3efb 100644 --- 
a/build/Win64-MinGW-w64/Makefile
+++ b/build/Win64-MinGW-w64/Makefile
@@ -196,6 +196,8 @@ OBJS_OTHERS = \
   f16_roundToInt$(OBJ) \
   f16_add$(OBJ) \
   f16_classify$(OBJ) \
+  f32_classify$(OBJ) \
+  f64_classify$(OBJ) \
   f16_sub$(OBJ) \
   f16_mul$(OBJ) \
   f16_mulAdd$(OBJ) \
diff --git a/build/template-FAST_INT64/Makefile b/build/template-FAST_INT64/Makefile
index 53eeed27..278dea4f 100644
--- a/build/template-FAST_INT64/Makefile
+++ b/build/template-FAST_INT64/Makefile
@@ -198,6 +198,8 @@ OBJS_OTHERS = \
   f16_roundToInt$(OBJ) \
   f16_add$(OBJ) \
   f16_classify$(OBJ) \
+  f32_classify$(OBJ) \
+  f64_classify$(OBJ) \
   f16_sub$(OBJ) \
   f16_mul$(OBJ) \
   f16_mulAdd$(OBJ) \
diff --git a/build/template-not-FAST_INT64/Makefile b/build/template-not-FAST_INT64/Makefile
index 8b0bc3c6..1aa53973 100644
--- a/build/template-not-FAST_INT64/Makefile
+++ b/build/template-not-FAST_INT64/Makefile
@@ -188,6 +188,8 @@ OBJS_OTHERS = \
   f16_roundToInt$(OBJ) \
   f16_add$(OBJ) \
   f16_classify$(OBJ) \
+  f32_classify$(OBJ) \
+  f64_classify$(OBJ) \
   f16_sub$(OBJ) \
   f16_mul$(OBJ) \
   f16_mulAdd$(OBJ) \
diff --git a/source/f32_classify.c b/source/f32_classify.c
new file mode 100755
index 00000000..83fad878
--- /dev/null
+++ b/source/f32_classify.c
@@ -0,0 +1,36 @@
+/* f32_classify(): report which class of 32-bit float 'a' is, as a bit mask. */
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+/* 'a' is examined only through its bit pattern; result bits 0-9 are labelled below. */
+uint_fast16_t f32_classify( float32_t a )
+{
+    union ui32_f32 uA;
+    uint_fast32_t uiA;
+
+    uA.f = a;
+    uiA = uA.ui;
+    /* Decode the fields once; every mask bit below is a combination of them. */
+    uint_fast16_t infOrNaN = expF32UI( uiA ) == 0xFF;     /* exponent all ones */
+    uint_fast16_t subnormalOrZero = expF32UI( uiA ) == 0; /* exponent all zeros */
+    bool sign = signF32UI( uiA );
+    bool fracZero = fracF32UI( uiA ) == 0;
+    bool isNaN = isNaNF32UI( uiA );
+    bool isSNaN = softfloat_isSigNaNF32UI( uiA );
+    /* '<<' binds tighter than '|', so each parenthesized test lands on its own bit. */
+    return
+        (  sign && infOrNaN && fracZero )          << 0 |  /* negative infinity */
+        (  sign && !infOrNaN && !subnormalOrZero ) << 1 |  /* negative normal */
+        (  sign && subnormalOrZero && !fracZero )  << 2 |  /* negative subnormal */
+        (  sign && subnormalOrZero && fracZero )   << 3 |  /* negative zero */
+        ( !sign && infOrNaN && fracZero )          << 7 |  /* positive infinity */
+        ( !sign && !infOrNaN && !subnormalOrZero ) << 6 |  /* positive normal */
+        ( !sign && subnormalOrZero && !fracZero )  << 5 |  /* positive subnormal */
+        ( !sign && subnormalOrZero && fracZero )   << 4 |  /* positive zero */
+        ( isNaN &&  isSNaN )                       << 8 |  /* signaling NaN */
+        ( isNaN && !isSNaN )                       << 9;   /* quiet NaN */
+}
+
diff --git a/source/f64_classify.c b/source/f64_classify.c
new file mode 100755
index 00000000..180abde3
--- /dev/null
+++ b/source/f64_classify.c
@@ -0,0 +1,36 @@
+/* f64_classify(): report which class of 64-bit float 'a' is, as a bit mask. */
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+/* 'a' is examined only through its bit pattern; result bits 0-9 are labelled below. */
+uint_fast16_t f64_classify( float64_t a )
+{
+    union ui64_f64 uA;
+    uint_fast64_t uiA;
+
+    uA.f = a;
+    uiA = uA.ui;
+    /* Decode the fields once; every mask bit below is a combination of them. */
+    uint_fast16_t infOrNaN = expF64UI( uiA ) == 0x7FF;    /* exponent all ones */
+    uint_fast16_t subnormalOrZero = expF64UI( uiA ) == 0; /* exponent all zeros */
+    bool sign = signF64UI( uiA );
+    bool fracZero = fracF64UI( uiA ) == 0;
+    bool isNaN = isNaNF64UI( uiA );
+    bool isSNaN = softfloat_isSigNaNF64UI( uiA );
+    /* '<<' binds tighter than '|', so each parenthesized test lands on its own bit. */
+    return
+        (  sign && infOrNaN && fracZero )          << 0 |  /* negative infinity */
+        (  sign && !infOrNaN && !subnormalOrZero ) << 1 |  /* negative normal */
+        (  sign && subnormalOrZero && !fracZero )  << 2 |  /* negative subnormal */
+        (  sign && subnormalOrZero && fracZero )   << 3 |  /* negative zero */
+        ( !sign && infOrNaN && fracZero )          << 7 |  /* positive infinity */
+        ( !sign && !infOrNaN && !subnormalOrZero ) << 6 |  /* positive normal */
+        ( !sign && subnormalOrZero && !fracZero )  << 5 |  /* positive subnormal */
+        ( !sign && subnormalOrZero && fracZero )   << 4 |  /* positive zero */
+        ( isNaN &&  isSNaN )                       << 8 |  /* signaling NaN */
+        ( isNaN && !isSNaN )                       << 9;   /* quiet NaN */
+}
+