From eaa5897534cbd263d0cdbf780f72133c2fe8d8d4 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Mon, 27 Jan 2025 14:41:40 +0000 Subject: [PATCH] [libclc] Optimize CLC vector is(un)ordered builtins (#124546) These are similar to 347fb208, but these builtins are expressed in terms of other builtins. The LLVM IR generated features the same fcmp ord/uno comparisons as before, but consistently in vector form. --- .../clc/include/clc/relational/relational.h | 79 ------------------- .../lib/generic/relational/clc_isordered.cl | 22 +++--- .../lib/generic/relational/clc_isunordered.cl | 26 +++--- 3 files changed, 18 insertions(+), 109 deletions(-) diff --git a/libclc/clc/include/clc/relational/relational.h b/libclc/clc/include/clc/relational/relational.h index f32e7630203e4b..f269715cfc83c9 100644 --- a/libclc/clc/include/clc/relational/relational.h +++ b/libclc/clc/include/clc/relational/relational.h @@ -63,85 +63,6 @@ ARG_TYPE) \ _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(RET_TYPE, FUNCTION, ARG_TYPE) -#define _CLC_DEFINE_RELATIONAL_BINARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_NAME, \ - ARG0_TYPE, ARG1_TYPE) \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \ - return BUILTIN_NAME(x, y); \ - } - -#define _CLC_DEFINE_RELATIONAL_BINARY_VEC(RET_TYPE, FUNCTION, ARG0_TYPE, \ - ARG1_TYPE) \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \ - return (RET_TYPE)((RET_TYPE){FUNCTION(x.lo, y.lo), \ - FUNCTION(x.hi, y.hi)} != (RET_TYPE)0); \ - } - -#define _CLC_DEFINE_RELATIONAL_BINARY_VEC2(RET_TYPE, FUNCTION, ARG0_TYPE, \ - ARG1_TYPE) \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \ - return (RET_TYPE)((RET_TYPE){FUNCTION(x.lo, y.lo), \ - FUNCTION(x.hi, y.hi)} != (RET_TYPE)0); \ - } - -#define _CLC_DEFINE_RELATIONAL_BINARY_VEC3(RET_TYPE, FUNCTION, ARG0_TYPE, \ - ARG1_TYPE) \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \ - return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \ - FUNCTION(x.s2, y.s2)} != (RET_TYPE)0); \ - } - -#define _CLC_DEFINE_RELATIONAL_BINARY_VEC4(RET_TYPE, FUNCTION, ARG0_TYPE, \ - ARG1_TYPE) \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \ - return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \ - FUNCTION(x.s2, y.s2), \ - FUNCTION(x.s3, y.s3)} != (RET_TYPE)0); \ - } - -#define _CLC_DEFINE_RELATIONAL_BINARY_VEC8(RET_TYPE, FUNCTION, ARG0_TYPE, \ - ARG1_TYPE) \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \ - return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \ - FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3), \ - FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5), \ - FUNCTION(x.s6, y.s6), \ - FUNCTION(x.s7, y.s7)} != (RET_TYPE)0); \ - } - -#define _CLC_DEFINE_RELATIONAL_BINARY_VEC16(RET_TYPE, FUNCTION, ARG0_TYPE, \ - ARG1_TYPE) \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \ - return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \ - FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3), \ - FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5), \ - FUNCTION(x.s6, y.s6), FUNCTION(x.s7, y.s7), \ - FUNCTION(x.s8, y.s8), FUNCTION(x.s9, y.s9), \ - FUNCTION(x.sa, y.sa), FUNCTION(x.sb, y.sb), \ - FUNCTION(x.sc, y.sc), FUNCTION(x.sd, y.sd), \ - FUNCTION(x.se, y.se), \ - FUNCTION(x.sf, y.sf)} != (RET_TYPE)0); \ - } - -#define _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(RET_TYPE, FUNCTION, ARG0_TYPE, \ - ARG1_TYPE) \ - _CLC_DEFINE_RELATIONAL_BINARY_VEC2(RET_TYPE##2, FUNCTION, ARG0_TYPE##2, \ - ARG1_TYPE##2) \ - _CLC_DEFINE_RELATIONAL_BINARY_VEC3(RET_TYPE##3, FUNCTION, ARG0_TYPE##3, \ - ARG1_TYPE##3) \ - _CLC_DEFINE_RELATIONAL_BINARY_VEC4(RET_TYPE##4, FUNCTION, ARG0_TYPE##4, \ - ARG1_TYPE##4) \ - _CLC_DEFINE_RELATIONAL_BINARY_VEC8(RET_TYPE##8, FUNCTION, ARG0_TYPE##8, \ - ARG1_TYPE##8) \ - _CLC_DEFINE_RELATIONAL_BINARY_VEC16(RET_TYPE##16, FUNCTION, ARG0_TYPE##16, \ - ARG1_TYPE##16) - -#define _CLC_DEFINE_RELATIONAL_BINARY(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, \ - ARG0_TYPE, ARG1_TYPE) \ - _CLC_DEFINE_RELATIONAL_BINARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, \ - ARG0_TYPE, ARG1_TYPE) \ - _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(RET_TYPE, FUNCTION, ARG0_TYPE, \ - ARG1_TYPE) - #define _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(RET_TYPE, RET_TYPE_VEC, FUNCTION, \ ARG1_TYPE, ARG2_TYPE) \ _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \ diff --git a/libclc/clc/lib/generic/relational/clc_isordered.cl b/libclc/clc/lib/generic/relational/clc_isordered.cl index 6183d1ddf918f5..73cd96a0a56ed8 100644 --- a/libclc/clc/lib/generic/relational/clc_isordered.cl +++ b/libclc/clc/lib/generic/relational/clc_isordered.cl @@ -2,33 +2,29 @@ #include #include -#define _CLC_DEFINE_ISORDERED(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \ - return __clc_isequal(x, x) && __clc_isequal(y, y); \ - } +#define _CLC_RELATIONAL_OP(X, Y) \ + __clc_isequal((X), (X)) && __clc_isequal((Y), (Y)) -_CLC_DEFINE_ISORDERED(int, __clc_isordered, float, float) -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, __clc_isordered, float, float) +_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isordered, float, float) #ifdef cl_khr_fp64 + #pragma OPENCL EXTENSION cl_khr_fp64 : enable // The scalar version of __clc_isordered(double, double) returns an int, but the // vector versions return long. - -_CLC_DEFINE_ISORDERED(int, __clc_isordered, double, double) -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isordered, double, double) +_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isordered, double, double) #endif + #ifdef cl_khr_fp16 + #pragma OPENCL EXTENSION cl_khr_fp16 : enable // The scalar version of __clc_isordered(half, half) returns an int, but the // vector versions return short. - -_CLC_DEFINE_ISORDERED(int, __clc_isordered, half, half) -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isordered, half, half) +_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isordered, half, half) #endif -#undef _CLC_DEFINE_ISORDERED +#undef _CLC_RELATIONAL_OP diff --git a/libclc/clc/lib/generic/relational/clc_isunordered.cl b/libclc/clc/lib/generic/relational/clc_isunordered.cl index dbbec031a65e58..fefda8e5675176 100644 --- a/libclc/clc/lib/generic/relational/clc_isunordered.cl +++ b/libclc/clc/lib/generic/relational/clc_isunordered.cl @@ -1,12 +1,11 @@ #include +#include #include -// Note: It would be nice to use __builtin_isunordered with vector inputs, but -// it seems to only take scalar values as input, which will produce incorrect -// output for vector input types. +#define _CLC_RELATIONAL_OP(X, Y) \ + !__clc_isequal((X), (X)) || !__clc_isequal((Y), (Y)) -_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isunordered, __builtin_isunordered, - float, float) +_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isunordered, float, float) #ifdef cl_khr_fp64 @@ -14,25 +13,18 @@ _CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isunordered, __builtin_isunordered, // The scalar version of __clc_isunordered(double, double) returns an int, but // the vector versions return long. - -_CLC_DEF _CLC_OVERLOAD int __clc_isunordered(double x, double y) { - return __builtin_isunordered(x, y); -} - -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isunordered, double, double) +_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isunordered, double, double) #endif + #ifdef cl_khr_fp16 #pragma OPENCL EXTENSION cl_khr_fp16 : enable // The scalar version of __clc_isunordered(half, half) returns an int, but the // vector versions return short. - -_CLC_DEF _CLC_OVERLOAD int __clc_isunordered(half x, half y) { - return __builtin_isunordered(x, y); -} - -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isunordered, half, half) +_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isunordered, half, half) #endif + +#undef _CLC_RELATIONAL_OP