Skip to content

Commit

Permalink
[libclc] Optimize CLC vector is(un)ordered builtins (llvm#124546)
Browse files Browse the repository at this point in the history
These are similar to 347fb20, but these builtins are expressed in terms
of other builtins. The LLVM IR generated features the same fcmp ord/uno
comparisons as before, but consistently in vector form.
  • Loading branch information
frasercrmck authored Jan 27, 2025
1 parent f1d5e70 commit eaa5897
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 109 deletions.
79 changes: 0 additions & 79 deletions libclc/clc/include/clc/relational/relational.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,85 +63,6 @@
ARG_TYPE) \
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(RET_TYPE, FUNCTION, ARG_TYPE)

// Defines the overload RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) whose body
// simply forwards both arguments to BUILTIN_NAME and returns its result.
#define _CLC_DEFINE_RELATIONAL_BINARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_NAME, \
ARG0_TYPE, ARG1_TYPE) \
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
return BUILTIN_NAME(x, y); \
}

// Defines FUNCTION(x, y) for a vector type by calling FUNCTION on the .lo and
// .hi halves of both arguments, packing the two results into a RET_TYPE value
// and comparing that value against zero.
// NOTE(review): the body is identical to _CLC_DEFINE_RELATIONAL_BINARY_VEC2
// and _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL does not reference this macro;
// confirm whether it still has any users.
#define _CLC_DEFINE_RELATIONAL_BINARY_VEC(RET_TYPE, FUNCTION, ARG0_TYPE, \
ARG1_TYPE) \
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
return (RET_TYPE)((RET_TYPE){FUNCTION(x.lo, y.lo), \
FUNCTION(x.hi, y.hi)} != (RET_TYPE)0); \
}

// Defines the 2-wide overload of FUNCTION: FUNCTION is called once on the .lo
// components and once on the .hi components, and the packed results are
// compared against zero to produce the RET_TYPE return value.
// NOTE(review): the `!= 0` step presumably converts per-lane scalar results
// into OpenCL's vector "true" encoding (all bits set) — confirm against the
// OpenCL C specification.
#define _CLC_DEFINE_RELATIONAL_BINARY_VEC2(RET_TYPE, FUNCTION, ARG0_TYPE, \
ARG1_TYPE) \
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
return (RET_TYPE)((RET_TYPE){FUNCTION(x.lo, y.lo), \
FUNCTION(x.hi, y.hi)} != (RET_TYPE)0); \
}

// Defines the 3-wide overload of FUNCTION: FUNCTION is called separately on
// components s0, s1 and s2 of x and y, and the packed results are compared
// against zero to produce the RET_TYPE return value.
#define _CLC_DEFINE_RELATIONAL_BINARY_VEC3(RET_TYPE, FUNCTION, ARG0_TYPE, \
ARG1_TYPE) \
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \
FUNCTION(x.s2, y.s2)} != (RET_TYPE)0); \
}

// Defines the 4-wide overload of FUNCTION: FUNCTION is called separately on
// components s0..s3 of x and y, and the packed results are compared against
// zero to produce the RET_TYPE return value.
#define _CLC_DEFINE_RELATIONAL_BINARY_VEC4(RET_TYPE, FUNCTION, ARG0_TYPE, \
ARG1_TYPE) \
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \
FUNCTION(x.s2, y.s2), \
FUNCTION(x.s3, y.s3)} != (RET_TYPE)0); \
}

// Defines the 8-wide overload of FUNCTION: FUNCTION is called separately on
// components s0..s7 of x and y, and the packed results are compared against
// zero to produce the RET_TYPE return value.
#define _CLC_DEFINE_RELATIONAL_BINARY_VEC8(RET_TYPE, FUNCTION, ARG0_TYPE, \
ARG1_TYPE) \
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \
FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3), \
FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5), \
FUNCTION(x.s6, y.s6), \
FUNCTION(x.s7, y.s7)} != (RET_TYPE)0); \
}

// Defines the 16-wide overload of FUNCTION: FUNCTION is called separately on
// components s0..sf of x and y, and the packed results are compared against
// zero to produce the RET_TYPE return value.
#define _CLC_DEFINE_RELATIONAL_BINARY_VEC16(RET_TYPE, FUNCTION, ARG0_TYPE, \
ARG1_TYPE) \
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \
FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3), \
FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5), \
FUNCTION(x.s6, y.s6), FUNCTION(x.s7, y.s7), \
FUNCTION(x.s8, y.s8), FUNCTION(x.s9, y.s9), \
FUNCTION(x.sa, y.sa), FUNCTION(x.sb, y.sb), \
FUNCTION(x.sc, y.sc), FUNCTION(x.sd, y.sd), \
FUNCTION(x.se, y.se), \
FUNCTION(x.sf, y.sf)} != (RET_TYPE)0); \
}

// Instantiates the 2-, 3-, 4-, 8- and 16-wide overloads of FUNCTION. The
// vector type names are formed by token-pasting the width onto RET_TYPE,
// ARG0_TYPE and ARG1_TYPE (e.g. RET_TYPE##4), so callers pass the scalar
// element type names.
#define _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(RET_TYPE, FUNCTION, ARG0_TYPE, \
ARG1_TYPE) \
_CLC_DEFINE_RELATIONAL_BINARY_VEC2(RET_TYPE##2, FUNCTION, ARG0_TYPE##2, \
ARG1_TYPE##2) \
_CLC_DEFINE_RELATIONAL_BINARY_VEC3(RET_TYPE##3, FUNCTION, ARG0_TYPE##3, \
ARG1_TYPE##3) \
_CLC_DEFINE_RELATIONAL_BINARY_VEC4(RET_TYPE##4, FUNCTION, ARG0_TYPE##4, \
ARG1_TYPE##4) \
_CLC_DEFINE_RELATIONAL_BINARY_VEC8(RET_TYPE##8, FUNCTION, ARG0_TYPE##8, \
ARG1_TYPE##8) \
_CLC_DEFINE_RELATIONAL_BINARY_VEC16(RET_TYPE##16, FUNCTION, ARG0_TYPE##16, \
ARG1_TYPE##16)

// Defines the complete overload set of FUNCTION for one element type: the
// scalar overload forwards to BUILTIN_FUNCTION, and the vector overloads are
// generated by _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL with the same RET_TYPE.
#define _CLC_DEFINE_RELATIONAL_BINARY(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, \
ARG0_TYPE, ARG1_TYPE) \
_CLC_DEFINE_RELATIONAL_BINARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, \
ARG0_TYPE, ARG1_TYPE) \
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(RET_TYPE, FUNCTION, ARG0_TYPE, \
ARG1_TYPE)

#define _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(RET_TYPE, RET_TYPE_VEC, FUNCTION, \
ARG1_TYPE, ARG2_TYPE) \
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
Expand Down
22 changes: 9 additions & 13 deletions libclc/clc/lib/generic/relational/clc_isordered.cl
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,29 @@
#include <clc/relational/clc_isequal.h>
#include <clc/relational/relational.h>

// Defines RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y), which is true only when
// x compares equal to itself and y compares equal to itself.
// NOTE(review): presumably __clc_isequal(v, v) is false only for NaN, making
// this an "ordered" test — confirm against __clc_isequal.
#define _CLC_DEFINE_ISORDERED(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
return __clc_isequal(x, x) && __clc_isequal(y, y); \
}
// Relational expression used for __clc_isordered: true when X compares equal
// to itself and Y compares equal to itself.
// NOTE(review): presumably __clc_isequal(v, v) is false only for NaN — confirm.
// The whole replacement list is parenthesized so the macro behaves as a single
// operand wherever it is expanded (e.g. `!_CLC_RELATIONAL_OP(a, b)`), rather
// than letting a neighbouring operator bind to the first __clc_isequal call.
#define _CLC_RELATIONAL_OP(X, Y)                                               \
  (__clc_isequal((X), (X)) && __clc_isequal((Y), (Y)))

// float instantiations of __clc_isordered: int is passed as the result type
// for both the scalar overload and the vector overloads.
_CLC_DEFINE_ISORDERED(int, __clc_isordered, float, float)
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, __clc_isordered, float, float)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isordered, float, float)

#ifdef cl_khr_fp64

#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// The scalar version of __clc_isordered(double, double) returns an int, but the
// vector versions return long.

// double instantiations: int is the scalar result type, while long is the
// element type pasted into the vector result types (long2 ... long16).
_CLC_DEFINE_ISORDERED(int, __clc_isordered, double, double)
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isordered, double, double)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isordered, double, double)

#endif

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

// The scalar version of __clc_isordered(half, half) returns an int, but the
// vector versions return short.

// half instantiations: int is the scalar result type, while short is the
// element type pasted into the vector result types (short2 ... short16).
_CLC_DEFINE_ISORDERED(int, __clc_isordered, half, half)
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isordered, half, half)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isordered, half, half)

#endif

#undef _CLC_DEFINE_ISORDERED
#undef _CLC_RELATIONAL_OP
26 changes: 9 additions & 17 deletions libclc/clc/lib/generic/relational/clc_isunordered.cl
Original file line number Diff line number Diff line change
@@ -1,38 +1,30 @@
#include <clc/internal/clc.h>
#include <clc/relational/clc_isequal.h>
#include <clc/relational/relational.h>

// Note: It would be nice to use __builtin_isunordered with vector inputs, but
// it seems to only take scalar values as input, which will produce incorrect
// output for vector input types.
// Relational expression used for __clc_isunordered: true when X does not
// compare equal to itself or Y does not compare equal to itself.
// NOTE(review): presumably __clc_isequal(v, v) is false only for NaN — confirm.
// The whole replacement list is parenthesized so the macro behaves as a single
// operand wherever it is expanded (e.g. `2 * _CLC_RELATIONAL_OP(a, b)`),
// rather than letting a neighbouring operator bind to only one side of `||`.
#define _CLC_RELATIONAL_OP(X, Y)                                               \
  (!__clc_isequal((X), (X)) || !__clc_isequal((Y), (Y)))

// float instantiations of __clc_isunordered with int results. The first form
// forwards the scalar overload to __builtin_isunordered and builds the vector
// overloads component by component; the second uses the
// _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY helper instead.
_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isunordered, __builtin_isunordered,
float, float)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isunordered, float, float)

#ifdef cl_khr_fp64

#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// The scalar version of __clc_isunordered(double, double) returns an int, but
// the vector versions return long.

// Scalar double overload: hands both operands to the compiler builtin and
// returns its int result unchanged.
_CLC_DEF _CLC_OVERLOAD int __clc_isunordered(double x, double y) {
  const int is_unordered = __builtin_isunordered(x, y);
  return is_unordered;
}

// double instantiations: long is the element type pasted into the vector
// result types (long2 ... long16); the second form also names int as the
// scalar result type.
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isunordered, double, double)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isunordered, double, double)

#endif

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

// The scalar version of __clc_isunordered(half, half) returns an int, but the
// vector versions return short.

// Scalar half overload: hands both operands to the compiler builtin and
// returns its int result unchanged.
_CLC_DEF _CLC_OVERLOAD int __clc_isunordered(half x, half y) {
  const int is_unordered = __builtin_isunordered(x, y);
  return is_unordered;
}

// half instantiations: short is the element type pasted into the vector
// result types (short2 ... short16); the second form also names int as the
// scalar result type.
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isunordered, half, half)
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isunordered, half, half)

#endif

#undef _CLC_RELATIONAL_OP

0 comments on commit eaa5897

Please sign in to comment.