diff --git a/libclc/libspirv/lib/ptx-nvidiacl/group/group_non_uniform.cl b/libclc/libspirv/lib/ptx-nvidiacl/group/group_non_uniform.cl
index 44e437c0d9ee2..671dbb4535d76 100644
--- a/libclc/libspirv/lib/ptx-nvidiacl/group/group_non_uniform.cl
+++ b/libclc/libspirv/lib/ptx-nvidiacl/group/group_non_uniform.cl
@@ -8,6 +8,7 @@
 #include "membermask.h"
+#include
 #include
 #include
@@ -33,4 +34,34 @@ _Z29__spirv_GroupNonUniformBallotjb(unsigned flag, bool predicate) {
   res[0] = __nvvm_vote_ballot_sync(threads, predicate);
   return res;
+}
+
+// Counts the set bits of a ballot mask for the calling work-item.
+//
+// Only mask[0] is read, matching the ballot above, which stores its result
+// in element 0.
+//
+//   scope - not inspected; assumed to be __spv::Scope::Subgroup.
+//   flag  - group operation selecting the bits that are counted:
+//             Reduce        - every bit of mask[0]
+//             InclusiveScan - bits of lanes at or below the calling lane
+//             ExclusiveScan - bits of lanes strictly below the calling lane
+//   mask  - ballot value whose first element is counted.
+//
+// Returns the population count of the selected bits. Any other flag value
+// traps.
+_CLC_DEF _CLC_CONVERGENT uint
+_Z37__spirv_GroupNonUniformBallotBitCountN5__spv5Scope4FlagEiDv4_j(
+    uint scope, uint flag, __clc_vec4_uint32_t mask) {
+  // here we assume scope == __spv::Scope::Subgroup
+  if (flag == Reduce) {
+    return __clc_popcount(mask[0]);
+  } else if (flag == InclusiveScan) {
+    return __clc_popcount(__nvvm_read_ptx_sreg_lanemask_le() & mask[0]);
+  } else if (flag == ExclusiveScan) {
+    return __clc_popcount(__nvvm_read_ptx_sreg_lanemask_lt() & mask[0]);
+  } else {
+    __builtin_trap();
+    __builtin_unreachable();
+  }
 }
\ No newline at end of file