diff --git a/src/coreclr/inc/bitposition.h b/src/coreclr/inc/bitposition.h
index e1f05ffe9ca03d..aacc50ce15e017 100644
--- a/src/coreclr/inc/bitposition.h
+++ b/src/coreclr/inc/bitposition.h
@@ -12,47 +12,37 @@
 //
 // Notes:
 //    'value' must have exactly one bit set.
-//    The algorithm is as follows:
-//    - PRIME is a prime bigger than sizeof(unsigned int), which is not of the
-//      form 2^n-1.
-//    - Taking the modulo of 'value' with this will produce a unique hash for all
-//      powers of 2 (which is what "value" is).
-//    - Entries in hashTable[] which are -1 should never be used. There
-//      should be PRIME-8*sizeof(value) entries which are -1 .
+//    It performs the "TrailingZeroCount" operation using intrinsics.
 //
 inline
 unsigned            BitPosition(unsigned value)
 {
     _ASSERTE((value != 0) && ((value & (value-1)) == 0));
-#if defined(HOST_ARM) && defined(__llvm__)
-    // use intrinsic functions for arm32
-    // this is applied for LLVM only but it may work for some compilers
-    DWORD index = __builtin_clz(__builtin_arm_rbit(value));
-#elif !defined(HOST_AMD64)
-    const unsigned PRIME = 37;
-
-    static const char hashTable[PRIME] =
-    {
-        -1,  0,  1, 26,  2, 23, 27, -1,  3, 16,
-        24, 30, 28, 11, -1, 13,  4,  7, 17, -1,
-        25, 22, 31, 15, 29, 10, 12,  6, -1, 21,
-        14,  9,  5, 20,  8, 19, 18
-    };
-
-    _ASSERTE(PRIME >= 8*sizeof(value));
-    _ASSERTE(sizeof(hashTable) == PRIME);
+    DWORD index;
+    BitScanForward(&index, value);
+    return index;
+}
 
-    unsigned hash   = value % PRIME;
-    unsigned index  = hashTable[hash];
-    _ASSERTE(index != (unsigned char)-1);
-#else
-    // not enabled for x86 because BSF is extremely slow on Atom
-    // (15 clock cycles vs 1-3 on any other Intel CPU post-P4)
+#ifdef HOST_64BIT
+//------------------------------------------------------------------------
+// BitPosition: Return the position of the single bit that is set in 'value'.
+//
+// Return Value:
+//    The position (0 is LSB) of bit that is set in 'value'
+//
+// Notes:
+//    'value' must have exactly one bit set.
+//    It performs the "TrailingZeroCount" operation using intrinsics.
+//
+inline
+unsigned            BitPosition(unsigned __int64 value)
+{
+    _ASSERTE((value != 0) && ((value & (value-1)) == 0));
     DWORD index;
-    BitScanForward(&index, value);
-#endif
+    BitScanForward64(&index, value);
     return index;
 }
+#endif // HOST_64BIT
 
 #endif
diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp
index e610096100f253..db0d382131ec7b 100644
--- a/src/coreclr/jit/compiler.hpp
+++ b/src/coreclr/jit/compiler.hpp
@@ -97,56 +97,49 @@ inline T genFindLowestBit(T value)
     return (value & (0 - value));
 }
 
-/*****************************************************************************/
-/*****************************************************************************
- *
- *  Return the highest bit that is set (that is, a mask that includes just the highest bit).
- *  TODO-ARM64-Throughput: we should convert these to use the _BitScanReverse() / _BitScanReverse64()
- *  compiler intrinsics, but our CRT header file intrin.h doesn't define these for ARM64 yet.
- */
-
+//------------------------------------------------------------------------
+// genFindHighestBit: Return the highest bit that is set (that is, a mask that includes just the
+// highest bit).
+//
+// Return Value:
+//    The mask of the highest bit that is set in 'mask' (not the bit's index).
+//
+// Note:
+//    It performs a "BitScanReverse" operation using intrinsics and then rebuilds the mask
+//    from the returned bit index.
 inline unsigned int genFindHighestBit(unsigned int mask)
 {
     assert(mask != 0);
-    unsigned int bit = 1U << ((sizeof(unsigned int) * 8) - 1); // start looking at the top
-    while ((bit & mask) == 0)
-    {
-        bit >>= 1;
-    }
-    return bit;
-}
-
-inline unsigned __int64 genFindHighestBit(unsigned __int64 mask)
-{
-    assert(mask != 0);
-    unsigned __int64 bit = 1ULL << ((sizeof(unsigned __int64) * 8) - 1); // start looking at the top
-    while ((bit & mask) == 0)
-    {
-        bit >>= 1;
-    }
-    return bit;
-}
-
-#if 0
-// TODO-ARM64-Cleanup: These should probably be the implementation, when intrin.h is updated for ARM64
-inline
-unsigned int genFindHighestBit(unsigned int mask)
-{
-    assert(mask != 0);
+#if defined(_MSC_VER)
+    unsigned long index;
+#else
     unsigned int index;
-    _BitScanReverse(&index, mask);
+#endif
+    BitScanReverse(&index, mask);
     return 1L << index;
 }
 
-inline
-unsigned __int64 genFindHighestBit(unsigned __int64 mask)
+//------------------------------------------------------------------------
+// genFindHighestBit: Return the highest bit that is set (that is, a mask that includes just the
+// highest bit).
+//
+// Return Value:
+//    The mask of the highest bit that is set in 'mask' (not the bit's index).
+//
+// Note:
+//    It performs a "BitScanReverse" operation using intrinsics and then rebuilds the mask
+//    from the returned bit index.
+inline unsigned __int64 genFindHighestBit(unsigned __int64 mask)
 {
     assert(mask != 0);
+#if defined(_MSC_VER)
+    unsigned long index;
+#else
     unsigned int index;
-    _BitScanReverse64(&index, mask);
+#endif
+    BitScanReverse64(&index, mask);
     return 1LL << index;
 }
-#endif // 0
 
 /*****************************************************************************
  *
@@ -222,8 +215,11 @@ inline unsigned uhi32(unsigned __int64 value)
 
 inline unsigned genLog2(unsigned __int64 value)
 {
-    unsigned lo32 = ulo32(value);
-    unsigned hi32 = uhi32(value);
+#ifdef HOST_64BIT
+    return BitPosition(value);
+#else // HOST_32BIT
+    unsigned lo32 = ulo32(value);
+    unsigned hi32 = uhi32(value);
 
     if (lo32 != 0)
     {
@@ -234,6 +230,7 @@ inline unsigned genLog2(unsigned __int64 value)
     {
         return genLog2(hi32) + 32;
     }
+#endif
 }
 
 /*****************************************************************************