diff --git a/src/coreclr/inc/bitposition.h b/src/coreclr/inc/bitposition.h
index e1f05ffe9ca03d..aacc50ce15e017 100644
--- a/src/coreclr/inc/bitposition.h
+++ b/src/coreclr/inc/bitposition.h
@@ -12,47 +12,37 @@
 //
 // Notes:
 //    'value' must have exactly one bit set.
-//    The algorithm is as follows:
-//    - PRIME is a prime bigger than sizeof(unsigned int), which is not of the
-//      form 2^n-1.
-//    - Taking the modulo of 'value' with this will produce a unique hash for all
-//      powers of 2 (which is what "value" is).
-//    - Entries in hashTable[] which are -1 should never be used. There
-//      should be PRIME-8*sizeof(value) entries which are -1 .
+//    It performs the "TrailingZeroCount" operation using intrinsics.
 //
 inline
 unsigned            BitPosition(unsigned value)
 {
     _ASSERTE((value != 0) && ((value & (value-1)) == 0));
-#if defined(HOST_ARM) && defined(__llvm__)
-    // use intrinsic functions for arm32
-    // this is applied for LLVM only but it may work for some compilers
-    DWORD index = __builtin_clz(__builtin_arm_rbit(value));
-#elif !defined(HOST_AMD64)
-    const unsigned PRIME = 37;
-
-    static const char hashTable[PRIME] =
-    {
-        -1,  0,  1, 26,  2, 23, 27, -1,  3, 16,
-        24, 30, 28, 11, -1, 13,  4,  7, 17, -1,
-        25, 22, 31, 15, 29, 10, 12,  6, -1, 21,
-        14,  9,  5, 20,  8, 19, 18
-    };
-
-    _ASSERTE(PRIME >= 8*sizeof(value));
-    _ASSERTE(sizeof(hashTable) == PRIME);
+    DWORD index;
+    BitScanForward(&index, value);
+    return index;
+}
 
-    unsigned hash   = value % PRIME;
-    unsigned index  = hashTable[hash];
-    _ASSERTE(index != (unsigned char)-1);
-#else
-    // not enabled for x86 because BSF is extremely slow on Atom
-    // (15 clock cycles vs 1-3 on any other Intel CPU post-P4)
+#ifdef HOST_64BIT
+//------------------------------------------------------------------------
+// BitPosition: Return the position of the single bit that is set in 'value'.
+//
+// Return Value:
+//    The position (0 is LSB) of bit that is set in 'value'
+//
+// Notes:
+//    'value' must have exactly one bit set.
+//    It performs the "TrailingZeroCount" operation using intrinsics.
+//
+inline
+unsigned            BitPosition(unsigned __int64 value)
+{
+    _ASSERTE((value != 0) && ((value & (value-1)) == 0));
     DWORD index;
-    BitScanForward(&index, value);
-#endif
+    BitScanForward64(&index, value);
     return index;
 }
+#endif // HOST_64BIT
 
 #endif
diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp
index e610096100f253..db0d382131ec7b 100644
--- a/src/coreclr/jit/compiler.hpp
+++ b/src/coreclr/jit/compiler.hpp
@@ -97,56 +97,49 @@ inline T genFindLowestBit(T value)
     return (value & (0 - value));
 }
 
-/*****************************************************************************/
-/*****************************************************************************
- *
- *  Return the highest bit that is set (that is, a mask that includes just the highest bit).
- *  TODO-ARM64-Throughput: we should convert these to use the _BitScanReverse() / _BitScanReverse64()
- *  compiler intrinsics, but our CRT header file intrin.h doesn't define these for ARM64 yet.
- */
-
+//------------------------------------------------------------------------
+// genFindHighestBit: Return the highest bit that is set (that is, a mask that includes just the
+// highest bit).
+//
+// Return Value:
+//    The mask of the highest bit that is set in 'mask' (not the bit's index).
+//
+// Note:
+//    It performs a "BitScanReverse" operation using intrinsics and then rebuilds the mask
+//    from the returned bit index.
 inline unsigned int genFindHighestBit(unsigned int mask)
 {
     assert(mask != 0);
-    unsigned int bit = 1U << ((sizeof(unsigned int) * 8) - 1); // start looking at the top
-    while ((bit & mask) == 0)
-    {
-        bit >>= 1;
-    }
-    return bit;
-}
-
-inline unsigned __int64 genFindHighestBit(unsigned __int64 mask)
-{
-    assert(mask != 0);
-    unsigned __int64 bit = 1ULL << ((sizeof(unsigned __int64) * 8) - 1); // start looking at the top
-    while ((bit & mask) == 0)
-    {
-        bit >>= 1;
-    }
-    return bit;
-}
-
-#if 0
-// TODO-ARM64-Cleanup: These should probably be the implementation, when intrin.h is updated for ARM64
-inline
-unsigned int genFindHighestBit(unsigned int mask)
-{
-    assert(mask != 0);
+#if defined(_MSC_VER)
+    unsigned long index;
+#else
     unsigned int index;
-    _BitScanReverse(&index, mask);
+#endif
+    BitScanReverse(&index, mask);
     return 1L << index;
 }
 
-inline
-unsigned __int64 genFindHighestBit(unsigned __int64 mask)
+//------------------------------------------------------------------------
+// genFindHighestBit: Return the highest bit that is set (that is, a mask that includes just the
+// highest bit).
+//
+// Return Value:
+//    The mask of the highest bit that is set in 'mask' (not the bit's index).
+//
+// Note:
+//    It performs a "BitScanReverse" operation using intrinsics and then rebuilds the mask
+//    from the returned bit index.
+inline unsigned __int64 genFindHighestBit(unsigned __int64 mask)
 {
     assert(mask != 0);
+#if defined(_MSC_VER)
+    unsigned long index;
+#else
     unsigned int index;
-    _BitScanReverse64(&index, mask);
+#endif
+    BitScanReverse64(&index, mask);
     return 1LL << index;
 }
-#endif // 0
 
 /*****************************************************************************
  *
@@ -222,8 +215,11 @@ inline unsigned uhi32(unsigned __int64 value)
 
 inline unsigned genLog2(unsigned __int64 value)
 {
-    unsigned lo32 = ulo32(value);
-    unsigned hi32 = uhi32(value);
+#ifdef HOST_64BIT
+    return BitPosition(value);
+#else // HOST_32BIT
+    unsigned lo32 = ulo32(value);
+    unsigned hi32 = uhi32(value);
 
     if (lo32 != 0)
     {
@@ -234,6 +230,7 @@ inline unsigned genLog2(unsigned __int64 value)
     {
         return genLog2(hi32) + 32;
     }
+#endif
 }
 
 /*****************************************************************************