/*
 * Bit helpers: powerOf2Floor() and popCount8().
 *
 * Build-time feature macros consulted below (defined by the configure probes):
 *   HAVE_BUILTIN_CLZ       the compiler supports __builtin_clz
 *   HAVE_BUILTIN_POPCOUNT  the compiler supports __builtin_popcount
 *   HAVE_STDBIT_H          the <stdbit.h> header is present
 *   HAVE_STDC_BIT_FLOOR    the stdc_bit_floor functions are supported
 *   HAVE_ARM_NEON_H        the <arm_neon.h> header is present (only probed
 *                          when the host CPU is arm or aarch64)
 */

#include <limits.h> // IWYU pragma: keep
#include <stdint.h> // IWYU pragma: keep

#ifdef HAVE_STDBIT_H
#include <stdbit.h>
#endif

#if defined(HAVE_ARM_NEON_H) && defined(__ARM_NEON)
// ARM C Language Extensions (ACLE) recommends checking __ARM_NEON before
// including <arm_neon.h>.
#include <arm_neon.h>
#endif

/* ---- XUtils.h: inline variants and prototype ---- */

/* Returns the largest power of two that is not greater than x.
   If x is 0, returns 0. */
#if defined(HAVE_BUILTIN_CLZ)
static inline unsigned int powerOf2Floor(unsigned int x) {
   // __builtin_clz has an undefined result for 0, so handle it up front.
   if (x == 0)
      return 0;

   // Index of the highest set bit is (width - 1) minus the leading zero count.
   return 1U << ((int)(sizeof(x) * CHAR_BIT) - 1 - __builtin_clz(x));
}
#elif defined(HAVE_STDC_BIT_FLOOR)
static inline unsigned int powerOf2Floor(unsigned int x) {
   // Type-specific (unsigned int) form of the stdc_bit_floor family.
   return stdc_bit_floor_ui(x);
}
#else
// Neither facility is available: use the out-of-line portable version
// defined further down.
unsigned int powerOf2Floor(unsigned int x);
#endif

/* Returns the number of set bits (0 to 8) in the 8-bit value x. */
static inline unsigned int popCount8(uint8_t x) {
#if defined(HAVE_ARM_NEON_H) && defined(__ARM_NEON)
   // With ARM Advanced SIMD extension (NEON), this generates smaller code than
   // __builtin_popcount.
   //
   // Initialize the vector register. Set all lanes at once so that the
   // compiler will not emit instruction to zero-initialize other lanes.
   uint8x8_t v = vdup_n_u8(x);
   // Count the number of set bits for each lane (8-bit) in the vector.
   v = vcnt_u8(v);
   // Get lane 0 and discard lanes 1 to 7. (Return type was uint8_t)
   return vget_lane_u8(v, 0);
#elif defined(HAVE_BUILTIN_POPCOUNT) && defined(__POPCNT__)
   // x86 POPCNT instruction. __builtin_popcount translates to it when it is
   // enabled ("-mpopcnt"). (Return type was int)
   return (unsigned int)__builtin_popcount(x);
#else
   // This code is optimized for uint8_t input and smaller than the subroutine
   // call of the compiler __builtin_popcount (which is tuned for
   // unsigned int input type and not uint8_t).
   //
   // The multiplication lays down four non-overlapping copies of x at bit
   // offsets 0, 9, 18 and 27 (the top copy is truncated to 32 bits).
   uint32_t n = (uint32_t)(x * 0x08040201U);
   // After ">> 3" and the 0x11111111 mask, each of the 8 input bits sits in
   // the lowest bit of its own nibble. Multiplying by 0x11111111 adds all
   // nibbles into the topmost one (the sum is at most 8, so it cannot
   // overflow 4 bits), and ">> 28" extracts that sum.
   n = (uint32_t)(((n >> 3) & 0x11111111U) * 0x11111111U) >> 28;
   return n;
#endif
}

/* ---- XUtils.c: portable fallback definition ---- */

#if !defined(HAVE_BUILTIN_CLZ) && !defined(HAVE_STDC_BIT_FLOOR)
/* Returns the largest power of two that is not greater than x.
   If x is 0, returns 0. */
unsigned int powerOf2Floor(unsigned int x) {
   // Propagate the highest set bit into every lower bit position by OR-ing
   // in copies of x shifted right by 1, 2, 4, ... bits.
   for (unsigned int shift = 1; shift < sizeof(x) * CHAR_BIT; shift <<= 1)
      x |= x >> shift;

   // Every bit at or below the original highest set bit is now 1 (or x is
   // still 0); subtracting the lower half leaves only that highest bit.
   return x - (x >> 1);
}
#endif