Skip to content

Commit

Permalink
Introduce powerOf2Floor() and popCount8() functions
Browse files Browse the repository at this point in the history
This is a prerequisite for the feature "Graph meter coloring (with
GraphData structure rework)".

powerOf2Floor() will utilize __builtin_clz() or stdc_bit_floor_ui()
(__builtin_clz() is preferred) if either is supported.

popCount8() will utilize ARM NEON instructions or the x86 POPCNT
instruction if the machine supports either of them.

I am not adopting the C23 standard interface stdc_count_ones_uc() yet,
as I am not sure C libraries would implement it as fast as our version.

Signed-off-by: Kang-Che Sung <[email protected]>
  • Loading branch information
Explorer09 committed Aug 25, 2024
1 parent b4d430f commit 0e44068
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 2 deletions.
12 changes: 12 additions & 0 deletions XUtils.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ in the source distribution for its full text.
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h> // IWYU pragma: keep
#include <math.h>
#include <stdarg.h>
#include <stdint.h>
Expand Down Expand Up @@ -387,3 +388,14 @@ unsigned int countTrailingZeros(unsigned int x) {
return mod37BitPosition[(-x & x) % 37];
}
#endif

#if !defined(HAVE_BUILTIN_CLZ) && !defined(HAVE_STDC_BIT_FLOOR)
/* Portable fallback: largest power of two that does not exceed x.
   An input of 0 yields 0. */
unsigned int powerOf2Floor(unsigned int x) {
   const unsigned int widthInBits = (unsigned int)(sizeof(x) * CHAR_BIT);
   unsigned int smeared = x;
   unsigned int distance = 1;

   // OR the value with itself shifted right by 1, 2, 4, ... bits so that
   // every bit below the highest set bit becomes set as well.
   while (distance < widthInBits) {
      smeared |= smeared >> distance;
      distance <<= 1;
   }

   // Subtracting the lower half of the all-ones run leaves only the top bit.
   return smeared - (smeared >> 1);
}
#endif
55 changes: 55 additions & 0 deletions XUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,27 @@ in the source distribution for its full text.
#error "Must have #include \"config.h\" line at the top of the file that includes these XUtils helper functions"
#endif

#include <limits.h> // IWYU pragma: keep
#include <stdbool.h>
#include <stddef.h> // IWYU pragma: keep
#include <stdint.h> // IWYU pragma: keep
#include <stdio.h>
#include <stdlib.h> // IWYU pragma: keep
#include <string.h> // IWYU pragma: keep

#include "Compat.h"
#include "Macros.h"

#ifdef HAVE_STDBIT_H
#include <stdbit.h>
#endif

#if defined(HAVE_ARM_NEON_H) && defined(__ARM_NEON)
// ARM C Language Extensions (ACLE) recommends checking __ARM_NEON before
// including <arm_neon.h>.
#include <arm_neon.h>
#endif


ATTR_NORETURN
void fail(void);
Expand Down Expand Up @@ -149,6 +161,49 @@ static inline unsigned int countTrailingZeros(unsigned int x) {
unsigned int countTrailingZeros(unsigned int x);
#endif

/* Returns the largest power of two that is not greater than x.
If x is 0, returns 0. */
#if defined(HAVE_BUILTIN_CLZ)
static inline unsigned int powerOf2Floor(unsigned int x) {
   // Zero is answered up front; GCC documents the result of
   // __builtin_clz(0) as undefined.
   if (!x) {
      return 0;
   }

   // Index of the most significant set bit, counted from bit 0.
   const int topBitIndex = (int)(sizeof(x) * CHAR_BIT) - 1;
   const int highestSetBit = topBitIndex - __builtin_clz(x);

   return 1U << highestSetBit;
}
#elif defined(HAVE_STDC_BIT_FLOOR)
// <stdbit.h> variant: hands the whole computation to the type-specific
// stdc_bit_floor_ui(). The "0 maps to 0" contract of powerOf2Floor() is
// expected to come from that library routine (C23 specifies a result of 0
// for a zero argument).
static inline unsigned int powerOf2Floor(unsigned int x) {
return stdc_bit_floor_ui(x);
}
#else
unsigned int powerOf2Floor(unsigned int x);
#endif

/* Counts the bits that are set in the 8-bit value x (result is 0 to 8).
   The implementation is selected at compile time: ARM NEON intrinsics,
   the compiler popcount builtin when x86 POPCNT is enabled, or a portable
   multiply-and-mask fallback. */
static inline unsigned int popCount8(uint8_t x) {
   unsigned int bitCount;

#if defined(HAVE_ARM_NEON_H) && defined(__ARM_NEON)
   // The NEON path yields smaller code than __builtin_popcount. The input
   // is broadcast to all eight lanes so that the compiler has no reason to
   // emit a separate zero-initialization of the unused lanes; vcnt_u8 then
   // counts the set bits of each 8-bit lane.
   const uint8x8_t perLaneCounts = vcnt_u8(vdup_n_u8(x));
   // Only lane 0 is needed; lanes 1 to 7 are dropped. (The lane is uint8_t.)
   bitCount = vget_lane_u8(perLaneCounts, 0);
#elif defined(HAVE_BUILTIN_POPCOUNT) && defined(__POPCNT__)
   // With "-mpopcnt" in effect __builtin_popcount maps to the x86 POPCNT
   // instruction. The builtin returns int, hence the cast.
   bitCount = (unsigned int)__builtin_popcount(x);
#else
   // Tuned for a uint8_t argument: smaller than calling the compiler's
   // __builtin_popcount subroutine, which targets unsigned int input.
   //
   // The multiplication places four non-overlapping copies of x at bit
   // offsets 0, 9, 18 and 27.
   const uint32_t replicated = (uint32_t)(x * 0x08040201U);
   // After the shift, bit 0 of each nibble holds one distinct bit of x.
   const uint32_t onePerNibble = (replicated >> 3) & 0x11111111U;
   // Multiplying by 0x11111111 sums all eight nibbles into the top nibble.
   bitCount = (uint32_t)(onePerNibble * 0x11111111U) >> 28;
#endif

   return bitCount;
}

/* IEC unit prefixes */
static const char unitPrefixes[] = { 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y', 'R', 'Q' };

Expand Down
43 changes: 41 additions & 2 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,10 @@ fi])

# Optional Section

# Optional headers: execinfo.h and stdbit.h are looked for here. Code that
# depends on stdbit.h is guarded by HAVE_STDBIT_H.
AC_CHECK_HEADERS([execinfo.h])
AC_CHECK_HEADERS([ \
execinfo.h \
stdbit.h \
])

if test "$my_htop_platform" = darwin; then
AC_CHECK_HEADERS([mach/mach_time.h])
Expand Down Expand Up @@ -292,11 +295,47 @@ AC_LINK_IFELSE([

# Probe for the __builtin_ctz compiler builtin with a compile-only test
# program; HAVE_BUILTIN_CTZ is defined when that program compiles.
AC_MSG_CHECKING(for __builtin_ctz)
AC_COMPILE_IFELSE([
AC_LANG_PROGRAM([], [[__builtin_ctz(1); /* Supported in GCC 3.4 or later */]])],
AC_LANG_PROGRAM([], [[return __builtin_ctz(1U); /* Supported in GCC 3.4 or later */]])],
[AC_DEFINE([HAVE_BUILTIN_CTZ], 1, [Define to 1 if the compiler supports '__builtin_ctz' function.])
AC_MSG_RESULT(yes)],
AC_MSG_RESULT(no))

# Probe for the __builtin_clz compiler builtin by compiling (not linking) a
# one-line program that calls it; HAVE_BUILTIN_CLZ is defined on success.
# powerOf2Floor() picks its clz-based implementation from this macro.
AC_MSG_CHECKING(for __builtin_clz)
AC_COMPILE_IFELSE([
AC_LANG_PROGRAM([], [[return __builtin_clz(-1U); /* Supported in GCC 3.4 or later */]])],
[AC_DEFINE([HAVE_BUILTIN_CLZ], 1, [Define to 1 if the compiler supports '__builtin_clz' function.])
AC_MSG_RESULT(yes)],
AC_MSG_RESULT(no))

# Probe for the __builtin_popcount compiler builtin by compiling (not
# linking) a one-line program that calls it; HAVE_BUILTIN_POPCOUNT is
# defined on success. popCount8() additionally requires __POPCNT__ before
# it uses the builtin.
AC_MSG_CHECKING(for __builtin_popcount)
AC_COMPILE_IFELSE([
AC_LANG_PROGRAM([], [[return __builtin_popcount(0U); /* Supported in GCC 3.4 or later */]])],
[AC_DEFINE([HAVE_BUILTIN_POPCOUNT], 1, [Define to 1 if the compiler supports '__builtin_popcount' function.])
AC_MSG_RESULT(yes)],
AC_MSG_RESULT(no))

# Link test for the bit-floor API declared in stdbit.h. The test program
# references both the type-generic stdc_bit_floor and the type-specific
# stdc_bit_floor_ui, so HAVE_STDC_BIT_FLOOR is defined only when a program
# using both of them compiles and links.
AC_MSG_CHECKING(for stdc_bit_floor)
AC_LINK_IFELSE([
AC_LANG_PROGRAM(
[[
#include <stdbit.h>
]],
[[
/* Both the type-generic and type-specific versions should exist.
htop uses the type-specific version. */
return stdc_bit_floor(0U) || stdc_bit_floor_ui(0U);
]])],
[AC_DEFINE([HAVE_STDC_BIT_FLOOR], 1, [Define to 1 if stdc_bit_floor functions are supported.])
AC_MSG_RESULT(yes)],
AC_MSG_RESULT(no))

# arm_neon.h is only looked for when the host CPU name matches arm* or
# aarch64*; every other host skips this header check.
case "$host_cpu" in
arm*|aarch64*)
dnl ARM NEON intrinsics
AC_CHECK_HEADERS([arm_neon.h])
;;
esac

# ----------------------------------------------------------------------


Expand Down

0 comments on commit 0e44068

Please sign in to comment.