Skip to content

Commit

Permalink
Speedup trailing bytes
Browse files (browse the repository at this point in the history)
  • Loading branch information
kimwalisch committed Jun 26, 2024
1 parent 2dae764 commit 7cfa698
Showing 1 changed file with 48 additions and 26 deletions.
74 changes: 48 additions & 26 deletions libpopcnt.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -584,15 +584,18 @@ static inline uint64_t popcnt(const void* data, uint64_t size)
{
if (i + 8 <= size)
{
/* Align &ptr[i] to an 8 byte boundary */
uintptr_t rem = ((uintptr_t) &ptr[i]) % 8;

/* Align &ptr[i] to an 8 byte boundary */
if (rem != 0)
{
size_t bytes = (size_t) (8 - rem % 8);
uint64_t val = 0;
for (; i < bytes; i++)
val = (val << 8) + ptr[i];
uint64_t bytes = (uint64_t) (8 - rem % 8);
bytes = (bytes <= 7) ? bytes : 7;
for (uint64_t j = 0; j < bytes; j++)
val = (val << 8) + ptr[i + j];
cnt += popcnt64(val);
i += bytes;
}
}

Expand All @@ -602,8 +605,10 @@ static inline uint64_t popcnt(const void* data, uint64_t size)
if (i < size)
{
uint64_t val = 0;
for (; i < size; i++)
val = (val << 8) + ptr[i];
uint64_t bytes = (uint64_t) (size - i);
bytes = (bytes <= 7) ? bytes : 7;
for (uint64_t j = 0; j < bytes; j++)
val = (val << 8) + ptr[i + j];
cnt += popcnt64(val);
}

Expand All @@ -621,15 +626,18 @@ static inline uint64_t popcnt(const void* data, uint64_t size)

if (i + 8 <= size)
{
/* Align &ptr[i] to an 8 byte boundary */
uintptr_t rem = ((uintptr_t) &ptr[i]) % 8;

/* Align &ptr[i] to an 8 byte boundary */
if (rem != 0)
{
size_t bytes = (size_t) (8 - rem % 8);
uint64_t val = 0;
for (; i < bytes; i++)
val = (val << 8) + ptr[i];
uint64_t bytes = (uint64_t) (8 - rem % 8);
bytes = (bytes <= 7) ? bytes : 7;
for (uint64_t j = 0; j < bytes; j++)
val = (val << 8) + ptr[i + j];
cnt += popcnt64_bitwise(val);
i += bytes;
}
}

Expand All @@ -639,8 +647,10 @@ static inline uint64_t popcnt(const void* data, uint64_t size)
if (i < size)
{
uint64_t val = 0;
for (; i < size; i++)
val = (val << 8) + ptr[i];
uint64_t bytes = (uint64_t) (size - i);
bytes = (bytes <= 7) ? bytes : 7;
for (uint64_t j = 0; j < bytes; j++)
val = (val << 8) + ptr[i + j];
cnt += popcnt64_bitwise(val);
}

Expand Down Expand Up @@ -684,8 +694,10 @@ static inline uint64_t popcnt(const void* data, uint64_t size)
i = size - rem;
const uint8_t* ptr8 = (const uint8_t*) data;
uint64_t val = 0;
for (; i < size; i++)
val = (val << 8) + ptr8[i];
uint64_t bytes = (uint64_t) (size - i);
bytes = (bytes <= 7) ? bytes : 7;
for (uint64_t j = 0; j < bytes; j++)
val = (val << 8) + ptr8[i + j];
cnt += popcnt64(val);
}

Expand Down Expand Up @@ -766,15 +778,18 @@ static inline uint64_t popcnt(const void* data, uint64_t size)

if (i + 8 <= size)
{
/* Align &ptr[i] to an 8 byte boundary */
uintptr_t rem = ((uintptr_t) &ptr[i]) % 8;

/* Align &ptr[i] to an 8 byte boundary */
if (rem != 0)
{
size_t bytes = (size_t) (8 - rem % 8);
uint64_t val = 0;
for (; i < bytes; i++)
val = (val << 8) + ptr[i];
uint64_t bytes = (uint64_t) (8 - rem % 8);
bytes = (bytes <= 7) ? bytes : 7;
for (uint64_t j = 0; j < bytes; j++)
val = (val << 8) + ptr[i + j];
cnt += popcnt64(val);
i += bytes;
}
}

Expand All @@ -784,8 +799,10 @@ static inline uint64_t popcnt(const void* data, uint64_t size)
if (i < size)
{
uint64_t val = 0;
for (; i < size; i++)
val = (val << 8) + ptr[i];
uint64_t bytes = (uint64_t) (size - i);
bytes = (bytes <= 7) ? bytes : 7;
for (uint64_t j = 0; j < bytes; j++)
val = (val << 8) + ptr[i + j];
cnt += popcnt64(val);
}

Expand All @@ -808,15 +825,18 @@ static inline uint64_t popcnt(const void* data, uint64_t size)

if (i + 8 <= size)
{
/* Align &ptr[i] to an 8 byte boundary */
uintptr_t rem = ((uintptr_t) &ptr[i]) % 8;

/* Align &ptr[i] to an 8 byte boundary */
if (rem != 0)
{
size_t bytes = (size_t) (8 - rem % 8);
uint64_t val = 0;
for (; i < bytes; i++)
val = (val << 8) + ptr[i];
uint64_t bytes = (uint64_t) (8 - rem % 8);
bytes = (bytes <= 7) ? bytes : 7;
for (uint64_t j = 0; j < bytes; j++)
val = (val << 8) + ptr[i + j];
cnt += popcnt64(val);
i += bytes;
}
}

Expand All @@ -826,8 +846,10 @@ static inline uint64_t popcnt(const void* data, uint64_t size)
if (i < size)
{
uint64_t val = 0;
for (; i < size; i++)
val = (val << 8) + ptr[i];
uint64_t bytes = (uint64_t) (size - i);
bytes = (bytes <= 7) ? bytes : 7;
for (uint64_t j = 0; j < bytes; j++)
val = (val << 8) + ptr[i + j];
cnt += popcnt64(val);
}

Expand Down

0 comments on commit 7cfa698

Please sign in to comment.