Skip to content

Commit f0432d0

Browse files
committed
Fix: Make get_ctz64() portable for non-GCC compilers
1 parent edc3118 commit f0432d0

File tree

2 files changed

+62
-32
lines changed

2 files changed

+62
-32
lines changed

common_func.c

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -431,12 +431,15 @@ void* rhash_realloc(void* mem, size_t size, const char* srcfile, int srcline)
431431
* Bit functions
432432
*=========================================================================*/
433433
#ifndef get_ctz
434-
/**
435-
* Returns index of the trailing bit of a 32-bit number.
436-
* This is a plain C equivalent for GCC __builtin_ctz() bit scan.
437-
*
438-
* @param x the number to process
439-
* @return zero-based index of the trailing bit
434+
/**
435+
* Returns index of the least significant set bit in a 32-bit number.
436+
* This operation is also known as Count Trailing Zeros (CTZ).
437+
*
438+
* The function is a portable, branch-free equivalent of GCC's __builtin_ctz(),
439+
* using a De Bruijn sequence for constant-time lookup.
440+
*
441+
* @param x 32-bit unsigned integer to analyze (must not be zero)
442+
* @return zero-based index of the least significant set bit (0 to 31)
440443
*/
441444
unsigned get_ctz(unsigned x)
442445
{
@@ -448,23 +451,27 @@ unsigned get_ctz(unsigned x)
448451
return bit_pos[((uint32_t)((x & -x) * 0x077CB531U)) >> 27];
449452
}
450453
/**
451-
* Returns index of the trailing bit of a 64-bit number.
452-
* This is a plain C equivalent for GCC __builtin_ctzll() bit scan.
453-
* Original author: Matt Taylor (2003).
454+
* Returns the zero-based index of the least significant set bit in a 64-bit number.
455+
* This operation is also known as Count Trailing Zeros (CTZ).
454456
*
455-
* @param x the number to process
456-
* @return zero-based index of the trailing bit
457+
* The function is a portable, branch-free equivalent of GCC's __builtin_ctzll().
458+
* Uses a 32-bit optimized implementation with magic constant `0x78291ACF`,
459+
* based on Matt Taylor's original algorithm (2003).
460+
*
461+
* @param x 64-bit unsigned integer to analyze (must not be zero)
462+
* @return zero-based index of the least significant set bit (0 to 63)
457463
*/
458464
unsigned get_ctz64(uint64_t x)
459465
{
460-
/* array for conversion to bit position */
461-
static unsigned bit_pos[64] = {
466+
/* lookup table mapping hash values to bit position */
467+
static const unsigned bit_pos[64] = {
462468
63, 30, 3, 32, 59, 14, 11, 33, 60, 24, 50, 9, 55, 19, 21, 34,
463469
61, 29, 2, 53, 51, 23, 41, 18, 56, 28, 1, 43, 46, 27, 0, 35,
464470
62, 31, 58, 4, 5, 49, 54, 6, 15, 52, 12, 40, 7, 42, 45, 16,
465471
25, 57, 48, 13, 10, 39, 8, 44, 20, 47, 38, 22, 17, 37, 36, 26
466472
};
467-
uint32_t folded = (uint32_t)(((x - 1) >> 32) ^ (x - 1));
473+
x ^= x - 1;
474+
uint32_t folded = (uint32_t)((x >> 32) ^ x);
468475
return bit_pos[folded * 0x78291ACF >> 26];
469476
}
470477
#endif

librhash/byte_order.c

Lines changed: 41 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,17 @@ unsigned rhash_ctz(unsigned x)
3737
# else /* _MSC_VER >= 1300... */
3838

3939
/**
40-
* Returns index of the trailing bit of a 32-bit number.
41-
* This is a plain C equivalent for GCC __builtin_ctz() bit scan.
40+
* Returns index of the least significant set bit in a 32-bit number.
41+
* This operation is also known as Count Trailing Zeros (CTZ).
4242
*
43-
* @param x the number to process
44-
* @return zero-based index of the trailing bit
43+
* The function is a portable, branch-free equivalent of GCC's __builtin_ctz(),
44+
* using a De Bruijn sequence for constant-time lookup.
45+
*
46+
* @param x 32-bit unsigned integer to analyze (must not be zero)
47+
* @return zero-based index of the least significant set bit (0 to 31)
48+
*
49+
* @note The result is unspecified when `x == 0`. The current implementation
50+
* returns 0, but this value must not be relied upon.
4551
*/
4652
unsigned rhash_ctz(unsigned x)
4753
{
@@ -64,23 +70,40 @@ unsigned rhash_ctz(unsigned x)
6470

6571
#ifndef rhash_ctz64
6672
/**
67-
* Returns index of the trailing bit of a 64-bit number.
68-
* This is a plain C equivalent for GCC __builtin_ctzll() bit scan.
69-
* Original author: Matt Taylor (2003).
73+
* Returns the zero-based index of the least significant set bit in a 64-bit number.
74+
* This operation is also known as Count Trailing Zeros (CTZ).
7075
*
71-
* @param x the number to process
72-
* @return zero-based index of the trailing bit
76+
* The function is a portable, branch-free equivalent of GCC's __builtin_ctzll().
77+
* Uses a 32-bit optimized implementation with magic constant `0x78291ACF`,
78+
* based on Matt Taylor's original algorithm (2003).
79+
*
80+
* @param x 64-bit unsigned integer to analyze (must not be zero)
81+
* @return zero-based index of the least significant set bit (0 to 63)
82+
*
83+
* @note The result is unspecified when `x == 0`. The current implementation
84+
* returns 63, but this value must not be relied upon.
85+
* @see rhash_ctz() for 32-bit version.
7386
*/
7487
unsigned rhash_ctz64(uint64_t x)
7588
{
76-
/* array for conversion to bit position */
89+
/* lookup table mapping hash values to bit position */
7790
static unsigned char bit_pos[64] = {
7891
63, 30, 3, 32, 59, 14, 11, 33, 60, 24, 50, 9, 55, 19, 21, 34,
7992
61, 29, 2, 53, 51, 23, 41, 18, 56, 28, 1, 43, 46, 27, 0, 35,
8093
62, 31, 58, 4, 5, 49, 54, 6, 15, 52, 12, 40, 7, 42, 45, 16,
8194
25, 57, 48, 13, 10, 39, 8, 44, 20, 47, 38, 22, 17, 37, 36, 26
8295
};
83-
uint32_t folded = (uint32_t)(((x - 1) >> 32) ^ (x - 1));
96+
/* transform 0b01000 -> 0b01111 (mask of the lowest set bit and all bits below it) */
97+
x ^= x - 1;
98+
/* fold 64-bit value to 32-bit to be efficient on 32-bit systems */
99+
uint32_t folded = (uint32_t)((x >> 32) ^ x);
100+
/* Use Matt Taylor's multiplication trick (2003):
101+
* - multiply by (specially chosen) magic constant 0x78291ACF
102+
* - use top 6 bits of result (>>26) as table index
103+
* Original discussion:
104+
* https://groups.google.com/g/comp.lang.asm.x86/c/3pVGzQGb1ys/m/fPpKBKNi848J
105+
* https://groups.google.com/g/comp.lang.asm.x86/c/3pVGzQGb1ys/m/230qffQJYvQJ
106+
*/
84107
return bit_pos[folded * 0x78291ACF >> 26];
85108
}
86109
#endif /* rhash_ctz64 */
@@ -94,10 +117,10 @@ unsigned rhash_ctz64(uint64_t x)
94117
*/
95118
unsigned rhash_popcount(unsigned x)
96119
{
97-
x -= (x >>1) & 0x55555555;
98-
x = ((x >> 2) & 0x33333333) + (x & 0x33333333);
99-
x = ((x >> 4) + x) & 0x0f0f0f0f;
100-
return (x * 0x01010101) >> 24;
120+
x -= (x >>1) & 0x55555555;
121+
x = ((x >> 2) & 0x33333333) + (x & 0x33333333);
122+
x = ((x >> 4) + x) & 0x0f0f0f0f;
123+
return (x * 0x01010101) >> 24;
101124
}
102125
#endif /* rhash_popcount */
103126

@@ -216,10 +239,10 @@ void rhash_u32_mem_swap(unsigned* arr, int length)
216239
# if defined(HAS_GCC_INTEL_CPUID)
217240
# include <cpuid.h>
218241
# define RHASH_CPUID(id, regs) \
219-
__get_cpuid(id, &(regs[0]), &(regs[1]), &(regs[2]), &(regs[3]));
242+
__get_cpuid(id, &(regs[0]), &(regs[1]), &(regs[2]), &(regs[3]));
220243
# if HAS_GNUC(6, 3)
221244
# define RHASH_CPUIDEX(id, sub_id, regs) \
222-
__get_cpuid_count(id, sub_id, &regs[0], &regs[1], &regs[2], &regs[3]);
245+
__get_cpuid_count(id, sub_id, &regs[0], &regs[1], &regs[2], &regs[3]);
223246
# endif
224247
# elif defined(HAS_MSVC_INTEL_CPUID)
225248
# define RHASH_CPUID(id, regs) __cpuid((int*)regs, id)
@@ -245,7 +268,7 @@ static uint64_t get_cpuid_features(void)
245268
if (cpu_info[0] >= 7)
246269
{
247270
/* Request CPUID AX=7 CX=0 to get SHANI bit */
248-
RHASH_CPUIDEX(7, 0, cpu_info);
271+
RHASH_CPUIDEX(7, 0, cpu_info);
249272
result |= (cpu_info[1] & (1 << 29));
250273
}
251274
#endif

0 commit comments

Comments
 (0)