@@ -37,11 +37,17 @@ unsigned rhash_ctz(unsigned x)
37
37
# else /* _MSC_VER >= 1300... */
38
38
39
39
/**
40
- * Returns index of the trailing bit of a 32-bit number.
41
- * This is a plain C equivalent for GCC __builtin_ctz() bit scan.
40
+ * Returns index of the least significant set bit in a 32-bit number.
41
+ * This operation is also known as Count Trailing Zeros (CTZ).
42
42
*
43
- * @param x the number to process
44
- * @return zero-based index of the trailing bit
43
+ * The function is a portable, branch-free equivalent of GCC's __builtin_ctz(),
44
+ * using a De Bruijn sequence for constant-time lookup.
45
+ *
46
+ * @param x 32-bit unsigned integer to analyze (must not be zero)
47
+ * @return zero-based index of the least significant set bit (0 to 31)
48
+ *
49
+ * @note The result is undefined when `x == 0`. The current implementation
50
+ * returns 0, but this value must not be relied upon.
45
51
*/
46
52
unsigned rhash_ctz (unsigned x )
47
53
{
@@ -64,23 +70,40 @@ unsigned rhash_ctz(unsigned x)
64
70
65
71
#ifndef rhash_ctz64
66
72
/**
67
- * Returns index of the trailing bit of a 64-bit number.
68
- * This is a plain C equivalent for GCC __builtin_ctzll() bit scan.
69
- * Original author: Matt Taylor (2003).
73
+ * Returns the zero-based index of the least significant set bit in a 64-bit number.
74
+ * This operation is also known as Count Trailing Zeros (CTZ).
70
75
*
71
- * @param x the number to process
72
- * @return zero-based index of the trailing bit
76
+ * The function is a portable, branch-free equivalent of GCC's __builtin_ctzll().
77
+ * Uses a 32-bit optimized implementation with magic constant `0x78291ACF`,
78
+ * based on Matt Taylor's original algorithm (2003).
79
+ *
80
+ * @param x 64-bit unsigned integer to analyze (must not be zero)
81
+ * @return zero-based index of the least significant set bit (0 to 63)
82
+ *
83
+ * @note The result is undefined when `x == 0`. The current implementation
84
+ * returns 63, but this value must not be relied upon.
85
+ * @see rhash_ctz() for 32-bit version.
73
86
*/
74
87
unsigned rhash_ctz64 (uint64_t x )
75
88
{
76
- /* array for conversion to bit position */
89
+ /* lookup table mapping hash values to bit position */
77
90
static unsigned char bit_pos [64 ] = {
78
91
63 , 30 , 3 , 32 , 59 , 14 , 11 , 33 , 60 , 24 , 50 , 9 , 55 , 19 , 21 , 34 ,
79
92
61 , 29 , 2 , 53 , 51 , 23 , 41 , 18 , 56 , 28 , 1 , 43 , 46 , 27 , 0 , 35 ,
80
93
62 , 31 , 58 , 4 , 5 , 49 , 54 , 6 , 15 , 52 , 12 , 40 , 7 , 42 , 45 , 16 ,
81
94
25 , 57 , 48 , 13 , 10 , 39 , 8 , 44 , 20 , 47 , 38 , 22 , 17 , 37 , 36 , 26
82
95
};
83
- uint32_t folded = (uint32_t )(((x - 1 ) >> 32 ) ^ (x - 1 ));
96
+ /* transform 0b01000 -> 0b01111 (mask of the lowest set bit and all bits below it) */
97
+ x ^= x - 1 ;
98
+ /* fold 64-bit value to 32-bit to be efficient on 32-bit systems */
99
+ uint32_t folded = (uint32_t )((x >> 32 ) ^ x );
100
+ /* Use Matt Taylor's multiplication trick (2003):
101
+ * - multiply by (specially chosen) magic constant 0x78291ACF
102
+ * - use top 6 bits of result (>>26) as table index
103
+ * Original discussion:
104
+ * https://groups.google.com/g/comp.lang.asm.x86/c/3pVGzQGb1ys/m/fPpKBKNi848J
105
+ * https://groups.google.com/g/comp.lang.asm.x86/c/3pVGzQGb1ys/m/230qffQJYvQJ
106
+ */
84
107
return bit_pos [folded * 0x78291ACF >> 26 ];
85
108
}
86
109
#endif /* rhash_ctz64 */
@@ -94,10 +117,10 @@ unsigned rhash_ctz64(uint64_t x)
94
117
*/
95
118
unsigned rhash_popcount (unsigned x )
96
119
{
97
- x -= (x >>1 ) & 0x55555555 ;
98
- x = ((x >> 2 ) & 0x33333333 ) + (x & 0x33333333 );
99
- x = ((x >> 4 ) + x ) & 0x0f0f0f0f ;
100
- return (x * 0x01010101 ) >> 24 ;
120
+ x -= (x >>1 ) & 0x55555555 ;
121
+ x = ((x >> 2 ) & 0x33333333 ) + (x & 0x33333333 );
122
+ x = ((x >> 4 ) + x ) & 0x0f0f0f0f ;
123
+ return (x * 0x01010101 ) >> 24 ;
101
124
}
102
125
#endif /* rhash_popcount */
103
126
@@ -216,10 +239,10 @@ void rhash_u32_mem_swap(unsigned* arr, int length)
216
239
# if defined(HAS_GCC_INTEL_CPUID )
217
240
# include <cpuid.h>
218
241
# define RHASH_CPUID (id , regs ) \
219
- __get_cpuid(id, &(regs[0]), &(regs[1]), &(regs[2]), &(regs[3]));
242
+ __get_cpuid(id, &(regs[0]), &(regs[1]), &(regs[2]), &(regs[3]));
220
243
# if HAS_GNUC (6 , 3 )
221
244
# define RHASH_CPUIDEX (id , sub_id , regs ) \
222
- __get_cpuid_count(id, sub_id, &regs[0], &regs[1], &regs[2], &regs[3]);
245
+ __get_cpuid_count(id, sub_id, &regs[0], &regs[1], &regs[2], &regs[3]);
223
246
# endif
224
247
# elif defined(HAS_MSVC_INTEL_CPUID )
225
248
# define RHASH_CPUID (id , regs ) __cpuid((int*)regs, id)
@@ -245,7 +268,7 @@ static uint64_t get_cpuid_features(void)
245
268
if (cpu_info [0 ] >= 7 )
246
269
{
247
270
/* Request CPUID AX=7 CX=0 to get SHANI bit */
248
- RHASH_CPUIDEX (7 , 0 , cpu_info );
271
+ RHASH_CPUIDEX (7 , 0 , cpu_info );
249
272
result |= (cpu_info [1 ] & (1 << 29 ));
250
273
}
251
274
#endif
0 commit comments