From cafa56ecd6d1a2d15e7c47d5ebbdb82d7be3f689 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sat, 2 Feb 2019 20:17:31 +0100 Subject: [PATCH] MINOR: tools: improve the popcount() operation We'll call popcount() more often so better use a parallel method than an iterative one. One optimal design is proposed at the site below. It requires a fast multiplication though, but even without it will still be faster than the iterative one, and all relevant 64 bit platforms do have a multiply unit. https://graphics.stanford.edu/~seander/bithacks.html --- include/common/standard.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/common/standard.h b/include/common/standard.h index dc771477b..4f4b2d509 100644 --- a/include/common/standard.h +++ b/include/common/standard.h @@ -790,15 +790,15 @@ static inline unsigned int div64_32(unsigned long long o1, unsigned int o2) return result; } -/* Simple popcountl implementation. It returns the number of ones in a word */ +/* Simple popcountl implementation. It returns the number of ones in a word. + * Described here : https://graphics.stanford.edu/~seander/bithacks.html + */ static inline unsigned int my_popcountl(unsigned long a) { - unsigned int cnt; - for (cnt = 0; a; a >>= 1) { - if (a & 1) - cnt++; - } - return cnt; + a = a - ((a >> 1) & ~0UL/3); + a = (a & ~0UL/15*3) + ((a >> 2) & ~0UL/15*3); + a = (a + (a >> 4)) & ~0UL/255*15; + return (unsigned long)(a * (~0UL/255)) >> (sizeof(unsigned long) - 1) * 8; } /* returns non-zero if has at least 2 bits set */