From c9e4adf608603b1cb9a10f999697097814d6602e Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sat, 7 Jun 2025 13:12:40 +0200 Subject: [PATCH] IMPORT: eb32/eb64: use a more parallelizable check for lack of common bits Instead of shifting the XOR value right and comparing it to 1, which roughly requires 2 sequential instructions, it is better to test whether the XOR has any bit set strictly above the current bit, in other words whether XOR & -(bit << 1) is non-zero (written y & -(z << 1) in the code). This is one fewer instruction in the fast path and gives another nice performance gain on random keys (in millions of lookups/s): eb32 1k: 33.17 -> 37.30 +12.5% 10k: 15.74 -> 17.08 +8.51% 100k: 8.00 -> 9.00 +12.5% eb64 1k: 34.40 -> 38.10 +10.8% 10k: 16.17 -> 17.10 +5.75% 100k: 8.38 -> 8.87 +5.85% This is ebtree commit c942a2771758eed4f4584fe23cf2914573817a6b. --- include/import/eb32tree.h | 4 ++-- include/import/eb64tree.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/import/eb32tree.h b/include/import/eb32tree.h index be45e19aa..6b52945c5 100644 --- a/include/import/eb32tree.h +++ b/include/import/eb32tree.h @@ -161,7 +161,7 @@ static forceinline struct eb32_node *__eb32_lookup(struct eb_root *root, u32 x) return node; } - if ((y >> node_bit) >= EB_NODE_BRANCHES) + if (y & -(z << 1)) return NULL; /* no more common bits */ } } @@ -217,7 +217,7 @@ static forceinline struct eb32_node *__eb32i_lookup(struct eb_root *root, s32 x) return node; } - if ((y >> node_bit) >= EB_NODE_BRANCHES) + if (y & -(z << 1)) return NULL; /* no more common bits */ } } diff --git a/include/import/eb64tree.h b/include/import/eb64tree.h index 75c13c367..baaeb1dfe 100644 --- a/include/import/eb64tree.h +++ b/include/import/eb64tree.h @@ -159,7 +159,7 @@ static forceinline struct eb64_node *__eb64_lookup(struct eb_root *root, u64 x) return node; } - if ((y >> node->node.bit) >= EB_NODE_BRANCHES) + if (y & -(z << 1)) return NULL; /* no more common bits */ } } @@ -213,7 +213,7 
@@ static forceinline struct eb64_node *__eb64i_lookup(struct eb_root *root, s64 x) return node; } - if ((y >> node->node.bit) >= EB_NODE_BRANCHES) + if (y & -(z << 1)) return NULL; /* no more common bits */ } }