IMPORT: ebmb: optimize the lookup for modern CPUs

This applies the same principles as the latest improvements made to
the integer trees. Using the same recipes made the ebmb_lookup()
function jump from 10.07 to 12.25 million lookups per second on a
tree of 10k random values (+21.6%).

It's likely that the ebmb_lookup_longest() code could also benefit
from this, though this was neither explored nor tested.

This is ebtree commit a159731fd6b91648a2fef3b953feeb830438c924.
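
For context, a figure like the one quoted above can be reproduced with a
small driver around the public ebmb API. The sketch below is illustrative
only and is not the benchmark behind the quoted numbers; it assumes the
standard ebmbtree.h interface (EB_ROOT, ebmb_insert(), ebmb_lookup()), and
KEY_LEN/NB_KEYS/NB_LOOPS are arbitrary values chosen for the example.

/* Minimal lookup-rate sketch, assuming the standard ebtree ebmb API
 * (EB_ROOT, ebmb_insert(), ebmb_lookup() from ebmbtree.h). Not the
 * benchmark used for the figures above; KEY_LEN, NB_KEYS and NB_LOOPS
 * are arbitrary example values.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "ebmbtree.h"

#define KEY_LEN   20
#define NB_KEYS   10000
#define NB_LOOPS  1000000

int main(void)
{
	static unsigned char keys[NB_KEYS][KEY_LEN];
	struct eb_root root = EB_ROOT;
	struct timespec t0, t1;
	double sec;
	long i, found = 0;

	/* build a tree of NB_KEYS random keys */
	for (i = 0; i < NB_KEYS; i++) {
		struct ebmb_node *node = malloc(sizeof(*node) + KEY_LEN);
		int j;

		for (j = 0; j < KEY_LEN; j++)
			keys[i][j] = rand();
		memcpy(node->key, keys[i], KEY_LEN);
		ebmb_insert(&root, node, KEY_LEN);
	}

	/* time random lookups of existing keys */
	clock_gettime(CLOCK_MONOTONIC, &t0);
	for (i = 0; i < NB_LOOPS; i++)
		found += ebmb_lookup(&root, keys[rand() % NB_KEYS], KEY_LEN) != NULL;
	clock_gettime(CLOCK_MONOTONIC, &t1);

	sec = (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec) / 1e9;
	printf("%d lookups, %ld found, %.2f Mlookups/s\n",
	       NB_LOOPS, found, NB_LOOPS / sec / 1e6);
	return 0;
}
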
Willy Tarreau 2025-06-08 11:50:59 +02:00
parent 6c54bf7295
commit 61654c07bd


@@ -184,6 +184,9 @@ static forceinline struct ebmb_node *__ebmb_lookup(struct eb_root *root, const void *x, unsigned int len)
 
 	pos = 0;
 	while (1) {
+		void *b0, *b1;
+		unsigned char k, b;
+
 		if (eb_gettag(troot) == EB_LEAF) {
 			node = container_of(eb_untag(troot, EB_LEAF),
 					    struct ebmb_node, node.branches);
@@ -234,11 +237,19 @@ static forceinline struct ebmb_node *__ebmb_lookup(struct eb_root *root, const void *x, unsigned int len)
 		 *   - more than the last bit differs => return NULL
 		 *   - walk down on side = (x[pos] >> node_bit) & 1
 		 */
-		side = *(unsigned char *)x >> node_bit;
-		if (((node->key[pos] >> node_bit) ^ side) > 1)
+		b = *(unsigned char *)x;
+		side = 1 << node_bit;
+
+		__builtin_prefetch(node->node.branches.b[0], 0);
+		__builtin_prefetch(node->node.branches.b[1], 0);
+
+		k = node->key[pos];
+		b0 = node->node.branches.b[0];
+		b1 = node->node.branches.b[1];
+
+		troot = (b & side) ? b1 : b0;
+		if ((k ^ b) & -(side << 1))
 			goto ret_null;
-		side &= 1;
-		troot = node->node.branches.b[side];
 	}
  walk_left:
 	troot = node->node.branches.b[EB_LEFT];
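
The rewritten tail of the descent is where the gain presumably comes from:
both child pointers are prefetched and read before the key byte is compared,
and the side selection becomes a simple AND, so the next node's data can
already be on its way while the comparison resolves. The divergence check
changes form but not meaning: with side = 1 << node_bit, the mask
-(side << 1) keeps exactly the bits above node_bit, so
(k ^ b) & -(side << 1) is non-zero precisely when the old
((k >> node_bit) ^ (b >> node_bit)) > 1 test fired. The standalone program
below (illustrative only, not part of the patch) brute-forces this
equivalence over all byte values and bit positions.

#include <assert.h>
#include <stdio.h>

/* Standalone check (illustrative, not part of the patch): verify that
 * the new last-byte divergence test from the hunk above,
 *     (k ^ b) & -(side << 1)   with side = 1 << node_bit,
 * rejects exactly the same (key byte, lookup byte) pairs as the old
 *     ((k >> node_bit) ^ (b >> node_bit)) > 1
 * for every byte value and every bit position.
 */
int main(void)
{
	int node_bit, k, b;

	for (node_bit = 0; node_bit < 8; node_bit++) {
		int side = 1 << node_bit;

		for (k = 0; k < 256; k++) {
			for (b = 0; b < 256; b++) {
				int old_diff = ((k >> node_bit) ^ (b >> node_bit)) > 1;
				int new_diff = ((k ^ b) & -(side << 1)) != 0;

				assert(old_diff == new_diff);
			}
		}
	}
	printf("old and new divergence tests agree on all inputs\n");
	return 0;
}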