IMPORT: eb32/eb64: use a more parallelizable check for lack of common bits

Instead of shifting the XOR value right and comparing it to 1, which
requires roughly two dependent instructions, it is better to test
whether the XOR has any bit set above the current bit, i.e. any bit
set among those strictly higher, or in other words whether
XOR & (-bit << 1) is non-zero. This is one instruction less in the
fast path and gives another nice performance gain on random keys
(in million lookups/s):

    eb32   1k:  33.17 -> 37.30   +12.5%
          10k:  15.74 -> 17.08   +8.51%
         100k:   8.00 ->  9.00   +12.5%
    eb64   1k:  34.40 -> 38.10   +10.8%
          10k:  16.17 -> 17.10   +5.75%
         100k:   8.38 ->  8.87   +5.85%

This is ebtree commit c942a2771758eed4f4584fe23cf2914573817a6b.
commit c9e4adf608
parent 6af17d491f
Author: Willy Tarreau
Date:   2025-06-07 13:12:40 +02:00

2 changed files with 4 additions and 4 deletions
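
To illustrate the equivalence, here is a minimal standalone sketch. It
assumes, as the hunks below suggest, that z holds the current split bit
(1 << node_bit) and y the XOR between the looked-up key and the node's
key; neither definition is shown in this patch. It verifies, for every
split-bit position and every 32-bit XOR pattern with one or two bits
set, that the old shift-and-compare test and the new single-AND test
agree:

    /* Illustrative only, not ebtree source. The old test
     *   (y >> bit_pos) >= EB_NODE_BRANCHES   (shift, then compare)
     * and the new test
     *   y & -(z << 1)                        (single AND)
     * must agree: -(z << 1) is the two's-complement mask of all
     * bits strictly above bit_pos.
     */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define EB_NODE_BRANCHES 2 /* binary tree: two branches per node */

    int main(void)
    {
        unsigned bit_pos, b1, b2;

        for (bit_pos = 0; bit_pos < 32; bit_pos++) {
            uint32_t z = (uint32_t)1 << bit_pos; /* current split bit */

            /* every XOR value with one or two bits set */
            for (b1 = 0; b1 < 32; b1++) {
                for (b2 = 0; b2 < 32; b2++) {
                    uint32_t y = ((uint32_t)1 << b1) | ((uint32_t)1 << b2);
                    int old_test = (y >> bit_pos) >= EB_NODE_BRANCHES;
                    int new_test = (y & -(z << 1)) != 0;

                    assert(old_test == new_test);
                }
            }
        }
        puts("old and new tests agree");
        return 0;
    }

The gain presumably comes from the shorter dependency chain: the mask
-(z << 1) depends only on the node's bit, not on the freshly computed
XOR, so the fast path ends with a single AND on y instead of a shift
of y feeding a compare.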

@@ -161,7 +161,7 @@ static forceinline struct eb32_node *__eb32_lookup(struct eb_root *root, u32 x)
 			return node;
 		}
 
-		if ((y >> node_bit) >= EB_NODE_BRANCHES)
+		if (y & -(z << 1))
 			return NULL; /* no more common bits */
 	}
 }
@@ -217,7 +217,7 @@ static forceinline struct eb32_node *__eb32i_lookup(struct eb_root *root, s32 x)
 			return node;
 		}
 
-		if ((y >> node_bit) >= EB_NODE_BRANCHES)
+		if (y & -(z << 1))
 			return NULL; /* no more common bits */
 	}
 }

@@ -159,7 +159,7 @@ static forceinline struct eb64_node *__eb64_lookup(struct eb_root *root, u64 x)
 			return node;
 		}
 
-		if ((y >> node->node.bit) >= EB_NODE_BRANCHES)
+		if (y & -(z << 1))
 			return NULL; /* no more common bits */
 	}
 }
@@ -213,7 +213,7 @@ static forceinline struct eb64_node *__eb64i_lookup(struct eb_root *root, s64 x)
 			return node;
 		}
 
-		if ((y >> node->node.bit) >= EB_NODE_BRANCHES)
+		if (y & -(z << 1))
 			return NULL; /* no more common bits */
 	}
 }