mirror of
https://git.haproxy.org/git/haproxy.git/
synced 2025-09-20 13:21:29 +02:00
The current code calculates the next troot based on a calculation. This was efficient when the algorithm was developed many years ago on K6 and K7 CPUs running at low frequencies with few registers and limited branch prediction units but nowadays with ultra-deep pipelines and high latency memory that's no longer efficient, because the CPU needs to have completed multiple operations before knowing which address to start fetching from. It's sad because we only have two branches each time but the CPU cannot know it. In addition, the calculation is performed late in the loop, which does not help the address generation unit to start prefetching next data. Instead we should help the CPU by preloading data early from the node and calculating troot as soon as possible. The CPU will be able to postpone that processing until the dependencies are available and it really needs to dereference it. In addition we must absolutely avoid serializing instructions such as "(a >> b) & 1" because there's no way for the compiler to parallelize that code nor for the CPU to pre-process some early data. What this patch does is relatively simple: - we try to prefetch the next two branches as soon as the node is known, which will help dereference the selected node in the next iteration; it was shown that it only works with the next changes though, otherwise it can reduce the performance instead. In practice the prefetching will start a bit later once the node is really in the cache, but since there's no dependency between these instructions and any other one, we let the CPU optimize as it wants. - we preload all important data from the node (next two branches, key and node.bit) very early even if not immediately needed. This is cheap, it doesn't cause any pipeline stall and speeds up later operations. - we pre-calculate 1<<bit that we assign into a register, so as to avoid serializing instructions when deciding which branch to take. 
- we assign the troot based on a ternary operation (or if/else) so that the CPU knows upfront the two possible next addresses without waiting for the end of a calculation and can prefetch their contents every time the branch prediction unit guesses right. Just doing this provides significant gains at various tree sizes on random keys (in million lookups per second): eb32 1k: 29.07 -> 33.17 +14.1% 10k: 14.27 -> 15.74 +10.3% 100k: 6.64 -> 8.00 +20.5% eb64 1k: 27.51 -> 34.40 +25.0% 10k: 13.54 -> 16.17 +19.4% 100k: 7.53 -> 8.38 +11.3% The performance is now much closer to the sequential keys. This was done for all variants ({32,64}{,i,le,ge}). Another point, the equality test in the loop improves the performance when looking up random keys (since we don't need to reach the leaf), but is counter-productive for sequential keys, which can gain ~17% without that test. However sequential keys are normally not used with exact lookups, but rather with lookup_ge() that spans a time frame, and which does not have that test for this precise reason, so in the end both use cases are served optimally. It's interesting to note that everything here is solely based on data dependencies, and that trying to perform *less* operations upfront always ends up with lower performance (typically the original one). This is ebtree commit 05a0613e97f51b6665ad5ae2801199ad55991534.
233 lines
7.1 KiB
C
233 lines
7.1 KiB
C
/*
|
|
* Elastic Binary Trees - exported functions for operations on 32bit nodes.
|
|
* Version 6.0.6
|
|
* (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation, version 2.1
|
|
* exclusively.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
/* Consult eb32tree.h for more details about those functions */
|
|
|
|
#include <import/eb32tree.h>
|
|
|
|
/* Insert <new> into the tree rooted at <root>, keyed by new->key.
 * Thin exported wrapper around the inline __eb32_insert() from eb32tree.h.
 * Returns the node actually inserted (as __eb32_insert() reports it).
 */
struct eb32_node *eb32_insert(struct eb_root *root, struct eb32_node *new)
{
	return __eb32_insert(root, new);
}
|
|
|
|
/* Insert <new> into the tree rooted at <root> using signed key ordering.
 * Thin exported wrapper around the inline __eb32i_insert() from eb32tree.h.
 * Returns the node actually inserted (as __eb32i_insert() reports it).
 */
struct eb32_node *eb32i_insert(struct eb_root *root, struct eb32_node *new)
{
	return __eb32i_insert(root, new);
}
|
|
|
|
/* Look up unsigned key <x> in the tree rooted at <root>.
 * Thin exported wrapper around the inline __eb32_lookup() from eb32tree.h.
 * Returns the matching node, or NULL as reported by __eb32_lookup().
 */
struct eb32_node *eb32_lookup(struct eb_root *root, u32 x)
{
	return __eb32_lookup(root, x);
}
|
|
|
|
/* Look up signed key <x> in the tree rooted at <root>.
 * Thin exported wrapper around the inline __eb32i_lookup() from eb32tree.h.
 * Returns the matching node, or NULL as reported by __eb32i_lookup().
 */
struct eb32_node *eb32i_lookup(struct eb_root *root, s32 x)
{
	return __eb32i_lookup(root, x);
}
|
|
|
|
/*
|
|
* Find the last occurrence of the highest key in the tree <root>, which is
|
|
 * equal to or less than <x>. NULL is returned if no key matches.
|
|
*/
|
|
struct eb32_node *eb32_lookup_le(struct eb_root *root, u32 x)
{
	struct eb32_node *node;
	eb_troot_t *troot;
	u32 y, z;  /* y: current node's key; z: mask of the node's split bit */

	troot = root->b[EB_LEFT];
	if (unlikely(troot == NULL))
		return NULL;

	while (1) {
		if ((eb_gettag(troot) == EB_LEAF)) {
			/* We reached a leaf, which means that the whole upper
			 * parts were common. We will return either the current
			 * node or its next one if the former is too small.
			 */
			node = container_of(eb_untag(troot, EB_LEAF),
					    struct eb32_node, node.branches);
			if (node->key <= x)
				return node;
			/* return prev */
			troot = node->node.leaf_p;
			break;
		}
		node = container_of(eb_untag(troot, EB_NODE),
				    struct eb32_node, node.branches);

		/* Start fetching both possible next branches as early as
		 * possible: whichever side the descent takes, its node will
		 * already be on its way into the cache for the next iteration.
		 */
		__builtin_prefetch(node->node.branches.b[0], 0);
		__builtin_prefetch(node->node.branches.b[1], 0);

		/* Preload the key and precompute the split-bit mask, then
		 * select the next branch via a ternary so the CPU sees both
		 * candidate addresses up front instead of waiting on a
		 * serialized shift-and-mask. The "& 31" keeps the shift
		 * defined even when node.bit is negative (dup-tree marker),
		 * in which case <troot> is discarded below anyway.
		 */
		y = node->key;
		z = 1U << (node->node.bit & 31);
		troot = (x & z) ? node->node.branches.b[1] : node->node.branches.b[0];

		if (node->node.bit < 0) {
			/* We're at the top of a dup tree. Either we got a
			 * matching value and we return the rightmost node, or
			 * we don't and we skip the whole subtree to return the
			 * prev node before the subtree. Note that since we're
			 * at the top of the dup tree, we can simply return the
			 * prev node without first trying to escape from the
			 * tree.
			 */
			if (node->key <= x) {
				/* matching dups: walk right branches down to
				 * the rightmost (last) leaf of the dup tree */
				troot = node->node.branches.b[EB_RGHT];
				while (eb_gettag(troot) != EB_LEAF)
					troot = (eb_untag(troot, EB_NODE))->b[EB_RGHT];
				return container_of(eb_untag(troot, EB_LEAF),
						    struct eb32_node, node.branches);
			}
			/* return prev */
			troot = node->node.node_p;
			break;
		}

		/* -(z << 1) is the mask of all bits above the split bit; a
		 * non-zero AND means <x> diverges from this subtree's prefix.
		 */
		if ((x ^ y) & -(z << 1)) {
			/* No more common bits at all. Either this node is too
			 * small and we need to get its highest value, or it is
			 * too large, and we need to get the prev value.
			 */
			if ((node->key >> node->node.bit) < (x >> node->node.bit)) {
				/* whole subtree is below <x>: its max is the answer */
				troot = node->node.branches.b[EB_RGHT];
				return eb32_entry(eb_walk_down(troot, EB_RGHT), struct eb32_node, node);
			}

			/* Further values will be too high here, so return the prev
			 * unique node (if it exists).
			 */
			troot = node->node.node_p;
			break;
		}
	}

	/* If we get here, it means we want to report previous node before the
	 * current one which is not above. <troot> is already initialised to
	 * the parent's branches.
	 */
	while (eb_gettag(troot) == EB_LEFT) {
		/* Walking up from left branch. We must ensure that we never
		 * walk beyond root.
		 */
		if (unlikely(eb_clrtag((eb_untag(troot, EB_LEFT))->b[EB_RGHT]) == NULL))
			return NULL;
		troot = (eb_root_to_node(eb_untag(troot, EB_LEFT)))->node_p;
	}
	/* Note that <troot> cannot be NULL at this stage */
	troot = (eb_untag(troot, EB_RGHT))->b[EB_LEFT];
	/* descend to the rightmost leaf of the left sibling: the prev node */
	node = eb32_entry(eb_walk_down(troot, EB_RGHT), struct eb32_node, node);
	return node;
}
|
|
|
|
/*
|
|
* Find the first occurrence of the lowest key in the tree <root>, which is
|
|
 * equal to or greater than <x>. NULL is returned if no key matches.
|
|
*/
|
|
struct eb32_node *eb32_lookup_ge(struct eb_root *root, u32 x)
{
	struct eb32_node *node;
	eb_troot_t *troot;
	u32 y, z;  /* y: current node's key; z: mask of the node's split bit */

	troot = root->b[EB_LEFT];
	if (unlikely(troot == NULL))
		return NULL;

	while (1) {
		if ((eb_gettag(troot) == EB_LEAF)) {
			/* We reached a leaf, which means that the whole upper
			 * parts were common. We will return either the current
			 * node or its next one if the former is too small.
			 */
			node = container_of(eb_untag(troot, EB_LEAF),
					    struct eb32_node, node.branches);
			if (node->key >= x)
				return node;
			/* return next */
			troot = node->node.leaf_p;
			break;
		}
		node = container_of(eb_untag(troot, EB_NODE),
				    struct eb32_node, node.branches);

		/* Start fetching both possible next branches as early as
		 * possible: whichever side the descent takes, its node will
		 * already be on its way into the cache for the next iteration.
		 */
		__builtin_prefetch(node->node.branches.b[0], 0);
		__builtin_prefetch(node->node.branches.b[1], 0);

		/* Preload the key and precompute the split-bit mask, then
		 * select the next branch via a ternary so the CPU sees both
		 * candidate addresses up front instead of waiting on a
		 * serialized shift-and-mask. The "& 31" keeps the shift
		 * defined even when node.bit is negative (dup-tree marker),
		 * in which case <troot> is discarded below anyway.
		 */
		y = node->key;
		z = 1U << (node->node.bit & 31);
		troot = (x & z) ? node->node.branches.b[1] : node->node.branches.b[0];

		if (node->node.bit < 0) {
			/* We're at the top of a dup tree. Either we got a
			 * matching value and we return the leftmost node, or
			 * we don't and we skip the whole subtree to return the
			 * next node after the subtree. Note that since we're
			 * at the top of the dup tree, we can simply return the
			 * next node without first trying to escape from the
			 * tree.
			 */
			if (node->key >= x) {
				/* matching dups: walk left branches down to
				 * the leftmost (first) leaf of the dup tree */
				troot = node->node.branches.b[EB_LEFT];
				while (eb_gettag(troot) != EB_LEAF)
					troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
				return container_of(eb_untag(troot, EB_LEAF),
						    struct eb32_node, node.branches);
			}
			/* return next */
			troot = node->node.node_p;
			break;
		}

		/* -(z << 1) is the mask of all bits above the split bit; a
		 * non-zero AND means <x> diverges from this subtree's prefix.
		 */
		if ((x ^ y) & -(z << 1)) {
			/* No more common bits at all. Either this node is too
			 * large and we need to get its lowest value, or it is too
			 * small, and we need to get the next value.
			 */
			if ((node->key >> node->node.bit) > (x >> node->node.bit)) {
				/* whole subtree is above <x>: its min is the answer */
				troot = node->node.branches.b[EB_LEFT];
				return eb32_entry(eb_walk_down(troot, EB_LEFT), struct eb32_node, node);
			}

			/* Further values will be too low here, so return the next
			 * unique node (if it exists).
			 */
			troot = node->node.node_p;
			break;
		}
	}

	/* If we get here, it means we want to report next node after the
	 * current one which is not below. <troot> is already initialised
	 * to the parent's branches.
	 */
	while (eb_gettag(troot) != EB_LEFT)
		/* Walking up from right branch, so we cannot be below root */
		troot = (eb_root_to_node(eb_untag(troot, EB_RGHT)))->node_p;

	/* Note that <troot> cannot be NULL at this stage */
	troot = (eb_untag(troot, EB_LEFT))->b[EB_RGHT];
	if (eb_clrtag(troot) == NULL)
		return NULL;

	/* descend to the leftmost leaf of the right sibling: the next node */
	node = eb32_entry(eb_walk_down(troot, EB_LEFT), struct eb32_node, node);
	return node;
}
|