MINOR: stick-tables: limit the number of visited nodes during expiration

As reported by Felipe in GH issue #3084, on large systems it is not
sufficient to leave the expiration process after a certain number of
expired entries: when entries accumulate too fast, the task may still
spend a long time visiting many entries that cannot be expired (e.g.
those still in use).

Thus here we're taking a stricter approach, consisting in counting the
number of visited entries, which allows us to leave early when we
cannot do the expected work in a reasonable amount of time.
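
The key point is that the budget counts every visited node, including
those that turn out not to be expirable (still referenced, requeued,
etc.), so a flood of such entries can no longer make a single pass
unbounded. Below is a minimal standalone sketch of that idea, using a
hypothetical linked list and VISIT_BUDGET constant rather than
HAProxy's ebtree and task API:

#include <stddef.h>
#include <stdbool.h>

#define VISIT_BUDGET 100  /* hypothetical per-wakeup limit on visited nodes */

struct node {
	struct node *next;
	bool expired;
};

/* Walk the list, dropping expired nodes, but never look at more than
 * VISIT_BUDGET nodes in one pass. Returns true when the whole list was
 * scanned, false when the pass stopped early and must be rescheduled.
 */
static bool expire_pass(struct node **head)
{
	int to_visit = VISIT_BUDGET;
	struct node **prev = head;

	while (to_visit-- > 0) {
		struct node *cur = *prev;

		if (!cur)
			return true;       /* reached the end: work complete */
		if (cur->expired)
			*prev = cur->next; /* unlink; freeing omitted for brevity */
		else
			prev = &cur->next; /* still in use: skipped but still counted */
	}
	return false;                      /* budget exhausted: more to do */
}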

In order to avoid always stopping on the first shards and never
visiting the last ones, we now start from a random shard number and
loop from there. This way, even if we always leave early, all shards
are handled equally over time.
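
A minimal sketch of that iteration pattern follows; NUM_SHARDS,
process_shard() and the rand()-based starting point are illustrative
stand-ins for CONFIG_HAP_TBL_BUCKETS, the per-shard expiry scan and
statistical_prng_range(), not the actual HAProxy code:

#include <stdlib.h>

#define NUM_SHARDS 16  /* stand-in for CONFIG_HAP_TBL_BUCKETS */

/* Visit every shard at most once, starting from a random one and
 * wrapping around, so that an early exit does not always penalize the
 * same trailing shards.
 */
static void expire_all_shards(int *budget,
                              void (*process_shard)(int shard, int *budget))
{
	int shard, init_shard;

	shard = init_shard = rand() % NUM_SHARDS;
	do {
		process_shard(shard, budget);
		shard++;
		if (shard >= NUM_SHARDS)
			shard = 0;
	} while (*budget > 0 && shard != init_shard);
}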

This should be backported to 3.2.
commit 696793205b
parent 2421c3769a
Author: Willy Tarreau
Date:   2025-09-03 10:45:30 +02:00


@@ -898,24 +898,24 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int
 	struct stktable *t = context;
 	struct stksess *ts;
 	struct eb32_node *eb;
-	int need_resched = 0;
 	int updt_locked;
-	int expired;
+	int to_visit = STKTABLE_MAX_UPDATES_AT_ONCE;
 	int looped;
 	int exp_next;
 	int task_exp;
-	int shard;
+	int shard, init_shard;
 
 	task_exp = TICK_ETERNITY;
 
-	for (shard = 0; shard < CONFIG_HAP_TBL_BUCKETS; shard++) {
+	/* start from a random shard number to avoid starvation in the last ones */
+	shard = init_shard = statistical_prng_range(CONFIG_HAP_TBL_BUCKETS - 1);
+	do {
 		updt_locked = 0;
 		looped = 0;
 		HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
 		eb = eb32_lookup_ge(&t->shards[shard].exps, now_ms - TIMER_LOOK_BACK);
 
-		expired = 0;
-		while (1) {
+		while (to_visit >= 0) {
 			if (unlikely(!eb)) {
 				/* we might have reached the end of the tree, typically because
 				 * <now_ms> is in the first half and we're first scanning the last
@@ -936,19 +936,12 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int
 				goto out_unlock;
 			}
 
+			to_visit--;
+
 			/* timer looks expired, detach it from the queue */
 			ts = eb32_entry(eb, struct stksess, exp);
 			eb = eb32_next(eb);
 
-			if (updt_locked == 1) {
-				expired++;
-				if (expired == STKTABLE_MAX_UPDATES_AT_ONCE) {
-					need_resched = 1;
-					exp_next = TICK_ETERNITY;
-					goto out_unlock;
-				}
-			}
-
 			/* This entry's key is expired, we must delete it. It
 			 * may be properly requeued if the element is still in
 			 * use or not really expired though.
@@ -1011,9 +1004,13 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int
 		task_exp = tick_first(task_exp, exp_next);
 
 		HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
-	}
 
-	if (need_resched) {
+		shard++;
+		if (shard >= CONFIG_HAP_TBL_BUCKETS)
+			shard = 0;
+	} while (to_visit > 0 && shard != init_shard);
+
+	if (to_visit <= 0) {
 		task_wakeup(task, TASK_WOKEN_OTHER);
 	} else {
 		/* Reset the task's expiration. We do this under the lock so as not