From 696793205b6f949aba63ebccdd99465ed6cb854c Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Wed, 3 Sep 2025 10:45:30 +0200 Subject: [PATCH] MINOR: stick-tables: limit the number of visited nodes during expiration As reported by Felipe in GH issue #3084, on large systems it's not sufficient to leave the expiration process after a certain number of expired entries, because if they accumulate too fast, it's possible to still spend some time visiting many (e.g. those still in use), which takes time. Thus here we're taking a stricter approach consisting in counting the number of visited entries, which allows to leave early if we can't do the expected work in a reasonable amount of time. In order to avoid always stopping on first shards and never visiting last ones, we're always starting from a random shard number and looping from that one. This way even if we always leave early, all shards will be handled equally. This should be backported to 3.2. --- src/stick_table.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/src/stick_table.c b/src/stick_table.c index f9334213e..e6dbe99a4 100644 --- a/src/stick_table.c +++ b/src/stick_table.c @@ -898,24 +898,24 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int struct stktable *t = context; struct stksess *ts; struct eb32_node *eb; - int need_resched = 0; int updt_locked; - int expired; + int to_visit = STKTABLE_MAX_UPDATES_AT_ONCE; int looped; int exp_next; int task_exp; - int shard; + int shard, init_shard; task_exp = TICK_ETERNITY; - for (shard = 0; shard < CONFIG_HAP_TBL_BUCKETS; shard++) { + /* start from a random shard number to avoid starvation in the last ones */ + shard = init_shard = statistical_prng_range(CONFIG_HAP_TBL_BUCKETS - 1); + do { updt_locked = 0; looped = 0; HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); eb = eb32_lookup_ge(&t->shards[shard].exps, now_ms - TIMER_LOOK_BACK); - expired = 0; - while (1) { + while (to_visit >= 0) { if (unlikely(!eb)) { /* we might have reached the end of the tree, typically because * is in the first half and we're first scanning the last @@ -936,19 +936,12 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int goto out_unlock; } + to_visit--; + /* timer looks expired, detach it from the queue */ ts = eb32_entry(eb, struct stksess, exp); eb = eb32_next(eb); - if (updt_locked == 1) { - expired++; - if (expired == STKTABLE_MAX_UPDATES_AT_ONCE) { - need_resched = 1; - exp_next = TICK_ETERNITY; - goto out_unlock; - } - } - /* This entry's key is expired, we must delete it. It * may be properly requeued if the element is still in * use or not really expired though. @@ -1011,9 +1004,13 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int task_exp = tick_first(task_exp, exp_next); HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); - } - if (need_resched) { + shard++; + if (shard >= CONFIG_HAP_TBL_BUCKETS) + shard = 0; + } while (to_visit > 0 && shard != init_shard); + + if (to_visit <= 0) { task_wakeup(task, TASK_WOKEN_OTHER); } else { /* Reset the task's expiration. We do this under the lock so as not