MINOR: stick-tables: limit the number of visited nodes during expiration

As reported by Felipe in GH issue #3084, on large systems it is not
sufficient to leave the expiration process after a certain number of
expired entries: when entries accumulate too fast, the task may still
spend a long time visiting many entries that cannot be expired (e.g.
those still in use).

Thus here we're taking a stricter approach, consisting in counting the
number of visited entries, which allows us to leave early when we
cannot do the expected work in a reasonable amount of time.
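
The key point is that the budget counts every visited node, including
those that turn out not to be expirable (still referenced, requeued,
etc.), so a flood of such entries can no longer make a single pass
unbounded. Below is a minimal standalone sketch of that idea, using a
hypothetical linked list and VISIT_BUDGET constant rather than
HAProxy's ebtree and task API:

#include <stddef.h>
#include <stdbool.h>

#define VISIT_BUDGET 100  /* hypothetical per-wakeup limit on visited nodes */

struct node {
	struct node *next;
	bool expired;
};

/* Walk the list, dropping expired nodes, but never look at more than
 * VISIT_BUDGET nodes in one pass. Returns true when the whole list was
 * scanned, false when the pass stopped early and must be rescheduled.
 */
static bool expire_pass(struct node **head)
{
	int to_visit = VISIT_BUDGET;
	struct node **prev = head;

	while (to_visit-- > 0) {
		struct node *cur = *prev;

		if (!cur)
			return true;       /* reached the end: work complete */
		if (cur->expired)
			*prev = cur->next; /* unlink; freeing omitted for brevity */
		else
			prev = &cur->next; /* still in use: skipped but still counted */
	}
	return false;                      /* budget exhausted: more to do */
}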

In order to avoid always stopping on the first shards and never
visiting the last ones, we now start from a random shard number and
loop from there. This way, even if we always leave early, all shards
are handled equally over time.
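
A minimal sketch of that iteration pattern follows; NUM_SHARDS,
process_shard() and the rand()-based starting point are illustrative
stand-ins for CONFIG_HAP_TBL_BUCKETS, the per-shard expiry scan and
statistical_prng_range(), not the actual HAProxy code:

#include <stdlib.h>

#define NUM_SHARDS 16  /* stand-in for CONFIG_HAP_TBL_BUCKETS */

/* Visit every shard at most once, starting from a random one and
 * wrapping around, so that an early exit does not always penalize the
 * same trailing shards.
 */
static void expire_all_shards(int *budget,
                              void (*process_shard)(int shard, int *budget))
{
	int shard, init_shard;

	shard = init_shard = rand() % NUM_SHARDS;
	do {
		process_shard(shard, budget);
		shard++;
		if (shard >= NUM_SHARDS)
			shard = 0;
	} while (*budget > 0 && shard != init_shard);
}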

This should be backported to 3.2.
commit 696793205b
parent 2421c3769a
Author: Willy Tarreau
Date:   2025-09-03 10:45:30 +02:00


@@ -898,24 +898,24 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int
 	struct stktable *t = context;
 	struct stksess *ts;
 	struct eb32_node *eb;
-	int need_resched = 0;
 	int updt_locked;
-	int expired;
+	int to_visit = STKTABLE_MAX_UPDATES_AT_ONCE;
 	int looped;
 	int exp_next;
 	int task_exp;
-	int shard;
+	int shard, init_shard;
 
 	task_exp = TICK_ETERNITY;
 
-	for (shard = 0; shard < CONFIG_HAP_TBL_BUCKETS; shard++) {
+	/* start from a random shard number to avoid starvation in the last ones */
+	shard = init_shard = statistical_prng_range(CONFIG_HAP_TBL_BUCKETS - 1);
+	do {
 		updt_locked = 0;
 		looped = 0;
 		HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
 		eb = eb32_lookup_ge(&t->shards[shard].exps, now_ms - TIMER_LOOK_BACK);
 
-		expired = 0;
-		while (1) {
+		while (to_visit >= 0) {
 			if (unlikely(!eb)) {
 				/* we might have reached the end of the tree, typically because
 				 * <now_ms> is in the first half and we're first scanning the last
@@ -936,19 +936,12 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int
 				goto out_unlock;
 			}
 
+			to_visit--;
+
 			/* timer looks expired, detach it from the queue */
 			ts = eb32_entry(eb, struct stksess, exp);
 			eb = eb32_next(eb);
 
-			if (updt_locked == 1) {
-				expired++;
-				if (expired == STKTABLE_MAX_UPDATES_AT_ONCE) {
-					need_resched = 1;
-					exp_next = TICK_ETERNITY;
-					goto out_unlock;
-				}
-			}
-
 			/* This entry's key is expired, we must delete it. It
 			 * may be properly requeued if the element is still in
 			 * use or not really expired though.
@@ -1011,9 +1004,13 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int
 		task_exp = tick_first(task_exp, exp_next);
 
 		HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
-	}
 
-	if (need_resched) {
+		shard++;
+		if (shard >= CONFIG_HAP_TBL_BUCKETS)
+			shard = 0;
+	} while (to_visit > 0 && shard != init_shard);
+
+	if (to_visit <= 0) {
 		task_wakeup(task, TASK_WOKEN_OTHER);
 	} else {
 		/* Reset the task's expiration. We do this under the lock so as not