From a771b14541528a96060d1b711fcd7c94e9f0cfbf Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 9 Sep 2025 15:03:52 +0200 Subject: [PATCH] MEDIUM: stick-tables: give up on lock contention in process_table_expire() process_table_expire() can take quite a lot of time running over all shards. During this time it will hinder track-sc rules and peers, which will experience an increased latency to do their work, especially peers where each message will cause a lock, whose cumulative time can exceed the watchdog's patience. Here, we proceed just like in stktable_trash_oldest(), that is, we use a trylock to detect contention. The first time it happens, if we hadn't purged anything, we switch to a regular lock to perform the operation, and next time it happens we abort. This guarantees that some entries will be expired and that contention will be reduced when detected. With this change, various tests didn't manage to produce any warning, including at the end of the load generation session. This should be backported to 3.2 after a bit more testing. 
--- src/stick_table.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/stick_table.c b/src/stick_table.c index a7129b2fd..90fc7b0e6 100644 --- a/src/stick_table.c +++ b/src/stick_table.c @@ -926,6 +926,8 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int int exp_next; int task_exp; int shard, init_shard; + int failed_once = 0; + int purged = 0; task_exp = TICK_ETERNITY; @@ -934,7 +936,18 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int do { updt_locked = 0; looped = 0; - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + + if (HA_RWLOCK_TRYWRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock) != 0) { + if (purged || failed_once) { + /* already purged or second failed lock, yield and come back later */ + to_visit = 0; + break; + } + /* make sure we succeed at least once */ + failed_once = 1; + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + } + eb = eb32_lookup_ge(&t->shards[shard].exps, now_ms - TIMER_LOOK_BACK); while (to_visit >= 0) { @@ -1016,6 +1029,7 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int MT_LIST_DELETE(&ts->pend_updts); eb32_delete(&ts->upd); __stksess_free(t, ts); + purged++; } /* We have found no task to expire in any tree */