From 4a6dec71934d4b3bccff52be0ea01e312a5bda72 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Fri, 14 Nov 2025 16:48:16 +0100 Subject: [PATCH] DEBUG: servers: add a few checks for stress-testing idle conns The latest idle conns fix 9481cef948 ("BUG/MEDIUM: connection: do not reinsert a purgeable conn in idle list") addresses a very hard-to-hit case which manifests itself when an attempt to reuse a connection fails because conn->mux is NULL: Program terminated with signal SIGSEGV, Segmentation fault. #0 0x0000655410b8642c in conn_backend_get (reuse_mode=4, srv=srv@entry=0x6554378a7140, sess=sess@entry=0x7cfe140948a0, is_safe=is_safe@entry=0, hash=hash@entry=910818338996668161) at src/backend.c:1390 1390 if (conn->mux->takeover && conn->mux->takeover(conn, i, 0) == 0) { However the condition that leads to this situation can be detected earlier, by the presence of the connection in the toremove_list, whose race window is much larger and easier to detect. This patch adds a few BUG_ON_STRESS() at selected places that can detect this condition. When built with -DDEBUG_STRESS and run under stress with two distinct processes communicating over H2 over SSL, under a stress of 400-500k req/s, the front process usually crashes within the first 10-30s, with the check triggering in _srv_add_idle(), if the fix above is reverted (and it does not crash with the fix). This is mainly included to serve as an illustration of how to instrument the code for seamless stress testing. 
--- src/mux_h2.c | 1 + src/server.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/mux_h2.c b/src/mux_h2.c index 006670810..2a57f41a8 100644 --- a/src/mux_h2.c +++ b/src/mux_h2.c @@ -1489,6 +1489,7 @@ static void h2_release(struct h2c *h2c) TRACE_ENTER(H2_EV_H2C_END); + BUG_ON_STRESS(LIST_INLIST(&conn->idle_list) && conn->flags & CO_FL_LIST_MASK); hpack_dht_free(h2c->ddht); b_dequeue(&h2c->buf_wait); diff --git a/src/server.c b/src/server.c index b61489042..1b3061487 100644 --- a/src/server.c +++ b/src/server.c @@ -7329,6 +7329,7 @@ static inline void _srv_add_idle(struct server *srv, struct connection *conn, in /* first insert in idle or safe tree. */ ceb64_item_insert(tree, hash_node.node, hash_node.key, conn); + BUG_ON_STRESS(!mt_list_isempty(&conn->toremove_list)); /* insert in list sorted by connection usage. */ LIST_APPEND(&srv->per_thr[tid].idle_conn_list, &conn->idle_list); } @@ -7394,6 +7395,8 @@ int srv_add_to_idle_list(struct server *srv, struct connection *conn, int is_saf _srv_add_idle(srv, conn, 0); _HA_ATOMIC_INC(&srv->curr_idle_nb); } + + BUG_ON_STRESS(!mt_list_isempty(&conn->toremove_list)); HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); _HA_ATOMIC_INC(&srv->curr_idle_thr[tid]); @@ -7409,7 +7412,7 @@ int srv_add_to_idle_list(struct server *srv, struct connection *conn, int is_saf task_schedule(idle_conn_task, srv->idle_node.key); } - + BUG_ON_STRESS(!mt_list_isempty(&conn->toremove_list)); } HA_SPIN_UNLOCK(OTHER_LOCK, &idle_conn_srv_lock); } @@ -7425,6 +7428,7 @@ void srv_add_to_avail_list(struct server *srv, struct connection *conn) { /* connection cannot be in idle list if used as an avail idle conn. */ BUG_ON(LIST_INLIST(&conn->idle_list)); + BUG_ON_STRESS(!mt_list_isempty(&conn->toremove_list)); ceb64_item_insert(&srv->per_thr[tid].avail_conns, hash_node.node, hash_node.key, conn); }