From 9481cef94882b7bdf3dfab24951649465bc3f5d0 Mon Sep 17 00:00:00 2001 From: Amaury Denoyelle Date: Wed, 12 Nov 2025 17:44:36 +0100 Subject: [PATCH] BUG/MEDIUM: connection: do not reinsert a purgeable conn in idle list A recent patch was introduced to fix a rare race condition in idle connection code which would result in a crash. The issue is when MUX IO handler run on top of connection moved in the purgeable list. The connection would be considered as present in the idle list instead, and reinserted in it at the end of the handler while still in the purge list. 096999ee208b8ae306983bc3fd677517d05948d2 BUG/MEDIUM: connections: permit to permanently remove an idle conn This patch solves the described issue. However, it introduces another bug as it may clear connection flag when removing a connection from its parent list. However, these flags now serve primarily as a status which indicate that the connection is accounted by the server. When a backend connection is freed, server idle/used counters are decremented accordingly to these flags. With the above patch, an incorrect counter could be adjusted and thus wrapping would occured. The first impact of this bug is that it may distort the estimated number of connections needed by servers, which would result either in poor reuse rate or too many idle connections kept. Another noticeable impact is that it may prevent server deletion. The main problem of the original and current issues is that connection flags are misinterpreted as telling if a connection is present in the idle list. As already described here, in fact these flags are solely a status which indicate that the connection is accounted in server counters. Thus, here are the definitive conclusion that can be learned here : * (conn->flags & CO_FL_LIST_MASK) == 1: the connection is accounted by the server it may or may not be present in the idle list * (conn->flags & CO_FL_LIST_MASK) == 0 the connection is not accounted and not present in idle list The discussion above does not mention session list, but a similar pattern can be observed when CO_FL_SESS_IDLE flag is set. To keep the original issue solved and fix the current one, IO MUX handlers prologue are rewritten. Now, flags are not checked anymore for list appartenance and LIST_INLIST macro is used instead. This is definitely clearer with conn_in_list purpose here. On IO MUX handlers end, conn idle flags may be checked if conn_in_list was true, to reinsert the connection either in idle or safe list. This is considered safe as no function should modify idle flags when a connection is not stored in a list, except during conn_free() operation. This patch must be backported to every stable versions after revert of the above commit. It should be appliable up to 3.0 without any issue. On 2.8 and below, connection member does not exist. It should be safe to check tree node as a replacement. --- src/mux_fcgi.c | 29 ++++++++++++++++++----------- src/mux_h1.c | 26 +++++++++++++++----------- src/mux_h2.c | 29 +++++++++++++++++------------ src/mux_quic.c | 21 ++++++++++++++------- src/mux_spop.c | 29 ++++++++++++++++++----------- 5 files changed, 82 insertions(+), 52 deletions(-) diff --git a/src/mux_fcgi.c b/src/mux_fcgi.c index ba736f371..45b546f01 100644 --- a/src/mux_fcgi.c +++ b/src/mux_fcgi.c @@ -3061,15 +3061,22 @@ struct task *fcgi_io_cb(struct task *t, void *ctx, unsigned int state) conn = fconn->conn; TRACE_POINT(FCGI_EV_FCONN_WAKE, conn); - conn_in_list = conn->flags & (CO_FL_LIST_MASK|CO_FL_SESS_IDLE); - if (conn_in_list) { - if (conn->flags & CO_FL_SESS_IDLE) { - if (!session_detach_idle_conn(conn->owner, conn)) - conn_in_list = 0; - } - else { - conn_delete_from_tree(conn, tid); - } + /* Remove the connection from the list, to be sure nobody attempts + * to use it while we handle the I/O events + */ + if (LIST_INLIST(&conn->idle_list)) { + conn_in_list = 1; + BUG_ON(!(conn->flags & CO_FL_LIST_MASK)); + conn_delete_from_tree(conn, tid); + } + else if (LIST_INLIST(&conn->sess_el)) { + conn_in_list = 1; + BUG_ON(!(conn->flags & CO_FL_SESS_IDLE)); + /* session_detach_idle_conn cannot fail thanks to LIST_INLIST() above test */ + session_detach_idle_conn(conn->owner, conn); + } + else { + conn_in_list = 0; } HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); @@ -3106,8 +3113,8 @@ struct task *fcgi_io_cb(struct task *t, void *ctx, unsigned int state) } } else { - ASSUME_NONNULL(srv); /* srv is guaranteed by CO_FL_LIST_MASK */ - srv_add_idle(srv, conn, conn_in_list == CO_FL_SAFE_LIST); + ASSUME_NONNULL(srv); /* srv is guaranteed to be non-null if conn is in idle list */ + srv_add_idle(srv, conn, (conn->flags & CO_FL_LIST_MASK) == CO_FL_SAFE_LIST); } } else { diff --git a/src/mux_h1.c b/src/mux_h1.c index 60862fc57..30dcb6086 100644 --- a/src/mux_h1.c +++ b/src/mux_h1.c @@ -4342,15 +4342,19 @@ struct task *h1_io_cb(struct task *t, void *ctx, unsigned int state) /* Remove the connection from the list, to be sure nobody attempts * to use it while we handle the I/O events */ - conn_in_list = conn->flags & (CO_FL_LIST_MASK|CO_FL_SESS_IDLE); - if (conn_in_list) { - if (conn->flags & CO_FL_SESS_IDLE) { - if (!session_detach_idle_conn(conn->owner, conn)) - conn_in_list = 0; - } - else { - conn_delete_from_tree(conn, tid); - } + if (LIST_INLIST(&conn->idle_list)) { + conn_in_list = 1; + BUG_ON(!(conn->flags & CO_FL_LIST_MASK)); + conn_delete_from_tree(conn, tid); + } + else if (LIST_INLIST(&conn->sess_el)) { + conn_in_list = 1; + BUG_ON(!(conn->flags & CO_FL_SESS_IDLE)); + /* session_detach_idle_conn cannot fail thanks to LIST_INLIST() above test */ + session_detach_idle_conn(conn->owner, conn); + } + else { + conn_in_list = 0; } HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); @@ -4387,8 +4391,8 @@ struct task *h1_io_cb(struct task *t, void *ctx, unsigned int state) } } else { - ASSUME_NONNULL(srv); /* srv is guaranteed by CO_FL_LIST_MASK */ - srv_add_idle(srv, conn, conn_in_list == CO_FL_SAFE_LIST); + ASSUME_NONNULL(srv); /* srv is guaranteed to be non-null if conn is in idle list */ + srv_add_idle(srv, conn, (conn->flags & CO_FL_LIST_MASK) == CO_FL_SAFE_LIST); } } else { diff --git a/src/mux_h2.c b/src/mux_h2.c index 963aa1a8a..006670810 100644 --- a/src/mux_h2.c +++ b/src/mux_h2.c @@ -4978,19 +4978,24 @@ struct task *h2_io_cb(struct task *t, void *ctx, unsigned int state) /* Remove the connection from the list, to be sure nobody attempts * to use it while we handle the I/O events */ - conn_in_list = conn->flags & (CO_FL_LIST_MASK|CO_FL_SESS_IDLE); - if (conn_in_list) { - if (conn->flags & CO_FL_SESS_IDLE) { - if (!session_detach_idle_conn(conn->owner, conn)) - conn_in_list = 0; - } - else { - conn_delete_from_tree(conn, tid); - } + if (LIST_INLIST(&conn->idle_list)) { + conn_in_list = 1; + BUG_ON(!(conn->flags & CO_FL_LIST_MASK)); + conn_delete_from_tree(conn, tid); + } + else if (LIST_INLIST(&conn->sess_el)) { + conn_in_list = 1; + BUG_ON(!(conn->flags & CO_FL_SESS_IDLE)); + /* session_detach_idle_conn cannot fail thanks to LIST_INLIST() above test */ + session_detach_idle_conn(conn->owner, conn); + } + else { + conn_in_list = 0; } HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); - } else { + } + else { /* we're certain the connection was not in an idle list */ conn = h2c->conn; TRACE_ENTER(H2_EV_H2C_WAKE, conn); @@ -5026,8 +5031,8 @@ struct task *h2_io_cb(struct task *t, void *ctx, unsigned int state) } } else { - ASSUME_NONNULL(srv); /* srv is guaranteed by CO_FL_LIST_MASK */ - srv_add_idle(srv, conn, conn_in_list == CO_FL_SAFE_LIST); + ASSUME_NONNULL(srv); /* srv is guaranteed to be non-null if conn is in idle list */ + srv_add_idle(srv, conn, (conn->flags & CO_FL_LIST_MASK) == CO_FL_SAFE_LIST); } } else { diff --git a/src/mux_quic.c b/src/mux_quic.c index da471cc57..43c681394 100644 --- a/src/mux_quic.c +++ b/src/mux_quic.c @@ -3399,12 +3399,19 @@ struct task *qcc_io_cb(struct task *t, void *ctx, unsigned int state) /* Remove the connection from the list, to be sure nobody attempts * to use it while we handle the I/O events */ - conn_in_list = conn->flags & (CO_FL_LIST_MASK|CO_FL_SESS_IDLE); - if (conn_in_list) { - if (conn->flags & CO_FL_SESS_IDLE) - session_detach_idle_conn(conn->owner, conn); - else - conn_delete_from_tree(conn, tid); + if (LIST_INLIST(&conn->idle_list)) { + conn_in_list = 1; + BUG_ON(!(conn->flags & CO_FL_LIST_MASK)); + conn_delete_from_tree(conn, tid); + } + else if (LIST_INLIST(&conn->sess_el)) { + conn_in_list = 1; + BUG_ON(!(conn->flags & CO_FL_SESS_IDLE)); + /* session_detach_idle_conn cannot fail thanks to LIST_INLIST() above test */ + session_detach_idle_conn(conn->owner, conn); + } + else { + conn_in_list = 0; } HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); @@ -3448,7 +3455,7 @@ struct task *qcc_io_cb(struct task *t, void *ctx, unsigned int state) } } else { - srv_add_idle(srv, conn, conn_in_list == CO_FL_SAFE_LIST); + srv_add_idle(srv, conn, (conn->flags & CO_FL_LIST_MASK) == CO_FL_SAFE_LIST); } /* Do not access conn without protection as soon as it is reinserted in idle list. */ diff --git a/src/mux_spop.c b/src/mux_spop.c index 1307ff6e0..57686838e 100644 --- a/src/mux_spop.c +++ b/src/mux_spop.c @@ -2557,15 +2557,22 @@ static struct task *spop_io_cb(struct task *t, void *ctx, unsigned int state) conn = spop_conn->conn; TRACE_POINT(SPOP_EV_SPOP_CONN_WAKE, conn); - conn_in_list = conn->flags & (CO_FL_LIST_MASK|CO_FL_SESS_IDLE); - if (conn_in_list) { - if (conn->flags & CO_FL_SESS_IDLE) { - if (!session_detach_idle_conn(conn->owner, conn)) - conn_in_list = 0; - } - else { - conn_delete_from_tree(conn, tid); - } + /* Remove the connection from the list, to be sure nobody attempts + * to use it while we handle the I/O events + */ + if (LIST_INLIST(&conn->idle_list)) { + conn_in_list = 1; + BUG_ON(!(conn->flags & CO_FL_LIST_MASK)); + conn_delete_from_tree(conn, tid); + } + else if (LIST_INLIST(&conn->sess_el)) { + conn_in_list = 1; + BUG_ON(!(conn->flags & CO_FL_SESS_IDLE)); + /* session_detach_idle_conn cannot fail thanks to LIST_INLIST() above test */ + session_detach_idle_conn(conn->owner, conn); + } + else { + conn_in_list = 0; } HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); @@ -2602,8 +2609,8 @@ static struct task *spop_io_cb(struct task *t, void *ctx, unsigned int state) } } else { - ASSUME_NONNULL(srv); /* srv is guaranteed by CO_FL_LIST_MASK */ - srv_add_idle(srv, conn, conn_in_list == CO_FL_SAFE_LIST); + ASSUME_NONNULL(srv); /* srv is guaranteed to be non-null if conn is in idle list */ + srv_add_idle(srv, conn, (conn->flags & CO_FL_LIST_MASK) == CO_FL_SAFE_LIST); } } else {