BUG/MEDIUM: connection: do not reinsert a purgeable conn in idle list

A recent patch was introduced to fix a rare race condition in idle
connection code which would result in a crash. The issue is when MUX IO
handler run on top of connection moved in the purgeable list. The
connection would be considered as present in the idle list instead, and
reinserted in it at the end of the handler while still in the purge
list.

  096999ee208b8ae306983bc3fd677517d05948d2
  BUG/MEDIUM: connections: permit to permanently remove an idle conn

This patch solves the described issue. However, it introduces another
bug as it may clear connection flag when removing a connection from its
parent list. However, these flags now serve primarily as a status which
indicate that the connection is accounted by the server. When a backend
connection is freed, server idle/used counters are decremented
accordingly to these flags. With the above patch, an incorrect counter
could be adjusted and thus wrapping would occured.

The first impact of this bug is that it may distort the estimated number
of connections needed by servers, which would result either in poor
reuse rate or too many idle connections kept. Another noticeable impact
is that it may prevent server deletion.

The main problem of the original and current issues is that connection
flags are misinterpreted as telling if a connection is present in the
idle list. As already described here, in fact these flags are solely a
status which indicate that the connection is accounted in server
counters. Thus, here are the definitive conclusion that can be learned
here :

* (conn->flags & CO_FL_LIST_MASK) == 1:
  the connection is accounted by the server
  it may or may not be present in the idle list

* (conn->flags & CO_FL_LIST_MASK) == 0
  the connection is not accounted and not present in idle list

The discussion above does not mention session list, but a similar
pattern can be observed when CO_FL_SESS_IDLE flag is set.

To keep the original issue solved and fix the current one, IO MUX
handlers prologue are rewritten. Now, flags are not checked anymore for
list appartenance and LIST_INLIST macro is used instead. This is
definitely clearer with conn_in_list purpose here.

On IO MUX handlers end, conn idle flags may be checked if conn_in_list
was true, to reinsert the connection either in idle or safe list. This
is considered safe as no function should modify idle flags when a
connection is not stored in a list, except during conn_free() operation.

This patch must be backported to every stable versions after revert of
the above commit. It should be appliable up to 3.0 without any issue. On
2.8 and below, <idle_list> connection member does not exist. It should
be safe to check <leaf_p> tree node as a replacement.
This commit is contained in:
Amaury Denoyelle 2025-11-12 17:44:36 +01:00
parent d79295d89b
commit 9481cef948
5 changed files with 82 additions and 52 deletions

View File

@ -3061,15 +3061,22 @@ struct task *fcgi_io_cb(struct task *t, void *ctx, unsigned int state)
conn = fconn->conn;
TRACE_POINT(FCGI_EV_FCONN_WAKE, conn);
conn_in_list = conn->flags & (CO_FL_LIST_MASK|CO_FL_SESS_IDLE);
if (conn_in_list) {
if (conn->flags & CO_FL_SESS_IDLE) {
if (!session_detach_idle_conn(conn->owner, conn))
conn_in_list = 0;
}
else {
conn_delete_from_tree(conn, tid);
}
/* Remove the connection from the list, to be sure nobody attempts
* to use it while we handle the I/O events
*/
if (LIST_INLIST(&conn->idle_list)) {
conn_in_list = 1;
BUG_ON(!(conn->flags & CO_FL_LIST_MASK));
conn_delete_from_tree(conn, tid);
}
else if (LIST_INLIST(&conn->sess_el)) {
conn_in_list = 1;
BUG_ON(!(conn->flags & CO_FL_SESS_IDLE));
/* session_detach_idle_conn cannot fail thanks to LIST_INLIST() above test */
session_detach_idle_conn(conn->owner, conn);
}
else {
conn_in_list = 0;
}
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
@ -3106,8 +3113,8 @@ struct task *fcgi_io_cb(struct task *t, void *ctx, unsigned int state)
}
}
else {
ASSUME_NONNULL(srv); /* srv is guaranteed by CO_FL_LIST_MASK */
srv_add_idle(srv, conn, conn_in_list == CO_FL_SAFE_LIST);
ASSUME_NONNULL(srv); /* srv is guaranteed to be non-null if conn is in idle list */
srv_add_idle(srv, conn, (conn->flags & CO_FL_LIST_MASK) == CO_FL_SAFE_LIST);
}
}
else {

View File

@ -4342,15 +4342,19 @@ struct task *h1_io_cb(struct task *t, void *ctx, unsigned int state)
/* Remove the connection from the list, to be sure nobody attempts
* to use it while we handle the I/O events
*/
conn_in_list = conn->flags & (CO_FL_LIST_MASK|CO_FL_SESS_IDLE);
if (conn_in_list) {
if (conn->flags & CO_FL_SESS_IDLE) {
if (!session_detach_idle_conn(conn->owner, conn))
conn_in_list = 0;
}
else {
conn_delete_from_tree(conn, tid);
}
if (LIST_INLIST(&conn->idle_list)) {
conn_in_list = 1;
BUG_ON(!(conn->flags & CO_FL_LIST_MASK));
conn_delete_from_tree(conn, tid);
}
else if (LIST_INLIST(&conn->sess_el)) {
conn_in_list = 1;
BUG_ON(!(conn->flags & CO_FL_SESS_IDLE));
/* session_detach_idle_conn cannot fail thanks to LIST_INLIST() above test */
session_detach_idle_conn(conn->owner, conn);
}
else {
conn_in_list = 0;
}
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
@ -4387,8 +4391,8 @@ struct task *h1_io_cb(struct task *t, void *ctx, unsigned int state)
}
}
else {
ASSUME_NONNULL(srv); /* srv is guaranteed by CO_FL_LIST_MASK */
srv_add_idle(srv, conn, conn_in_list == CO_FL_SAFE_LIST);
ASSUME_NONNULL(srv); /* srv is guaranteed to be non-null if conn is in idle list */
srv_add_idle(srv, conn, (conn->flags & CO_FL_LIST_MASK) == CO_FL_SAFE_LIST);
}
}
else {

View File

@ -4978,19 +4978,24 @@ struct task *h2_io_cb(struct task *t, void *ctx, unsigned int state)
/* Remove the connection from the list, to be sure nobody attempts
* to use it while we handle the I/O events
*/
conn_in_list = conn->flags & (CO_FL_LIST_MASK|CO_FL_SESS_IDLE);
if (conn_in_list) {
if (conn->flags & CO_FL_SESS_IDLE) {
if (!session_detach_idle_conn(conn->owner, conn))
conn_in_list = 0;
}
else {
conn_delete_from_tree(conn, tid);
}
if (LIST_INLIST(&conn->idle_list)) {
conn_in_list = 1;
BUG_ON(!(conn->flags & CO_FL_LIST_MASK));
conn_delete_from_tree(conn, tid);
}
else if (LIST_INLIST(&conn->sess_el)) {
conn_in_list = 1;
BUG_ON(!(conn->flags & CO_FL_SESS_IDLE));
/* session_detach_idle_conn cannot fail thanks to LIST_INLIST() above test */
session_detach_idle_conn(conn->owner, conn);
}
else {
conn_in_list = 0;
}
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
} else {
}
else {
/* we're certain the connection was not in an idle list */
conn = h2c->conn;
TRACE_ENTER(H2_EV_H2C_WAKE, conn);
@ -5026,8 +5031,8 @@ struct task *h2_io_cb(struct task *t, void *ctx, unsigned int state)
}
}
else {
ASSUME_NONNULL(srv); /* srv is guaranteed by CO_FL_LIST_MASK */
srv_add_idle(srv, conn, conn_in_list == CO_FL_SAFE_LIST);
ASSUME_NONNULL(srv); /* srv is guaranteed to be non-null if conn is in idle list */
srv_add_idle(srv, conn, (conn->flags & CO_FL_LIST_MASK) == CO_FL_SAFE_LIST);
}
}
else {

View File

@ -3399,12 +3399,19 @@ struct task *qcc_io_cb(struct task *t, void *ctx, unsigned int state)
/* Remove the connection from the list, to be sure nobody attempts
* to use it while we handle the I/O events
*/
conn_in_list = conn->flags & (CO_FL_LIST_MASK|CO_FL_SESS_IDLE);
if (conn_in_list) {
if (conn->flags & CO_FL_SESS_IDLE)
session_detach_idle_conn(conn->owner, conn);
else
conn_delete_from_tree(conn, tid);
if (LIST_INLIST(&conn->idle_list)) {
conn_in_list = 1;
BUG_ON(!(conn->flags & CO_FL_LIST_MASK));
conn_delete_from_tree(conn, tid);
}
else if (LIST_INLIST(&conn->sess_el)) {
conn_in_list = 1;
BUG_ON(!(conn->flags & CO_FL_SESS_IDLE));
/* session_detach_idle_conn cannot fail thanks to LIST_INLIST() above test */
session_detach_idle_conn(conn->owner, conn);
}
else {
conn_in_list = 0;
}
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
@ -3448,7 +3455,7 @@ struct task *qcc_io_cb(struct task *t, void *ctx, unsigned int state)
}
}
else {
srv_add_idle(srv, conn, conn_in_list == CO_FL_SAFE_LIST);
srv_add_idle(srv, conn, (conn->flags & CO_FL_LIST_MASK) == CO_FL_SAFE_LIST);
}
/* Do not access conn without protection as soon as it is reinserted in idle list. */

View File

@ -2557,15 +2557,22 @@ static struct task *spop_io_cb(struct task *t, void *ctx, unsigned int state)
conn = spop_conn->conn;
TRACE_POINT(SPOP_EV_SPOP_CONN_WAKE, conn);
conn_in_list = conn->flags & (CO_FL_LIST_MASK|CO_FL_SESS_IDLE);
if (conn_in_list) {
if (conn->flags & CO_FL_SESS_IDLE) {
if (!session_detach_idle_conn(conn->owner, conn))
conn_in_list = 0;
}
else {
conn_delete_from_tree(conn, tid);
}
/* Remove the connection from the list, to be sure nobody attempts
* to use it while we handle the I/O events
*/
if (LIST_INLIST(&conn->idle_list)) {
conn_in_list = 1;
BUG_ON(!(conn->flags & CO_FL_LIST_MASK));
conn_delete_from_tree(conn, tid);
}
else if (LIST_INLIST(&conn->sess_el)) {
conn_in_list = 1;
BUG_ON(!(conn->flags & CO_FL_SESS_IDLE));
/* session_detach_idle_conn cannot fail thanks to LIST_INLIST() above test */
session_detach_idle_conn(conn->owner, conn);
}
else {
conn_in_list = 0;
}
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
@ -2602,8 +2609,8 @@ static struct task *spop_io_cb(struct task *t, void *ctx, unsigned int state)
}
}
else {
ASSUME_NONNULL(srv); /* srv is guaranteed by CO_FL_LIST_MASK */
srv_add_idle(srv, conn, conn_in_list == CO_FL_SAFE_LIST);
ASSUME_NONNULL(srv); /* srv is guaranteed to be non-null if conn is in idle list */
srv_add_idle(srv, conn, (conn->flags & CO_FL_LIST_MASK) == CO_FL_SAFE_LIST);
}
}
else {