BUG/MEDIUM: connections: permit to permanently remove an idle conn

There's currently a function conn_delete_from_tree() which is used to
detach an idle connection from the tree it's currently attached to so
that it is no longer found. This function is used in three circumstances:
  - when picking a new connection that no longer has any avail stream
  - when temporarily working on the connection from an I/O handler,
    in which case it's re-added at the end
  - when killing a connection

The 2nd case above is quite specific, as it requires preserving the
CO_FL_LIST_MASK flags so that the connection can be re-inserted into
the proper tree when leaving the handler. However, there's a catch.
When killing a connection, we want to be certain it will not be
reinserted into the tree. Preserving the flags causes a tiny race if
an I/O event happens while the connection is in the kill list: in this
case the I/O handler notes the connection flags, does its work, then
reinserts the connection where it believed it was. The connection then
gets purged, and another user can still find it in the tree.

The issue is very difficult to reproduce. On a 128-thread machine it
happens in H2 at around 500k req/s after around 50M requests. In H1 it
happens after around 1 billion requests.

The fix here consists of passing an extra argument to the function to
indicate whether the removal is permanent or not. When it's permanent,
the function will clear the associated flags. The callers were adjusted
so that all those dequeuing a connection in order to kill it do it
permanently and all other ones do it only temporarily.

A slightly different approach could have worked: the function could
always remove all flags, and the callers would need to restore them.
But this would require trickier modifications of the various call
places, compared to only passing 0/1 to indicate the permanent status.

This will need to be backported to all stable versions. The issue has
been reproduced at least as far back as 3.1 (older versions were not
tested). The patch will need to be adjusted for 3.2 and older, because
a 2nd argument "thr" was added in 3.3, so the patch will not apply to
older versions as-is.
This commit is contained in:
Willy Tarreau 2025-11-05 10:51:27 +01:00
parent 59c599f3f0
commit 096999ee20
10 changed files with 35 additions and 32 deletions

View File

@ -83,7 +83,7 @@ int conn_install_mux_be(struct connection *conn, void *ctx, struct session *sess
const struct mux_ops *force_mux_ops); const struct mux_ops *force_mux_ops);
int conn_install_mux_chk(struct connection *conn, void *ctx, struct session *sess); int conn_install_mux_chk(struct connection *conn, void *ctx, struct session *sess);
void conn_delete_from_tree(struct connection *conn, int thr); void conn_delete_from_tree(struct connection *conn, int thr, int permanent);
void conn_init(struct connection *conn, void *target); void conn_init(struct connection *conn, void *target);
struct connection *conn_new(void *target); struct connection *conn_new(void *target);

View File

@ -1319,7 +1319,7 @@ struct connection *conn_backend_get(int reuse_mode,
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
conn = srv_lookup_conn(is_safe ? &srv->per_thr[tid].safe_conns : &srv->per_thr[tid].idle_conns, hash); conn = srv_lookup_conn(is_safe ? &srv->per_thr[tid].safe_conns : &srv->per_thr[tid].idle_conns, hash);
if (conn) if (conn)
conn_delete_from_tree(conn, tid); conn_delete_from_tree(conn, tid, 0);
/* If we failed to pick a connection from the idle list, let's try again with /* If we failed to pick a connection from the idle list, let's try again with
* the safe list. * the safe list.
@ -1327,7 +1327,7 @@ struct connection *conn_backend_get(int reuse_mode,
if (!conn && !is_safe && srv->curr_safe_nb > 0) { if (!conn && !is_safe && srv->curr_safe_nb > 0) {
conn = srv_lookup_conn(&srv->per_thr[tid].safe_conns, hash); conn = srv_lookup_conn(&srv->per_thr[tid].safe_conns, hash);
if (conn) { if (conn) {
conn_delete_from_tree(conn, tid); conn_delete_from_tree(conn, tid, 0);
is_safe = 1; is_safe = 1;
} }
} }
@ -1388,7 +1388,7 @@ check_tgid:
conn = srv_lookup_conn(tree, hash); conn = srv_lookup_conn(tree, hash);
while (conn) { while (conn) {
if (conn->mux->takeover && conn->mux->takeover(conn, i, 0) == 0) { if (conn->mux->takeover && conn->mux->takeover(conn, i, 0) == 0) {
conn_delete_from_tree(conn, i); conn_delete_from_tree(conn, i, 0);
_HA_ATOMIC_INC(&activity[tid].fd_takeover); _HA_ATOMIC_INC(&activity[tid].fd_takeover);
found = 1; found = 1;
break; break;
@ -1490,7 +1490,7 @@ takeover_random_idle_conn(struct ceb_root **root, int curtid)
conn = ceb64_item_first(root, hash_node.node, hash_node.key, struct connection); conn = ceb64_item_first(root, hash_node.node, hash_node.key, struct connection);
while (conn) { while (conn) {
if (conn->mux->takeover && conn->mux->takeover(conn, curtid, 1) == 0) { if (conn->mux->takeover && conn->mux->takeover(conn, curtid, 1) == 0) {
conn_delete_from_tree(conn, curtid); conn_delete_from_tree(conn, curtid, 0);
return conn; return conn;
} }
conn = ceb64_item_next(root, hash_node.node, hash_node.key, conn); conn = ceb64_item_next(root, hash_node.node, hash_node.key, conn);
@ -1751,7 +1751,7 @@ int be_reuse_connection(int64_t hash, struct session *sess,
if (avail <= 1) { if (avail <= 1) {
/* no more streams available, remove it from the list */ /* no more streams available, remove it from the list */
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
conn_delete_from_tree(srv_conn, tid); conn_delete_from_tree(srv_conn, tid, 0);
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
} }
@ -1863,7 +1863,7 @@ int connect_server(struct stream *s)
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
if (!LIST_ISEMPTY(&srv->per_thr[tid].idle_conn_list)) { if (!LIST_ISEMPTY(&srv->per_thr[tid].idle_conn_list)) {
tokill_conn = LIST_ELEM(srv->per_thr[tid].idle_conn_list.n, struct connection *, idle_list); tokill_conn = LIST_ELEM(srv->per_thr[tid].idle_conn_list.n, struct connection *, idle_list);
conn_delete_from_tree(tokill_conn, tid); conn_delete_from_tree(tokill_conn, tid, 1);
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
/* Release the idle lock before calling mux->destroy. /* Release the idle lock before calling mux->destroy.
@ -1892,7 +1892,7 @@ int connect_server(struct stream *s)
if (!LIST_ISEMPTY(&srv->per_thr[i].idle_conn_list)) { if (!LIST_ISEMPTY(&srv->per_thr[i].idle_conn_list)) {
tokill_conn = LIST_ELEM(srv->per_thr[i].idle_conn_list.n, struct connection *, idle_list); tokill_conn = LIST_ELEM(srv->per_thr[i].idle_conn_list.n, struct connection *, idle_list);
conn_delete_from_tree(tokill_conn, i); conn_delete_from_tree(tokill_conn, i, 1);
} }
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock); HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);

View File

@ -74,11 +74,13 @@ struct conn_tlv_list *conn_get_tlv(struct connection *conn, int type)
/* Remove <conn> idle connection from its attached tree (idle, safe or avail) /* Remove <conn> idle connection from its attached tree (idle, safe or avail)
* for the server in the connection's target and thread <thr>. If also present * for the server in the connection's target and thread <thr>. If also present
* in the secondary server idle list, conn is removed from it. * in the secondary server idle list, conn is removed from it. Finally, if
* <permanent> is non-zero, the idle connection flags are cleared as well so
* that the connection is not re-inserted later.
* *
* Must be called with idle_conns_lock held. * Must be called with idle_conns_lock held.
*/ */
void conn_delete_from_tree(struct connection *conn, int thr) void conn_delete_from_tree(struct connection *conn, int thr, int permanent)
{ {
struct ceb_root **conn_tree; struct ceb_root **conn_tree;
struct server *srv = __objt_server(conn->target); struct server *srv = __objt_server(conn->target);
@ -100,6 +102,8 @@ void conn_delete_from_tree(struct connection *conn, int thr)
} }
ceb64_item_delete(conn_tree, hash_node.node, hash_node.key, conn); ceb64_item_delete(conn_tree, hash_node.node, hash_node.key, conn);
if (permanent)
conn->flags &= ~CO_FL_LIST_MASK;
} }
int conn_create_mux(struct connection *conn, int *closed_connection) int conn_create_mux(struct connection *conn, int *closed_connection)
@ -227,7 +231,7 @@ int conn_notify_mux(struct connection *conn, int old_flags, int forced_wake)
conn_in_list = 0; conn_in_list = 0;
} }
else { else {
conn_delete_from_tree(conn, tid); conn_delete_from_tree(conn, tid, 0);
} }
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
} }

View File

@ -3068,7 +3068,7 @@ struct task *fcgi_io_cb(struct task *t, void *ctx, unsigned int state)
conn_in_list = 0; conn_in_list = 0;
} }
else { else {
conn_delete_from_tree(conn, tid); conn_delete_from_tree(conn, tid, 0);
} }
} }
@ -3329,7 +3329,7 @@ struct task *fcgi_timeout_task(struct task *t, void *context, unsigned int state
* to steal it from us. * to steal it from us.
*/ */
if (fconn->conn->flags & CO_FL_LIST_MASK) if (fconn->conn->flags & CO_FL_LIST_MASK)
conn_delete_from_tree(fconn->conn, tid); conn_delete_from_tree(fconn->conn, tid, 0);
else if (fconn->conn->flags & CO_FL_SESS_IDLE) else if (fconn->conn->flags & CO_FL_SESS_IDLE)
session_detach_idle_conn(fconn->conn->owner, fconn->conn); session_detach_idle_conn(fconn->conn->owner, fconn->conn);

View File

@ -4335,7 +4335,7 @@ struct task *h1_io_cb(struct task *t, void *ctx, unsigned int state)
conn_in_list = 0; conn_in_list = 0;
} }
else { else {
conn_delete_from_tree(conn, tid); conn_delete_from_tree(conn, tid, 0);
} }
} }
@ -4475,7 +4475,7 @@ struct task *h1_timeout_task(struct task *t, void *context, unsigned int state)
* to steal it from us. * to steal it from us.
*/ */
if (h1c->conn->flags & CO_FL_LIST_MASK) if (h1c->conn->flags & CO_FL_LIST_MASK)
conn_delete_from_tree(h1c->conn, tid); conn_delete_from_tree(h1c->conn, tid, 1);
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);

View File

@ -4985,7 +4985,7 @@ struct task *h2_io_cb(struct task *t, void *ctx, unsigned int state)
conn_in_list = 0; conn_in_list = 0;
} }
else { else {
conn_delete_from_tree(conn, tid); conn_delete_from_tree(conn, tid, 0);
} }
} }
@ -5162,7 +5162,7 @@ static int h2_process(struct h2c *h2c)
/* connections in error must be removed from the idle lists */ /* connections in error must be removed from the idle lists */
if (conn->flags & CO_FL_LIST_MASK) { if (conn->flags & CO_FL_LIST_MASK) {
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
conn_delete_from_tree(conn, tid); conn_delete_from_tree(conn, tid, 1);
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
} }
} }
@ -5170,7 +5170,7 @@ static int h2_process(struct h2c *h2c)
/* connections in error must be removed from the idle lists */ /* connections in error must be removed from the idle lists */
if (conn->flags & CO_FL_LIST_MASK) { if (conn->flags & CO_FL_LIST_MASK) {
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
conn_delete_from_tree(conn, tid); conn_delete_from_tree(conn, tid, 1);
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
} }
} }
@ -5278,7 +5278,7 @@ struct task *h2_timeout_task(struct task *t, void *context, unsigned int state)
* to steal it from us. * to steal it from us.
*/ */
if (h2c->conn->flags & CO_FL_LIST_MASK) if (h2c->conn->flags & CO_FL_LIST_MASK)
conn_delete_from_tree(h2c->conn, tid); conn_delete_from_tree(h2c->conn, tid, 1);
else if (h2c->conn->flags & CO_FL_SESS_IDLE) else if (h2c->conn->flags & CO_FL_SESS_IDLE)
session_detach_idle_conn(h2c->conn->owner, h2c->conn); session_detach_idle_conn(h2c->conn->owner, h2c->conn);
@ -5360,7 +5360,7 @@ do_leave:
/* in any case this connection must not be considered idle anymore */ /* in any case this connection must not be considered idle anymore */
if (h2c->conn->flags & CO_FL_LIST_MASK) { if (h2c->conn->flags & CO_FL_LIST_MASK) {
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
conn_delete_from_tree(h2c->conn, tid); conn_delete_from_tree(h2c->conn, tid, 1);
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
} }

View File

@ -3201,7 +3201,7 @@ static void qcc_shutdown(struct qcc *qcc)
/* A connection is not reusable if app layer is closed. */ /* A connection is not reusable if app layer is closed. */
if (qcc->flags & QC_CF_IS_BACK) if (qcc->flags & QC_CF_IS_BACK)
conn_delete_from_tree(qcc->conn, tid); conn_delete_from_tree(qcc->conn, tid, 1);
out: out:
qcc->app_st = QCC_APP_ST_SHUT; qcc->app_st = QCC_APP_ST_SHUT;
@ -3404,7 +3404,7 @@ struct task *qcc_io_cb(struct task *t, void *ctx, unsigned int state)
if (conn->flags & CO_FL_SESS_IDLE) if (conn->flags & CO_FL_SESS_IDLE)
session_detach_idle_conn(conn->owner, conn); session_detach_idle_conn(conn->owner, conn);
else else
conn_delete_from_tree(conn, tid); conn_delete_from_tree(conn, tid, 0);
} }
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
@ -3528,7 +3528,7 @@ static struct task *qcc_timeout_task(struct task *t, void *ctx, unsigned int sta
* attempts to steal it from us. * attempts to steal it from us.
*/ */
if (qcc->conn->flags & CO_FL_LIST_MASK) if (qcc->conn->flags & CO_FL_LIST_MASK)
conn_delete_from_tree(qcc->conn, tid); conn_delete_from_tree(qcc->conn, tid, 1);
else if (qcc->conn->flags & CO_FL_SESS_IDLE) else if (qcc->conn->flags & CO_FL_SESS_IDLE)
session_unown_conn(qcc->conn->owner, qcc->conn); session_unown_conn(qcc->conn->owner, qcc->conn);

View File

@ -2564,7 +2564,7 @@ static struct task *spop_io_cb(struct task *t, void *ctx, unsigned int state)
conn_in_list = 0; conn_in_list = 0;
} }
else { else {
conn_delete_from_tree(conn, tid); conn_delete_from_tree(conn, tid, 0);
} }
} }
@ -2676,7 +2676,7 @@ static int spop_process(struct spop_conn *spop_conn)
/* connections in error must be removed from the idle lists */ /* connections in error must be removed from the idle lists */
if (conn->flags & CO_FL_LIST_MASK) { if (conn->flags & CO_FL_LIST_MASK) {
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
conn_delete_from_tree(conn, tid); conn_delete_from_tree(conn, tid, 1);
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
} }
} }
@ -2799,7 +2799,7 @@ static struct task *spop_timeout_task(struct task *t, void *context, unsigned in
* to steal it from us. * to steal it from us.
*/ */
if (spop_conn->conn->flags & CO_FL_LIST_MASK) if (spop_conn->conn->flags & CO_FL_LIST_MASK)
conn_delete_from_tree(spop_conn->conn, tid); conn_delete_from_tree(spop_conn->conn, tid, 1);
else if (spop_conn->conn->flags & CO_FL_SESS_IDLE) else if (spop_conn->conn->flags & CO_FL_SESS_IDLE)
session_detach_idle_conn(spop_conn->conn->owner, spop_conn->conn); session_detach_idle_conn(spop_conn->conn->owner, spop_conn->conn);

View File

@ -7212,7 +7212,7 @@ static int srv_migrate_conns_to_remove(struct server *srv, int thr, int toremove
break; break;
conn = LIST_ELEM(srv->per_thr[thr].idle_conn_list.n, struct connection *, idle_list); conn = LIST_ELEM(srv->per_thr[thr].idle_conn_list.n, struct connection *, idle_list);
conn_delete_from_tree(conn, thr); conn_delete_from_tree(conn, thr, 1);
MT_LIST_APPEND(&idle_conns[thr].toremove_conns, &conn->toremove_list); MT_LIST_APPEND(&idle_conns[thr].toremove_conns, &conn->toremove_list);
i++; i++;
} }
@ -7287,8 +7287,7 @@ void srv_release_conn(struct server *srv, struct connection *conn)
/* Remove the connection from any tree (safe, idle or available) */ /* Remove the connection from any tree (safe, idle or available) */
if (ceb_intree(&conn->hash_node.node)) { if (ceb_intree(&conn->hash_node.node)) {
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
conn_delete_from_tree(conn, tid); conn_delete_from_tree(conn, tid, 1);
conn->flags &= ~CO_FL_LIST_MASK;
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
} }
} }
@ -7365,7 +7364,7 @@ int srv_add_to_idle_list(struct server *srv, struct connection *conn, int is_saf
_HA_ATOMIC_DEC(&srv->curr_used_conns); _HA_ATOMIC_DEC(&srv->curr_used_conns);
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock); HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
conn_delete_from_tree(conn, tid); conn_delete_from_tree(conn, tid, 0);
if (is_safe) { if (is_safe) {
conn->flags = (conn->flags & ~CO_FL_LIST_MASK) | CO_FL_SAFE_LIST; conn->flags = (conn->flags & ~CO_FL_LIST_MASK) | CO_FL_SAFE_LIST;
@ -7525,7 +7524,7 @@ static void srv_close_idle_conns(struct server *srv)
hash_node.key, struct connection))) { hash_node.key, struct connection))) {
if (conn->ctrl->ctrl_close) if (conn->ctrl->ctrl_close)
conn->ctrl->ctrl_close(conn); conn->ctrl->ctrl_close(conn);
conn_delete_from_tree(conn, i); conn_delete_from_tree(conn, i, 1);
} }
} }
} }

View File

@ -6752,7 +6752,7 @@ struct task *ssl_sock_io_cb(struct task *t, void *context, unsigned int state)
conn_in_list = 0; conn_in_list = 0;
} }
else { else {
conn_delete_from_tree(conn, tid); conn_delete_from_tree(conn, tid, 0);
} }
} }