mirror of
https://git.haproxy.org/git/haproxy.git/
synced 2025-11-07 03:50:59 +01:00
BUG/MEDIUM: connections: permit to permanently remove an idle conn
There's currently a function conn_delete_from_tree() which is used to
detach an idle connection from the tree it's currently attached to so
that it is no longer found. This function is used in three circumstances:
- when picking a new connection that no longer has any avail stream
- when temporarily working on the connection from an I/O handler,
in which case it's re-added at the end
- when killing a connection
The 2nd case above is quite specific, as it requires to preserve the
CO_FL_LIST_MASK flags so that the connection can be re-inserted into
the proper tree when leaving the handler. However, there's a catch.
When killing a connection, we want to be certain it will not be
reinserted into the tree. The flags preservation is causing a tiny
race if an I/O happens while the connection is in the kill list,
because in this case the I/O handler will note the connection flags,
do its work, then reinsert the connection where it believed it was,
then the connection gets purged, and another user can find it in the
tree.
The issue is very difficult to reproduce. On a 128-thread machine it
happens in H2 around 500k req/s after around 50M requests. In H1 it
happens after around 1 billion requests.
The fix here consists in passing an extra argument to the function to
indicate if the removal is permanent or not. When it's permanent, the
function will clear the associated flags. The callers were adjusted
so that all those dequeuing a connection in order to kill it do it
permanently and all other ones do it only temporarily.
A slightly different approach could have worked: the function could
always remove all flags, and the callers would need to restore them.
But this would require trickier modifications of the various call
places, compared to only passing 0/1 to indicate the permanent status.
This will need to be backported to all stable versions. The issue was
at least reproduced since 3.1 (not tested before). The patch will need
to be adjusted for 3.2 and older, because a 2nd argument "thr" was
added in 3.3, so the patch will not apply to older versions as-is.
This commit is contained in:
parent
59c599f3f0
commit
096999ee20
@ -83,7 +83,7 @@ int conn_install_mux_be(struct connection *conn, void *ctx, struct session *sess
|
||||
const struct mux_ops *force_mux_ops);
|
||||
int conn_install_mux_chk(struct connection *conn, void *ctx, struct session *sess);
|
||||
|
||||
void conn_delete_from_tree(struct connection *conn, int thr);
|
||||
void conn_delete_from_tree(struct connection *conn, int thr, int permanent);
|
||||
|
||||
void conn_init(struct connection *conn, void *target);
|
||||
struct connection *conn_new(void *target);
|
||||
|
||||
@ -1319,7 +1319,7 @@ struct connection *conn_backend_get(int reuse_mode,
|
||||
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
|
||||
conn = srv_lookup_conn(is_safe ? &srv->per_thr[tid].safe_conns : &srv->per_thr[tid].idle_conns, hash);
|
||||
if (conn)
|
||||
conn_delete_from_tree(conn, tid);
|
||||
conn_delete_from_tree(conn, tid, 0);
|
||||
|
||||
/* If we failed to pick a connection from the idle list, let's try again with
|
||||
* the safe list.
|
||||
@ -1327,7 +1327,7 @@ struct connection *conn_backend_get(int reuse_mode,
|
||||
if (!conn && !is_safe && srv->curr_safe_nb > 0) {
|
||||
conn = srv_lookup_conn(&srv->per_thr[tid].safe_conns, hash);
|
||||
if (conn) {
|
||||
conn_delete_from_tree(conn, tid);
|
||||
conn_delete_from_tree(conn, tid, 0);
|
||||
is_safe = 1;
|
||||
}
|
||||
}
|
||||
@ -1388,7 +1388,7 @@ check_tgid:
|
||||
conn = srv_lookup_conn(tree, hash);
|
||||
while (conn) {
|
||||
if (conn->mux->takeover && conn->mux->takeover(conn, i, 0) == 0) {
|
||||
conn_delete_from_tree(conn, i);
|
||||
conn_delete_from_tree(conn, i, 0);
|
||||
_HA_ATOMIC_INC(&activity[tid].fd_takeover);
|
||||
found = 1;
|
||||
break;
|
||||
@ -1490,7 +1490,7 @@ takeover_random_idle_conn(struct ceb_root **root, int curtid)
|
||||
conn = ceb64_item_first(root, hash_node.node, hash_node.key, struct connection);
|
||||
while (conn) {
|
||||
if (conn->mux->takeover && conn->mux->takeover(conn, curtid, 1) == 0) {
|
||||
conn_delete_from_tree(conn, curtid);
|
||||
conn_delete_from_tree(conn, curtid, 0);
|
||||
return conn;
|
||||
}
|
||||
conn = ceb64_item_next(root, hash_node.node, hash_node.key, conn);
|
||||
@ -1751,7 +1751,7 @@ int be_reuse_connection(int64_t hash, struct session *sess,
|
||||
if (avail <= 1) {
|
||||
/* no more streams available, remove it from the list */
|
||||
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
|
||||
conn_delete_from_tree(srv_conn, tid);
|
||||
conn_delete_from_tree(srv_conn, tid, 0);
|
||||
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
|
||||
}
|
||||
|
||||
@ -1863,7 +1863,7 @@ int connect_server(struct stream *s)
|
||||
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
|
||||
if (!LIST_ISEMPTY(&srv->per_thr[tid].idle_conn_list)) {
|
||||
tokill_conn = LIST_ELEM(srv->per_thr[tid].idle_conn_list.n, struct connection *, idle_list);
|
||||
conn_delete_from_tree(tokill_conn, tid);
|
||||
conn_delete_from_tree(tokill_conn, tid, 1);
|
||||
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
|
||||
|
||||
/* Release the idle lock before calling mux->destroy.
|
||||
@ -1892,7 +1892,7 @@ int connect_server(struct stream *s)
|
||||
|
||||
if (!LIST_ISEMPTY(&srv->per_thr[i].idle_conn_list)) {
|
||||
tokill_conn = LIST_ELEM(srv->per_thr[i].idle_conn_list.n, struct connection *, idle_list);
|
||||
conn_delete_from_tree(tokill_conn, i);
|
||||
conn_delete_from_tree(tokill_conn, i, 1);
|
||||
}
|
||||
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
|
||||
|
||||
|
||||
@ -74,11 +74,13 @@ struct conn_tlv_list *conn_get_tlv(struct connection *conn, int type)
|
||||
|
||||
/* Remove <conn> idle connection from its attached tree (idle, safe or avail)
|
||||
* for the server in the connection's target and thread <thr>. If also present
|
||||
* in the secondary server idle list, conn is removed from it.
|
||||
* in the secondary server idle list, conn is removed from it. Finally, if
|
||||
* <permanent> is non-nul, the idle connection flags are cleared as well so
|
||||
* that the connection is not re-inserted later.
|
||||
*
|
||||
* Must be called with idle_conns_lock held.
|
||||
*/
|
||||
void conn_delete_from_tree(struct connection *conn, int thr)
|
||||
void conn_delete_from_tree(struct connection *conn, int thr, int permanent)
|
||||
{
|
||||
struct ceb_root **conn_tree;
|
||||
struct server *srv = __objt_server(conn->target);
|
||||
@ -100,6 +102,8 @@ void conn_delete_from_tree(struct connection *conn, int thr)
|
||||
}
|
||||
|
||||
ceb64_item_delete(conn_tree, hash_node.node, hash_node.key, conn);
|
||||
if (permanent)
|
||||
conn->flags &= ~CO_FL_LIST_MASK;
|
||||
}
|
||||
|
||||
int conn_create_mux(struct connection *conn, int *closed_connection)
|
||||
@ -227,7 +231,7 @@ int conn_notify_mux(struct connection *conn, int old_flags, int forced_wake)
|
||||
conn_in_list = 0;
|
||||
}
|
||||
else {
|
||||
conn_delete_from_tree(conn, tid);
|
||||
conn_delete_from_tree(conn, tid, 0);
|
||||
}
|
||||
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
|
||||
}
|
||||
|
||||
@ -3068,7 +3068,7 @@ struct task *fcgi_io_cb(struct task *t, void *ctx, unsigned int state)
|
||||
conn_in_list = 0;
|
||||
}
|
||||
else {
|
||||
conn_delete_from_tree(conn, tid);
|
||||
conn_delete_from_tree(conn, tid, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3329,7 +3329,7 @@ struct task *fcgi_timeout_task(struct task *t, void *context, unsigned int state
|
||||
* to steal it from us.
|
||||
*/
|
||||
if (fconn->conn->flags & CO_FL_LIST_MASK)
|
||||
conn_delete_from_tree(fconn->conn, tid);
|
||||
conn_delete_from_tree(fconn->conn, tid, 0);
|
||||
else if (fconn->conn->flags & CO_FL_SESS_IDLE)
|
||||
session_detach_idle_conn(fconn->conn->owner, fconn->conn);
|
||||
|
||||
|
||||
@ -4335,7 +4335,7 @@ struct task *h1_io_cb(struct task *t, void *ctx, unsigned int state)
|
||||
conn_in_list = 0;
|
||||
}
|
||||
else {
|
||||
conn_delete_from_tree(conn, tid);
|
||||
conn_delete_from_tree(conn, tid, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -4475,7 +4475,7 @@ struct task *h1_timeout_task(struct task *t, void *context, unsigned int state)
|
||||
* to steal it from us.
|
||||
*/
|
||||
if (h1c->conn->flags & CO_FL_LIST_MASK)
|
||||
conn_delete_from_tree(h1c->conn, tid);
|
||||
conn_delete_from_tree(h1c->conn, tid, 1);
|
||||
|
||||
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
|
||||
|
||||
|
||||
10
src/mux_h2.c
10
src/mux_h2.c
@ -4985,7 +4985,7 @@ struct task *h2_io_cb(struct task *t, void *ctx, unsigned int state)
|
||||
conn_in_list = 0;
|
||||
}
|
||||
else {
|
||||
conn_delete_from_tree(conn, tid);
|
||||
conn_delete_from_tree(conn, tid, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -5162,7 +5162,7 @@ static int h2_process(struct h2c *h2c)
|
||||
/* connections in error must be removed from the idle lists */
|
||||
if (conn->flags & CO_FL_LIST_MASK) {
|
||||
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
|
||||
conn_delete_from_tree(conn, tid);
|
||||
conn_delete_from_tree(conn, tid, 1);
|
||||
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
|
||||
}
|
||||
}
|
||||
@ -5170,7 +5170,7 @@ static int h2_process(struct h2c *h2c)
|
||||
/* connections in error must be removed from the idle lists */
|
||||
if (conn->flags & CO_FL_LIST_MASK) {
|
||||
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
|
||||
conn_delete_from_tree(conn, tid);
|
||||
conn_delete_from_tree(conn, tid, 1);
|
||||
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
|
||||
}
|
||||
}
|
||||
@ -5278,7 +5278,7 @@ struct task *h2_timeout_task(struct task *t, void *context, unsigned int state)
|
||||
* to steal it from us.
|
||||
*/
|
||||
if (h2c->conn->flags & CO_FL_LIST_MASK)
|
||||
conn_delete_from_tree(h2c->conn, tid);
|
||||
conn_delete_from_tree(h2c->conn, tid, 1);
|
||||
else if (h2c->conn->flags & CO_FL_SESS_IDLE)
|
||||
session_detach_idle_conn(h2c->conn->owner, h2c->conn);
|
||||
|
||||
@ -5360,7 +5360,7 @@ do_leave:
|
||||
/* in any case this connection must not be considered idle anymore */
|
||||
if (h2c->conn->flags & CO_FL_LIST_MASK) {
|
||||
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
|
||||
conn_delete_from_tree(h2c->conn, tid);
|
||||
conn_delete_from_tree(h2c->conn, tid, 1);
|
||||
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
|
||||
}
|
||||
|
||||
|
||||
@ -3201,7 +3201,7 @@ static void qcc_shutdown(struct qcc *qcc)
|
||||
|
||||
/* A connection is not reusable if app layer is closed. */
|
||||
if (qcc->flags & QC_CF_IS_BACK)
|
||||
conn_delete_from_tree(qcc->conn, tid);
|
||||
conn_delete_from_tree(qcc->conn, tid, 1);
|
||||
|
||||
out:
|
||||
qcc->app_st = QCC_APP_ST_SHUT;
|
||||
@ -3404,7 +3404,7 @@ struct task *qcc_io_cb(struct task *t, void *ctx, unsigned int state)
|
||||
if (conn->flags & CO_FL_SESS_IDLE)
|
||||
session_detach_idle_conn(conn->owner, conn);
|
||||
else
|
||||
conn_delete_from_tree(conn, tid);
|
||||
conn_delete_from_tree(conn, tid, 0);
|
||||
}
|
||||
|
||||
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
|
||||
@ -3528,7 +3528,7 @@ static struct task *qcc_timeout_task(struct task *t, void *ctx, unsigned int sta
|
||||
* attempts to steal it from us.
|
||||
*/
|
||||
if (qcc->conn->flags & CO_FL_LIST_MASK)
|
||||
conn_delete_from_tree(qcc->conn, tid);
|
||||
conn_delete_from_tree(qcc->conn, tid, 1);
|
||||
else if (qcc->conn->flags & CO_FL_SESS_IDLE)
|
||||
session_unown_conn(qcc->conn->owner, qcc->conn);
|
||||
|
||||
|
||||
@ -2564,7 +2564,7 @@ static struct task *spop_io_cb(struct task *t, void *ctx, unsigned int state)
|
||||
conn_in_list = 0;
|
||||
}
|
||||
else {
|
||||
conn_delete_from_tree(conn, tid);
|
||||
conn_delete_from_tree(conn, tid, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2676,7 +2676,7 @@ static int spop_process(struct spop_conn *spop_conn)
|
||||
/* connections in error must be removed from the idle lists */
|
||||
if (conn->flags & CO_FL_LIST_MASK) {
|
||||
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
|
||||
conn_delete_from_tree(conn, tid);
|
||||
conn_delete_from_tree(conn, tid, 1);
|
||||
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
|
||||
}
|
||||
}
|
||||
@ -2799,7 +2799,7 @@ static struct task *spop_timeout_task(struct task *t, void *context, unsigned in
|
||||
* to steal it from us.
|
||||
*/
|
||||
if (spop_conn->conn->flags & CO_FL_LIST_MASK)
|
||||
conn_delete_from_tree(spop_conn->conn, tid);
|
||||
conn_delete_from_tree(spop_conn->conn, tid, 1);
|
||||
else if (spop_conn->conn->flags & CO_FL_SESS_IDLE)
|
||||
session_detach_idle_conn(spop_conn->conn->owner, spop_conn->conn);
|
||||
|
||||
|
||||
@ -7212,7 +7212,7 @@ static int srv_migrate_conns_to_remove(struct server *srv, int thr, int toremove
|
||||
break;
|
||||
|
||||
conn = LIST_ELEM(srv->per_thr[thr].idle_conn_list.n, struct connection *, idle_list);
|
||||
conn_delete_from_tree(conn, thr);
|
||||
conn_delete_from_tree(conn, thr, 1);
|
||||
MT_LIST_APPEND(&idle_conns[thr].toremove_conns, &conn->toremove_list);
|
||||
i++;
|
||||
}
|
||||
@ -7287,8 +7287,7 @@ void srv_release_conn(struct server *srv, struct connection *conn)
|
||||
/* Remove the connection from any tree (safe, idle or available) */
|
||||
if (ceb_intree(&conn->hash_node.node)) {
|
||||
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
|
||||
conn_delete_from_tree(conn, tid);
|
||||
conn->flags &= ~CO_FL_LIST_MASK;
|
||||
conn_delete_from_tree(conn, tid, 1);
|
||||
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
|
||||
}
|
||||
}
|
||||
@ -7365,7 +7364,7 @@ int srv_add_to_idle_list(struct server *srv, struct connection *conn, int is_saf
|
||||
_HA_ATOMIC_DEC(&srv->curr_used_conns);
|
||||
|
||||
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
|
||||
conn_delete_from_tree(conn, tid);
|
||||
conn_delete_from_tree(conn, tid, 0);
|
||||
|
||||
if (is_safe) {
|
||||
conn->flags = (conn->flags & ~CO_FL_LIST_MASK) | CO_FL_SAFE_LIST;
|
||||
@ -7525,7 +7524,7 @@ static void srv_close_idle_conns(struct server *srv)
|
||||
hash_node.key, struct connection))) {
|
||||
if (conn->ctrl->ctrl_close)
|
||||
conn->ctrl->ctrl_close(conn);
|
||||
conn_delete_from_tree(conn, i);
|
||||
conn_delete_from_tree(conn, i, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -6752,7 +6752,7 @@ struct task *ssl_sock_io_cb(struct task *t, void *context, unsigned int state)
|
||||
conn_in_list = 0;
|
||||
}
|
||||
else {
|
||||
conn_delete_from_tree(conn, tid);
|
||||
conn_delete_from_tree(conn, tid, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user