mirror of
https://git.haproxy.org/git/haproxy.git/
synced 2026-05-02 11:41:07 +02:00
BUG/MEDIUM: cli: fix master CLI connection slot leak on client disconnect
In master-worker mode the master CLI proxy (mworker_proxy) has a hardcoded maxconn of 10. When a client connects to the master CLI socket and issues a command that gets forwarded to an unresponsive worker (e.g. one that is stuck or very slow), the connection hangs waiting for the worker's response. If the client then disconnects (timeout, Ctrl-C, etc.), the connection slot is never released because the client-side FIN is never acknowledged by the unresponsive worker. After 10 such leaked slots the master CLI socket becomes completely unreachable, returning "Resource temporarily unavailable" to any new connection attempt. In containerized deployments this means readiness probes start failing and the pod gets restarted. The fix adds a timeout server-fin of 1s to the mworker_proxy. When the client disconnects while waiting for a worker response, this timeout ensures the dangling backend connection is cleaned up after 1s, freeing the connection slot. This does not affect normal CLI operations since the timeout only starts after the client has already closed its side of the connection. A regression test is included that blocks the worker CLI thread using "debug dev delay" with nbthread 1, fills all 10 master CLI slots, waits for client-side timeouts, then verifies a new connection still succeeds. This fixes GH issue #3351. This should be backported to all stable branches. Co-authored-by: Martin Strenge <github@trixer.net> Co-authored-by: William Lallemand <wlallemand@haproxy.com>
This commit is contained in:
parent
55ba951f3d
commit
64383e655b
66
reg-tests/mcli/mcli_master_socket_leak.vtc
Normal file
66
reg-tests/mcli/mcli_master_socket_leak.vtc
Normal file
@ -0,0 +1,66 @@
|
||||
varnishtest "Bug fix: master CLI connection slots freed on client disconnect"
|
||||
|
||||
# Regression test for a master CLI socket connection leak in master-worker mode.
|
||||
#
|
||||
# mworker_proxy has a fixed maxconn of 10. When clients connect to the master
|
||||
# socket, send a command that is forwarded to a busy worker, and then close
|
||||
# the connection due to a client-side receive timeout, haproxy must free each
|
||||
# slot as the client disconnects.
|
||||
#
|
||||
# Without the fix: slots remain occupied after the client disconnects, so the
|
||||
# master CLI becomes unreachable once all 10 slots are filled.
|
||||
# With the fix: slots are freed on client disconnect and the master CLI keeps
|
||||
# accepting new connections.
|
||||
#
|
||||
# The worker is made unresponsive using "debug dev delay" (expert-mode) with
|
||||
# nbthread 1 so a single delay blocks the entire worker CLI. This avoids
|
||||
# using SIGSTOP/SIGCONT which can be unreliable in CI environments.
|
||||
|
||||
#REGTEST_TYPE=bug
|
||||
|
||||
feature cmd "command -v socat"
|
||||
feature cmd "command -v timeout"
|
||||
feature ignore_unknown_macro
|
||||
|
||||
server s1 {
|
||||
} -start
|
||||
|
||||
haproxy h1 -W -S -conf {
|
||||
global
|
||||
nbthread 1
|
||||
|
||||
defaults
|
||||
mode http
|
||||
timeout connect "${HAPROXY_TEST_TIMEOUT-5s}"
|
||||
timeout client "${HAPROXY_TEST_TIMEOUT-5s}"
|
||||
timeout server "${HAPROXY_TEST_TIMEOUT-5s}"
|
||||
|
||||
frontend fe
|
||||
bind "fd@${fe}"
|
||||
default_backend be
|
||||
|
||||
backend be
|
||||
server s1 ${s1_addr}:${s1_port}
|
||||
} -start
|
||||
|
||||
# Fill all 10 master CLI slots (mworker_proxy->maxconn is hardcoded to 10).
|
||||
# Each socat sends "expert-mode on" followed by "@1 debug dev delay 10000"
|
||||
# which blocks the single worker thread for 10 s. After 2 s the timeout(1)
|
||||
# wrapper kills socat, simulating a client-side receive timeout. "wait"
|
||||
# ensures all background processes have exited before proceeding.
|
||||
shell {
|
||||
for i in $(seq 1 10); do
|
||||
(printf "expert-mode on\n@1 debug dev delay 10000\n" \
|
||||
| timeout 2 socat TCP:${h1_mcli_addr}:${h1_mcli_port} - 2>/dev/null) &
|
||||
done
|
||||
wait
|
||||
}
|
||||
|
||||
# This is the key assertion: after all 10 clients have disconnected, a new
|
||||
# connection to the master CLI must succeed. With the bug all 10 slots are
|
||||
# still marked occupied and this connect is refused or times out.
|
||||
haproxy h1 -mcli {
|
||||
send "show version"
|
||||
expect ~ "3."
|
||||
}
|
||||
|
||||
@ -3834,8 +3834,9 @@ int mworker_cli_create_master_proxy(char **errmsg)
|
||||
mworker_proxy->mode = PR_MODE_CLI;
|
||||
/* default to 10 concurrent connections */
|
||||
mworker_proxy->maxconn = 10;
|
||||
/* no timeout */
|
||||
mworker_proxy->timeout.client = 0;
|
||||
mworker_proxy->timeout.client = 0; /* no timeout */
|
||||
mworker_proxy->timeout.serverfin = MS_TO_TICKS(1000); /* 1s timeout in case worker is not responding on shutdown */
|
||||
|
||||
mworker_proxy->conf.file = strdup("MASTER");
|
||||
mworker_proxy->conf.line = 0;
|
||||
mworker_proxy->accept = frontend_accept;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user