MEDIUM: connections: Allow taking over connections from other tgroups.

Allow haproxy to take over idle connections from thread groups other
than our own. To control that, add a new tunable,
tune.takeover-other-tg-connections. It can have 3 values: "none", where
we won't attempt to get connections from other thread groups (the
default), "restricted", where we will only try to get idle connections
from other thread groups when we're using reverse HTTP, and "full",
where we always try to get connections from other thread groups.
Unless there is a special need, it is advised to use "none" (or
"restricted" if we're using reverse HTTP), as using connections from
other thread groups may have a performance impact.
Olivier Houchard 2025-01-30 11:16:40 +01:00 (committed by Olivier Houchard)
parent d31b1650ae
commit 8de8ed4f48
6 changed files with 108 additions and 13 deletions

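Not part of the patch, but for illustration, enabling the restricted policy
from the global section could look like this (thread counts are arbitrary;
only the last line comes from this commit):

    global
        nbthread 16
        thread-groups 4
        tune.takeover-other-tg-connections restricted

With "restricted", connections are only taken from other thread groups for
servers that cannot open new connections (reverse HTTP), as implemented in
conn_backend_get() below.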
View File

@@ -1724,6 +1724,7 @@ The following keywords are supported in the "global" section :
- tune.ssl.ssl-ctx-cache-size
- tune.ssl.ocsp-update.maxdelay (deprecated)
- tune.ssl.ocsp-update.mindelay (deprecated)
- tune.takeover-other-tg-connections
- tune.vars.global-max-size
- tune.vars.proc-max-size
- tune.vars.reqres-max-size
@@ -4584,6 +4585,17 @@ tune.stick-counters <number>
no "track-sc" rules are used, the value may be lowered (0 being valid to
entirely disable stick-counters).
tune.takeover-other-tg-connections <value>
By default, we won't attempt to use idle connections from other thread groups.
This can however be changed. Valid values for <value> are: "none", the
default, where no attempt will be made to use idle connections from other
thread groups; "restricted", where we will only attempt to get an idle
connection from another thread group if we're using protocols that can't
create new connections, such as reverse HTTP; and "full", where we will
always look in other thread groups for idle connections.
Note that using connections from other thread groups can incur a performance
penalty, so it should not be used unless really needed.
tune.vars.global-max-size <size>
tune.vars.proc-max-size <size>
tune.vars.reqres-max-size <size>

View File

@@ -448,6 +448,22 @@ static inline void fd_claim_tgid(int fd, uint desired_tgid)
}
}
/*
* Update the FD's TGID.
* This should be called with the lock held, and will drop the lock once
* the TGID is updated.
* The reference counter is however preserved.
*/
static inline void fd_update_tgid(int fd, uint desired_tgid)
{
unsigned int orig_tgid = fdtab[fd].refc_tgid;
unsigned int new_tgid;
/* Remove the lock, and switch to the new tgid */
do {
new_tgid = (orig_tgid & 0xffff0000) | desired_tgid;
} while (!_HA_ATOMIC_CAS(&fdtab[fd].refc_tgid, &orig_tgid, new_tgid) && __ha_cpu_relax());
}
/* atomically read the running mask if the tgid matches, or returns zero if it
* does not match. This is meant for use in code paths where the bit is expected
* to be present and will be sufficient to protect against a short-term group

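The masks used here and in fd_takeover() below suggest that refc_tgid packs a
16-bit reference count in its upper half, a lock bit at 0x8000 and the tgid in
the low 15 bits. A standalone sketch of the transition fd_update_tgid()
performs, with that assumed layout (illustration only, not haproxy code):

    #include <stdio.h>

    #define TGID_MASK 0x7fffu      /* assumed: tgid in the low 15 bits */
    #define TGID_LOCK 0x8000u      /* assumed: lock bit */
    #define REFC_MASK 0xffff0000u  /* assumed: 16-bit refcount in the upper half */

    int main(void)
    {
        unsigned int word = (2u << 16) | TGID_LOCK | 1u; /* refcount 2, locked, tgid 1 */
        unsigned int desired_tgid = 3;

        /* same transformation as fd_update_tgid(): keep the refcount,
         * drop the lock bit, install the new tgid
         */
        unsigned int updated = (word & REFC_MASK) | desired_tgid;

        printf("tgid=%u refcount=%u locked=%d\n",
               updated & TGID_MASK, updated >> 16, !!(updated & TGID_LOCK));
        return 0;
    }

This prints tgid=3 refcount=2 locked=0: the caller's reference is preserved
while the lock taken before the switch is released.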
View File

@@ -109,6 +109,13 @@ enum {
SSL_SERVER_VERIFY_REQUIRED = 1,
};
/* Takeover across thread groups */
enum threadgroup_takeover {
NO_THREADGROUP_TAKEOVER = 0,
RESTRICTED_THREADGROUP_TAKEOVER = 1,
FULL_THREADGROUP_TAKEOVER = 2,
};
/* bit values to go with "warned" above */
#define WARN_ANY 0x00000001 /* any warning was emitted */
#define WARN_FORCECLOSE_DEPRECATED 0x00000002
@@ -199,6 +206,7 @@ struct global {
int default_shards; /* default shards for listeners, or -1 (by-thread) or -2 (by-group) */
uint max_checks_per_thread; /* if >0, no more than this concurrent checks per thread */
uint ring_queues; /* if >0, #ring queues, otherwise equals #thread groups */
enum threadgroup_takeover tg_takeover; /* Policy for threadgroup takeover */
#ifdef USE_QUIC
unsigned int quic_backend_max_idle_timeout;
unsigned int quic_frontend_max_idle_timeout;

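For reference, the decision this new field drives boils down to a small
predicate. A minimal sketch (the helper name and the srv_is_rhttp flag are
stand-ins; the real check is the one added to conn_backend_get() below):

    #include <stdio.h>

    enum threadgroup_takeover {
        NO_THREADGROUP_TAKEOVER = 0,
        RESTRICTED_THREADGROUP_TAKEOVER = 1,
        FULL_THREADGROUP_TAKEOVER = 2,
    };

    /* may we try to take an idle connection from another thread group? */
    static int may_takeover_from_other_tgroup(enum threadgroup_takeover policy,
                                              int srv_is_rhttp)
    {
        return policy == FULL_THREADGROUP_TAKEOVER ||
               (policy == RESTRICTED_THREADGROUP_TAKEOVER && srv_is_rhttp);
    }

    int main(void)
    {
        printf("%d %d %d\n",
               may_takeover_from_other_tgroup(NO_THREADGROUP_TAKEOVER, 1),
               may_takeover_from_other_tgroup(RESTRICTED_THREADGROUP_TAKEOVER, 1),
               may_takeover_from_other_tgroup(FULL_THREADGROUP_TAKEOVER, 0));
        return 0;
    }

It prints "0 1 1": "none" never allows it, "restricted" only for reverse-HTTP
servers, "full" always.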
View File

@@ -1263,7 +1263,9 @@ int alloc_bind_address(struct sockaddr_storage **ss,
*/
struct connection *conn_backend_get(struct stream *s, struct server *srv, int is_safe, int64_t hash)
{
const struct tgroup_info *curtg = tg;
struct connection *conn = NULL;
unsigned int curtgid = tgid;
int i; // thread number
int found = 0;
int stop;
@@ -1318,10 +1320,10 @@ struct connection *conn_backend_get(struct stream *s, struct server *srv, int is
* unvisited thread, but always staying in the same group.
*/
stop = srv->per_tgrp[tgid - 1].next_takeover;
if (stop >= tg->count)
stop %= tg->count;
stop += tg->base;
if (stop >= curtg->count)
stop %= curtg->count;
stop += curtg->base;
check_tgid:
i = stop;
do {
if (!srv->curr_idle_thr[i] || i == tid)
@@ -1356,8 +1358,21 @@ struct connection *conn_backend_get(struct stream *s, struct server *srv, int is
}
}
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
} while (!found && (i = (i + 1 == tg->base + tg->count) ? tg->base : i + 1) != stop);
} while (!found && (i = (i + 1 == curtg->base + curtg->count) ? curtg->base : i + 1) != stop);
if (!found && (global.tune.tg_takeover == FULL_THREADGROUP_TAKEOVER ||
(global.tune.tg_takeover == RESTRICTED_THREADGROUP_TAKEOVER &&
srv->flags & SRV_F_RHTTP))) {
curtgid = curtgid + 1;
if (curtgid == global.nbtgroups + 1)
curtgid = 1;
/* If we haven't looped yet */
if (curtgid != tgid) {
curtg = &ha_tgroup_info[curtgid - 1];
stop = curtg->base;
goto check_tgid;
}
}
if (!found)
conn = NULL;
done:

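The goto check_tgid makes the control flow a little hard to follow. A minimal
standalone sketch of just the group-level iteration (variable names reused for
readability; the per-thread scan, locking and the takeover policy check are
omitted):

    #include <stdio.h>

    int main(void)
    {
        int nbtgroups = 4;   /* assumed number of thread groups */
        int tgid = 2;        /* assumed current group, 1-based */
        int curtgid = tgid;
        int found = 0;

        while (!found) {
            /* the do/while over this group's threads goes here */
            printf("scanning idle connections of thread group %d\n", curtgid);

            /* move to the next group, wrapping after the last one */
            curtgid = curtgid + 1;
            if (curtgid == nbtgroups + 1)
                curtgid = 1;
            /* stop once we are back at our own group */
            if (curtgid == tgid)
                break;
        }
        return 0;
    }

Starting from group 2 with 4 groups, this visits 2, 3, 4, 1 and then stops, so
every group is scanned at most once per lookup.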
View File

@@ -1348,6 +1348,22 @@ static int cfg_parse_global_tune_opts(char **args, int section_type,
return -1;
}
}
else if (strcmp(args[0], "tune.takeover-other-tg-connections") == 0) {
if (*(args[1]) == 0) {
memprintf(err, "'%s' expects 'none', 'restricted', or 'full'", args[0]);
return -1;
}
if (strcmp(args[1], "none") == 0)
global.tune.tg_takeover = NO_THREADGROUP_TAKEOVER;
else if (strcmp(args[1], "restricted") == 0)
global.tune.tg_takeover = RESTRICTED_THREADGROUP_TAKEOVER;
else if (strcmp(args[1], "full") == 0)
global.tune.tg_takeover = FULL_THREADGROUP_TAKEOVER;
else {
memprintf(err, "'%s' expects 'none', 'restricted', or 'full', got '%s'", args[0], args[1]);
return -1;
}
}
else {
BUG_ON(1, "Triggered in cfg_parse_global_tune_opts() by unsupported keyword.\n");
return -1;
@@ -1716,6 +1732,7 @@ static struct cfg_kw_list cfg_kws = {ILH, {
{ CFG_GLOBAL, "tune.http.maxhdr", cfg_parse_global_tune_opts },
{ CFG_GLOBAL, "tune.comp.maxlevel", cfg_parse_global_tune_opts },
{ CFG_GLOBAL, "tune.pattern.cache-size", cfg_parse_global_tune_opts },
{ CFG_GLOBAL, "tune.takeover-other-tg-connections", cfg_parse_global_tune_opts },
{ CFG_GLOBAL, "tune.disable-fast-forward", cfg_parse_global_tune_forward_opts },
{ CFG_GLOBAL, "tune.disable-zero-copy-forwarding", cfg_parse_global_tune_forward_opts },
{ CFG_GLOBAL, "tune.chksize", cfg_parse_global_unsupported_opts },

View File

@@ -512,6 +512,8 @@ void fd_migrate_on(int fd, uint new_tid)
int fd_takeover(int fd, void *expected_owner)
{
unsigned long old;
int changing_tgid = 0;
int old_ltid, old_tgid;
/* protect ourself against a delete then an insert for the same fd,
* if it happens, then the owner will no longer be the expected
@@ -520,17 +522,31 @@ int fd_takeover(int fd, void *expected_owner)
if (fdtab[fd].owner != expected_owner)
return -1;
/* we must be alone to work on this idle FD. If not, it means that its
* poller is currently waking up and is about to use it, likely to
* close it on shut/error, but maybe also to process any unexpectedly
* pending data. It's also possible that the FD was closed and
* reassigned to another thread group, so let's be careful.
*/
if (unlikely(!fd_grab_tgid(fd, ti->tgid)))
return -1;
/* We're taking a connection from a different thread group */
if ((fdtab[fd].refc_tgid & 0x7fff) != tgid) {
changing_tgid = 1;
old_tgid = fd_tgid(fd);
BUG_ON(atleast2(fdtab[fd].thread_mask));
old_ltid = my_ffsl(fdtab[fd].thread_mask) - 1;
if (unlikely(!fd_lock_tgid_cur(fd)))
return -1;
} else {
/* we must be alone to work on this idle FD. If not, it means that its
* poller is currently waking up and is about to use it, likely to
* close it on shut/error, but maybe also to process any unexpectedly
* pending data. It's also possible that the FD was closed and
* reassigned to another thread group, so let's be careful.
*/
if (unlikely(!fd_grab_tgid(fd, ti->tgid)))
return -1;
}
old = 0;
if (!HA_ATOMIC_CAS(&fdtab[fd].running_mask, &old, ti->ltid_bit)) {
if (changing_tgid)
fd_unlock_tgid(fd);
fd_drop_tgid(fd);
return -1;
}
@@ -538,6 +554,17 @@ int fd_takeover(int fd, void *expected_owner)
/* success, from now on it's ours */
HA_ATOMIC_STORE(&fdtab[fd].thread_mask, ti->ltid_bit);
/*
 * Change the tgid to our own tgid.
 * This removes the lock, which we don't need anymore, but keeps
 * the refcount.
 */
if (changing_tgid) {
fd_update_tgid(fd, tgid);
if (cur_poller.fixup_tgid_takeover)
cur_poller.fixup_tgid_takeover(&cur_poller, fd, old_ltid, old_tgid);
}
/* Make sure the FD doesn't have the active bit. It is possible that
* the fd is polled by the thread that used to own it, the new thread
* is supposed to call subscribe() later, to activate polling.
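To summarize the ordering introduced for the cross-group case, here is a
compilable outline with stub helpers (the names are stand-ins for the real
fd_*/poller primitives, not haproxy APIs):

    #include <stdio.h>

    static int  lock_current_tgid(int fd)   { (void)fd; return 1; }
    static int  claim_running_mask(int fd)  { (void)fd; return 1; }
    static void unlock_tgid(int fd)         { (void)fd; }
    static void switch_tgid_to_ours(int fd) { (void)fd; }
    static void poller_fixup(int fd, int old_ltid, int old_tgid)
    { (void)fd; (void)old_ltid; (void)old_tgid; }

    static int takeover_from_other_group(int fd, int old_ltid, int old_tgid)
    {
        /* 1. lock the FD's current tgid instead of grabbing our own */
        if (!lock_current_tgid(fd))
            return -1;
        /* 2. we must still be the only runner on this idle FD */
        if (!claim_running_mask(fd)) {
            unlock_tgid(fd); /* the real code also drops the tgid reference here */
            return -1;
        }
        /* 3. install our tgid (which also releases the lock but keeps the
         *    reference), then let the poller forget the previous owner
         */
        switch_tgid_to_ours(fd);
        poller_fixup(fd, old_ltid, old_tgid);
        return 0;
    }

    int main(void)
    {
        printf("takeover: %d\n", takeover_from_other_group(42, 0, 1));
        return 0;
    }

Steps 1 and 3 are the parts this patch adds; step 2, the CAS on the running
mask, already existed for same-group takeovers.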