MEDIUM: config: permit to start a bind on multiple groups at once

Now it's possible for a bind line to span multiple thread groups. When
this happens, the first one will become the reference and will be entirely
set up, and the subsequent ones will be duplicated from this reference,
so that they can be registered in distinct groups. The reference is
always set up and started first so it is always available when the other
ones are started.

The doc was updated to reflect this new possibility with its limitations
and impacts, and the differences with the "shards" option.
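
As a purely illustrative sketch of what this permits (not part of the commit; the thread counts, group split and port are assumptions), a single "bind" line may now carry a thread set covering two groups:

    global
        nbthread 8
        thread-groups 2        # assumed split: threads 1-4 in group 1, 5-8 in group 2

    frontend www
        mode http
        # The thread set below spans groups 1 and 2. The listener set up for the
        # first group acts as the reference; the one for the second group is
        # duplicated from it when it is started.
        bind :8080 thread 1/all,2/all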
Willy Tarreau 2023-02-27 16:42:32 +01:00
parent 09e266e6f5
commit 09b52d1c3d
2 changed files with 62 additions and 13 deletions

doc/configuration.txt

@@ -15394,10 +15394,21 @@ thread [<thread-group>/]<thread-set>[,...]
an absolute notation or a relative one, as those not set will be resolved at
the end of the parsing.
It is important to know that each listener described by a "bind" line creates
at least one socket represented by at least one file descriptor. Since file
descriptors cannot span multiple thread groups, if a "bind" line specifies a
thread range that covers more than one group, several file descriptors will
automatically be created so that there is at least one per group. Technically
speaking they all refer to the same socket in the kernel, but they will get a
distinct identifier in haproxy and will even have a dedicated stats entry if
"option socket-stats" is used.
The main purpose is to have multiple bind lines sharing the same IP:port but
not the same thread in a listener, so that the system can distribute the
incoming connections into multiple queues, bypassing haproxy's internal queue
load balancing. Currently Linux 3.9 and above is known for supporting this.
See also the "shards" keyword above that automates duplication of "bind"
lines and their assignment to multiple groups of threads.
tls-ticket-keys <keyfile>
Sets the TLS ticket keys file to load the keys from. The keys need to be 48
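
To make the contrast with the "shards" keyword a bit more concrete, here is a hedged configuration sketch; names, addresses and counts are illustrative only. The first frontend lets a single "bind" line cover two thread groups and relies on the automatic per-group duplication described above, with "option socket-stats" exposing one stats entry per resulting socket. The second uses "shards" so that haproxy itself duplicates the bind line and the system spreads incoming connections across the resulting sockets.

    frontend span_groups
        option socket-stats              # one stats entry per per-group socket
        bind :8443 thread 1/all,2/all

    frontend sharded
        bind :8444 shards 4              # four identical listeners on the same port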

src/cfgparse.c

@@ -2995,10 +2995,10 @@ int check_config_validity()
 
 		/* apply thread masks and groups to all receivers */
 		list_for_each_entry(li, &bind_conf->listeners, by_bind) {
-			struct listener *new_li;
+			struct listener *new_li, *ref;
 			struct thread_set new_ts;
-			int shard, shards, todo, done, grp;
-			ulong mask, bit;
+			int shard, shards, todo, done, grp, dups;
+			ulong mask, gmask, bit;
 
 			shards = bind_conf->settings.shards;
 			todo = thread_set_count(&bind_conf->thread_set);
@@ -3042,17 +3042,55 @@ int check_config_validity()
 
 				BUG_ON(!new_ts.grps); // no more bits left unassigned
 
-				if (atleast2(new_ts.grps)) {
-					ha_alert("Proxy '%s': shard number %d spans %d groups in 'bind %s' at [%s:%d]\n",
-						 curproxy->id, shard, my_popcountl(new_ts.grps), bind_conf->arg, bind_conf->file, bind_conf->line);
-					cfgerr++;
-					err_code |= ERR_FATAL | ERR_ALERT;
-					goto out;
-				}
+				/* Create all required listeners for all bound groups. If more than one group is
+				 * needed, the first receiver serves as a reference, and subsequent ones point to
+				 * it. We already have a listener available in new_li() so we only allocate a new
+				 * one if we're not on the last one. We count the remaining groups by copying their
+				 * mask into <gmask> and dropping the lowest bit at the end of the loop until there
+				 * is no more. Ah yes, it's not pretty :-/
+				 */
+				ref = new_li;
+				gmask = new_ts.grps;
+				for (dups = 0; gmask; dups++) {
+					/* assign the first (and only) thread and group */
+					new_li->rx.bind_thread = thread_set_nth_tmask(&new_ts, dups);
+					new_li->rx.bind_tgroup = thread_set_nth_group(&new_ts, dups);
 
-				/* assign the first (and only) thread and group */
-				new_li->rx.bind_thread = thread_set_nth_tmask(&new_ts, 0);
-				new_li->rx.bind_tgroup = thread_set_nth_group(&new_ts, 0);
+					if (dups) {
+						/* it has been allocated already in the previous round */
+						shard_info_attach(&new_li->rx, ref->rx.shard_info);
+						new_li->rx.flags |= RX_F_MUST_DUP;
+					}
+
+					gmask &= gmask - 1; // drop lowest bit
+					if (gmask) {
+						/* yet another listener expected in this shard, let's
+						 * chain it.
+						 */
+						struct listener *tmp_li = clone_listener(new_li);
+
+						if (!tmp_li) {
+							ha_alert("Out of memory while trying to allocate extra listener for group %u of shard %d in %s %s\n",
+								 new_li->rx.bind_tgroup, shard, proxy_type_str(curproxy), curproxy->id);
+							cfgerr++;
+							err_code |= ERR_FATAL | ERR_ALERT;
+							goto out;
+						}
+
+						/* if we're forced to create at least two listeners, we have to
+						 * allocate a shared shard_info that's linked to from the reference
+						 * and each other listener, so we'll create it here.
+						 */
+						if (!shard_info_attach(&ref->rx, NULL)) {
+							ha_alert("Out of memory while trying to allocate shard_info for listener for group %u of shard %d in %s %s\n",
+								 new_li->rx.bind_tgroup, shard, proxy_type_str(curproxy), curproxy->id);
+							cfgerr++;
+							err_code |= ERR_FATAL | ERR_ALERT;
+							goto out;
+						}
+						new_li = tmp_li;
+					}
+				}
 				done -= todo;
 
 				shard++;
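
A side note on the <gmask> handling in the loop above: clearing the lowest set bit with "gmask &= gmask - 1" is the classic way of visiting each set bit exactly once, so the loop body runs once per bound group. The standalone sketch below (illustrative only, not haproxy code; the mask value is made up) demonstrates the same idiom:

    #include <stdio.h>

    int main(void)
    {
        /* made-up mask: bits 0, 2 and 3 set, i.e. groups 1, 3 and 4 are bound */
        unsigned long gmask = 0xD;
        int dups;

        for (dups = 0; gmask; dups++) {
            /* the lowest set bit identifies the group handled in this round
             * (__builtin_ctzl() is a GCC/Clang builtin returning its index)
             */
            printf("dup %d -> group %d\n", dups, (int)__builtin_ctzl(gmask) + 1);
            gmask &= gmask - 1;   /* drop the lowest bit, as in the loop above */
        }
        /* prints: dup 0 -> group 1, dup 1 -> group 3, dup 2 -> group 4 */
        return 0;
    }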