diff --git a/doc/configuration.txt b/doc/configuration.txt
index b7d97d0ec..18f5bd465 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -1724,6 +1724,7 @@ The following keywords are supported in the "global" section :
 - tune.ssl.ssl-ctx-cache-size
 - tune.ssl.ocsp-update.maxdelay (deprecated)
 - tune.ssl.ocsp-update.mindelay (deprecated)
+ - tune.takeover-other-tg-connections
 - tune.vars.global-max-size
 - tune.vars.proc-max-size
 - tune.vars.reqres-max-size
@@ -4584,6 +4585,17 @@ tune.stick-counters
   no "track-sc" rules are used, the value may be lowered (0 being valid to
   entirely disable stick-counters).
 
+tune.takeover-other-tg-connections { none | restricted | full }
+  By default, no attempt is made to use idle connections owned by threads
+  from other thread groups. This keyword changes that behavior. With "none"
+  (the default), idle connections are never taken from other thread groups.
+  With "restricted", they are only taken for protocols that cannot create
+  new connections, such as reverse HTTP. With "full", other thread groups
+  are always considered when looking for an idle connection.
+
+  Note that taking over connections from other thread groups may incur a
+  performance penalty, so it should only be enabled when really needed.
+
 tune.vars.global-max-size
 tune.vars.proc-max-size
 tune.vars.reqres-max-size
diff --git a/include/haproxy/fd.h b/include/haproxy/fd.h
index b9bdc8f4f..2912b7288 100644
--- a/include/haproxy/fd.h
+++ b/include/haproxy/fd.h
@@ -448,6 +448,22 @@ static inline void fd_claim_tgid(int fd, uint desired_tgid)
 	}
 }
 
+/*
+ * Update the FD's TGID.
+ * This should be called with the lock held, and will drop the lock once
+ * the TGID is updated.
+ * The reference counter is however preserved.
+ */
+static inline void fd_update_tgid(int fd, uint desired_tgid)
+{
+	unsigned int orig_tgid = fdtab[fd].refc_tgid;
+	unsigned int new_tgid;
+	/* Remove the lock, and switch to the new tgid */
+	do {
+		new_tgid = (orig_tgid & 0xffff0000) | desired_tgid;
+	} while (!_HA_ATOMIC_CAS(&fdtab[fd].refc_tgid, &orig_tgid, new_tgid) && __ha_cpu_relax());
+}
+
 /* atomically read the running mask if the tgid matches, or returns zero if it
  * does not match. This is meant for use in code paths where the bit is expected
  * to be present and will be sufficient to protect against a short-term group
diff --git a/include/haproxy/global-t.h b/include/haproxy/global-t.h
index 508ef846a..e3a078b28 100644
--- a/include/haproxy/global-t.h
+++ b/include/haproxy/global-t.h
@@ -109,6 +109,13 @@ enum {
 	SSL_SERVER_VERIFY_REQUIRED = 1,
 };
 
+/* Takeover across thread groups */
+enum threadgroup_takeover {
+	NO_THREADGROUP_TAKEOVER = 0,
+	RESTRICTED_THREADGROUP_TAKEOVER = 1,
+	FULL_THREADGROUP_TAKEOVER = 2,
+};
+
 /* bit values to go with "warned" above */
 #define WARN_ANY 0x00000001 /* any warning was emitted */
 #define WARN_FORCECLOSE_DEPRECATED 0x00000002
@@ -199,6 +206,7 @@ struct global {
 	int default_shards; /* default shards for listeners, or -1 (by-thread) or -2 (by-group) */
 	uint max_checks_per_thread; /* if >0, no more than this concurrent checks per thread */
 	uint ring_queues; /* if >0, #ring queues, otherwise equals #thread groups */
+	enum threadgroup_takeover tg_takeover; /* Policy for threadgroup takeover */
 #ifdef USE_QUIC
 	unsigned int quic_backend_max_idle_timeout;
 	unsigned int quic_frontend_max_idle_timeout;
diff --git a/src/backend.c b/src/backend.c
index 56381a49f..6e9537a87 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -1263,7 +1263,9 @@ int alloc_bind_address(struct sockaddr_storage **ss,
  */
 struct connection *conn_backend_get(struct stream *s, struct server *srv, int is_safe, int64_t hash)
 {
+	const struct tgroup_info *curtg = tg;
 	struct connection *conn = NULL;
+	unsigned int curtgid = tgid;
 	int i; // thread number
 	int found = 0;
 	int stop;
@@ -1318,10 +1320,10 @@ struct connection *conn_backend_get(struct stream *s, struct server *srv, int is
 	 * unvisited thread, but always staying in the same group.
 	 */
 	stop = srv->per_tgrp[tgid - 1].next_takeover;
-	if (stop >= tg->count)
-		stop %= tg->count;
-
-	stop += tg->base;
+	if (stop >= curtg->count)
+		stop %= curtg->count;
+	stop += curtg->base;
+check_tgid:
 	i = stop;
 	do {
 		if (!srv->curr_idle_thr[i] || i == tid)
@@ -1356,8 +1358,21 @@ struct connection *conn_backend_get(struct stream *s, struct server *srv, int is
 			}
 		}
 		HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
-	} while (!found && (i = (i + 1 == tg->base + tg->count) ? tg->base : i + 1) != stop);
+	} while (!found && (i = (i + 1 == curtg->base + curtg->count) ? curtg->base : i + 1) != stop);
+	if (!found && (global.tune.tg_takeover == FULL_THREADGROUP_TAKEOVER ||
+	    (global.tune.tg_takeover == RESTRICTED_THREADGROUP_TAKEOVER &&
+	     srv->flags & SRV_F_RHTTP))) {
+		curtgid = curtgid + 1;
+		if (curtgid == global.nbtgroups + 1)
+			curtgid = 1;
+		/* If we haven't looped yet */
+		if (curtgid != tgid) {
+			curtg = &ha_tgroup_info[curtgid - 1];
+			stop = curtg->base;
+			goto check_tgid;
+		}
+	}
 
 	if (!found)
 		conn = NULL;
  done:
diff --git a/src/cfgparse-global.c b/src/cfgparse-global.c
index 4064b2b9c..8e698ba1e 100644
--- a/src/cfgparse-global.c
+++ b/src/cfgparse-global.c
@@ -1348,6 +1348,22 @@ static int cfg_parse_global_tune_opts(char **args, int section_type,
 			return -1;
 		}
 	}
+	else if (strcmp(args[0], "tune.takeover-other-tg-connections") == 0) {
+		if (*(args[1]) == 0) {
+			memprintf(err, "'%s' expects 'none', 'restricted', or 'full'", args[0]);
+			return -1;
+		}
+		if (strcmp(args[1], "none") == 0)
+			global.tune.tg_takeover = NO_THREADGROUP_TAKEOVER;
+		else if (strcmp(args[1], "restricted") == 0)
+			global.tune.tg_takeover = RESTRICTED_THREADGROUP_TAKEOVER;
+		else if (strcmp(args[1], "full") == 0)
+			global.tune.tg_takeover = FULL_THREADGROUP_TAKEOVER;
+		else {
+			memprintf(err, "'%s' expects 'none', 'restricted', or 'full', got '%s'", args[0], args[1]);
+			return -1;
+		}
+	}
 	else {
 		BUG_ON(1, "Triggered in cfg_parse_global_tune_opts() by unsupported keyword.\n");
 		return -1;
@@ -1716,6 +1732,7 @@ static struct cfg_kw_list cfg_kws = {ILH, {
 	{ CFG_GLOBAL, "tune.http.maxhdr", cfg_parse_global_tune_opts },
 	{ CFG_GLOBAL, "tune.comp.maxlevel", cfg_parse_global_tune_opts },
 	{ CFG_GLOBAL, "tune.pattern.cache-size", cfg_parse_global_tune_opts },
+	{ CFG_GLOBAL, "tune.takeover-other-tg-connections", cfg_parse_global_tune_opts },
 	{ CFG_GLOBAL, "tune.disable-fast-forward", cfg_parse_global_tune_forward_opts },
 	{ CFG_GLOBAL, "tune.disable-zero-copy-forwarding", cfg_parse_global_tune_forward_opts },
 	{ CFG_GLOBAL, "tune.chksize", cfg_parse_global_unsupported_opts },
diff --git a/src/fd.c b/src/fd.c
index 97d58e797..cd30b34d4 100644
--- a/src/fd.c
+++ b/src/fd.c
@@ -512,6 +512,8 @@ void fd_migrate_on(int fd, uint new_tid)
 int fd_takeover(int fd, void *expected_owner)
 {
 	unsigned long old;
+	int changing_tgid = 0;
+	int old_ltid, old_tgid;
 
 	/* protect ourself against a delete then an insert for the same fd,
 	 * if it happens, then the owner will no longer be the expected
@@ -520,17 +522,31 @@ int fd_takeover(int fd, void *expected_owner)
 	if (fdtab[fd].owner != expected_owner)
 		return -1;
 
-	/* we must be alone to work on this idle FD. If not, it means that its
-	 * poller is currently waking up and is about to use it, likely to
-	 * close it on shut/error, but maybe also to process any unexpectedly
-	 * pending data. It's also possible that the FD was closed and
-	 * reassigned to another thread group, so let's be careful.
-	 */
-	if (unlikely(!fd_grab_tgid(fd, ti->tgid)))
-		return -1;
+	/* We're taking a connection from a different thread group */
+	if ((fdtab[fd].refc_tgid & 0x7fff) != tgid) {
+		changing_tgid = 1;
+
+		old_tgid = fd_tgid(fd);
+		BUG_ON(atleast2(fdtab[fd].thread_mask));
+		old_ltid = my_ffsl(fdtab[fd].thread_mask) - 1;
+
+		if (unlikely(!fd_lock_tgid_cur(fd)))
+			return -1;
+	} else {
+		/* we must be alone to work on this idle FD. If not, it means that its
+		 * poller is currently waking up and is about to use it, likely to
+		 * close it on shut/error, but maybe also to process any unexpectedly
+		 * pending data. It's also possible that the FD was closed and
+		 * reassigned to another thread group, so let's be careful.
+		 */
+		if (unlikely(!fd_grab_tgid(fd, ti->tgid)))
+			return -1;
+	}
 
 	old = 0;
 	if (!HA_ATOMIC_CAS(&fdtab[fd].running_mask, &old, ti->ltid_bit)) {
+		if (changing_tgid)
+			fd_unlock_tgid(fd);
 		fd_drop_tgid(fd);
 		return -1;
 	}
@@ -538,6 +554,17 @@ int fd_takeover(int fd, void *expected_owner)
 	/* success, from now on it's ours */
 	HA_ATOMIC_STORE(&fdtab[fd].thread_mask, ti->ltid_bit);
 
+	/*
+	 * Change the tgid to our own tgid.
+	 * This removes the lock, we don't need it anymore, but we keep
+	 * the refcount.
+	 */
+	if (changing_tgid) {
+		fd_update_tgid(fd, tgid);
+		if (cur_poller.fixup_tgid_takeover)
+			cur_poller.fixup_tgid_takeover(&cur_poller, fd, old_ltid, old_tgid);
+	}
+
 	/* Make sure the FD doesn't have the active bit. It is possible that
 	 * the fd is polled by the thread that used to own it, the new thread
 	 * is supposed to call subscribe() later, to activate polling.
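
Note (editor's addition, not part of the patch): as a quick usage illustration of the
keyword documented above, a global section could enable the restricted policy as
follows; the thread counts are arbitrary assumptions.

    global
        nbthread 16
        thread-groups 2
        tune.takeover-other-tg-connections restricted

With "restricted", the extra cross-group lookup only happens for protocols that cannot
simply open a new connection, such as reverse HTTP.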
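The backend.c hunk walks the other thread groups round-robin once the local group has
been scanned without success. A minimal standalone sketch of the wrap-around step it
performs (the function name below is illustrative, not the HAProxy API):

    /* Return the thread group to try after "cur", wrapping from the last
     * group back to 1, as the check_tgid path in conn_backend_get() does.
     * The caller stops as soon as this returns the group it started from,
     * so each group is visited at most once per call.
     */
    static unsigned int next_takeover_tgroup(unsigned int cur, unsigned int nbtgroups)
    {
        unsigned int next = cur + 1;

        if (next == nbtgroups + 1)
            next = 1;
        return next;
    }

This extra pass is only taken when the policy is "full", or "restricted" with a
reverse-HTTP server (SRV_F_RHTTP).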
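The 0x7fff and 0xffff0000 masks used in fd_takeover() and fd_update_tgid() suggest that
refc_tgid packs the group id in the low bits, a lock bit, and a 16-bit reference count
in the upper half. A hedged sketch of that assumed layout (the constant names and
helper below are illustrative, not the real fd.h definitions):

    #define TGID_ID_MASK    0x7fffU      /* bits 0..14: thread group id */
    #define TGID_LOCK_BIT   0x8000U      /* bit 15: tgid update lock */
    #define TGID_REFC_MASK  0xffff0000U  /* bits 16..31: reference count */

    /* What fd_update_tgid() computes inside its CAS loop: keep the
     * reference count, drop the lock bit, and install the new group id.
     */
    static unsigned int tgid_switch(unsigned int refc_tgid, unsigned int new_tgid)
    {
        return (refc_tgid & TGID_REFC_MASK) | new_tgid;
    }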