diff --git a/doc/configuration.txt b/doc/configuration.txt index 0041a5a9c..ccaafbba9 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -16382,6 +16382,25 @@ ca-verify-file be defined with intermediate certificates, and "ca-verify-file" with certificates to ending the chain, like root CA. +cc + This setting is only available on systems which define TCP_CONGESTION, and + was validated on Linux and FreeBSD. It takes the name of a TCP congestion + control algorithm and configures the listener to use this algorithm on all + connections that are accepted from this listener. Typical names include + "reno", "cubic" and will depend on the operating system. On some systems, + special permissions may be required to configure certain algorithms. On + Linux, the list of available algorithms may be found in the sysctl + "net.ipv4.tcp_available_congestion_control", and the list of those permitted + without privileges is in "net.ipv4.tcp_allowed_congestion_control". In order + to access algorithms requiring extra permissions, the "cap_net_admin" + capability might be required (see "setcap" in the global section). In case of + failure to configure a specific congestion control algorithm, the default one + will remain unchanged and a warning will be emitted to report the problem. + Example: + + frontend public + bind :443 cc bbr # use the BBR algorithm for high bandwidths + ciphers This setting is only available when support for OpenSSL was built in. 
It sets the string describing the list of cipher algorithms ("cipher suite") that are diff --git a/include/haproxy/listener-t.h b/include/haproxy/listener-t.h index c36108713..0fbed6c7f 100644 --- a/include/haproxy/listener-t.h +++ b/include/haproxy/listener-t.h @@ -195,6 +195,7 @@ struct bind_conf { int maxseg; /* for TCP, advertised MSS */ int tcp_ut; /* for TCP, user timeout */ char *tcp_md5sig; /* TCP MD5 signature password (RFC2385) */ + char *cc_algo; /* TCP congestion control algorithm ("cc" parameter) */ int idle_ping; /* MUX idle-ping interval in ms */ int maxaccept; /* if set, max number of connections accepted at once (-1 when disabled) */ unsigned int backlog; /* if set, listen backlog */ diff --git a/src/cfgparse-tcp.c b/src/cfgparse-tcp.c index 40419cc51..15b8818f5 100644 --- a/src/cfgparse-tcp.c +++ b/src/cfgparse-tcp.c @@ -61,6 +61,26 @@ static int bind_parse_transparent(char **args, int cur_arg, struct proxy *px, st } #endif +#if defined(TCP_CONGESTION) +/* parse the "cc" bind keyword: the argument following the keyword (args[cur_arg + 1]) is taken as the TCP congestion control algorithm name and is duplicated with strdup() into conf->cc_algo, after ha_free() has been called on the previously stored value. Returns 0 on success, or ERR_ALERT | ERR_FATAL with a message written to <err> through memprintf() when the name is empty or when the duplication fails. */ +static int bind_parse_cc(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err) +{ + if (!*args[cur_arg + 1]) { + memprintf(err, "'%s' : missing TCP congestion control algorithm", args[cur_arg]); + return ERR_ALERT | ERR_FATAL; + } + + ha_free(&conf->cc_algo); + conf->cc_algo = strdup(args[cur_arg + 1]); + if (!conf->cc_algo) { + memprintf(err, "'%s %s' : out of memory", args[cur_arg], args[cur_arg + 1]); + return ERR_ALERT | ERR_FATAL; + } + + return 0; +} +#endif + #if defined(TCP_DEFER_ACCEPT) || defined(SO_ACCEPTFILTER) /* parse the "defer-accept" bind keyword */ static int bind_parse_defer_accept(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err) @@ -278,6 +298,9 @@ static int srv_parse_tcp_ut(char **args, int *cur_arg, struct proxy *px, struct * not enabled. 
*/ static struct bind_kw_list bind_kws = { "TCP", { }, { +#if defined(TCP_CONGESTION) + { "cc", bind_parse_cc, 1 }, /* set TCP congestion control algorithm */ +#endif #if defined(TCP_DEFER_ACCEPT) || defined(SO_ACCEPTFILTER) { "defer-accept", bind_parse_defer_accept, 0 }, /* wait for some data for 1 second max before doing accept */ #endif diff --git a/src/listener.c b/src/listener.c index 089137b66..03c5ba52c 100644 --- a/src/listener.c +++ b/src/listener.c @@ -2091,6 +2091,7 @@ struct bind_conf *bind_conf_alloc(struct proxy *fe, const char *file, bind_conf->rhttp_srvname = NULL; bind_conf->tcp_md5sig = NULL; + bind_conf->cc_algo = NULL; return bind_conf; diff --git a/src/proto_tcp.c b/src/proto_tcp.c index ea9f4c9fe..b2b9b2121 100644 --- a/src/proto_tcp.c +++ b/src/proto_tcp.c @@ -732,6 +732,22 @@ int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen) } } #endif + +#if defined(TCP_CONGESTION) + if (listener->bind_conf->cc_algo) { + /* Changing congestion control might fail due to loaded + * algorithms or permission. In this case the default algorithm + * remains active, but we can emit a warning about it to give a + * chance to the user to fix it. + */ + if (setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, listener->bind_conf->cc_algo, strlen(listener->bind_conf->cc_algo)) < 0) { + chunk_appendf(msg, "%scannot set TCP congestion control algorithm, (%s)", msg->data ? ", " : "", + strerror(errno)); + err |= ERR_WARN; + } + } +#endif + #if defined(__linux__) && defined(TCP_MD5SIG) if (listener->bind_conf->tcp_md5sig) { struct tcp_md5sig md5; diff --git a/src/proxy.c b/src/proxy.c index 527ae09c4..03894d24e 100644 --- a/src/proxy.c +++ b/src/proxy.c @@ -411,6 +411,7 @@ void deinit_proxy(struct proxy *p) free(bind_conf->guid_prefix); free(bind_conf->rhttp_srvname); free(bind_conf->tcp_md5sig); + free(bind_conf->cc_algo); #ifdef USE_QUIC free(bind_conf->quic_cc_algo); #endif