diff --git a/doc/configuration.txt b/doc/configuration.txt index ccaafbba9..946ce8ace 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -16396,7 +16396,7 @@ cc capability might be required (see "setcap" in the global section). In case of failure to configure a specific congestion control algorithm, the default one will remain unchanged and a warning will be emitted to report the problem. - Example: + See also: the "cc" server keyword (section 5.2). Example: frontend public bind :443 cc bbr # use the BBR algorithm for high bandwidths @@ -17535,6 +17535,22 @@ ca-file could be used in place of the cafile. The location of this directory could be overwritten by setting the SSL_CERT_DIR environment variable. +cc <algo> + May be used in the following contexts: tcp, http, log, peers, ring + + This setting is only available on systems which define TCP_CONGESTION, and + was validated on Linux and FreeBSD. It takes the name of a TCP congestion + control algorithm and configures outgoing connections to use this algorithm. + Typical names include "reno" or "cubic" and will depend on the operating + system. On some systems, special permissions may be required to configure + certain algorithms. On Linux, available algorithms are listed in sysctl + "net.ipv4.tcp_available_congestion_control", and those permitted without + privileges are in "net.ipv4.tcp_allowed_congestion_control". In order to + access algorithms requiring extra permissions, the "cap_net_admin" capability + might be required (see "setcap" in the global section). In case of failure to + configure a specific congestion control algorithm, the default one remains + unchanged. See also: the "cc" bind keyword (section 5.1). 
+ check May be used in the following contexts: tcp, http, log diff --git a/include/haproxy/server-t.h b/include/haproxy/server-t.h index 3609d10aa..e2036c9c7 100644 --- a/include/haproxy/server-t.h +++ b/include/haproxy/server-t.h @@ -447,6 +447,7 @@ struct server { int tcp_ut; /* for TCP, user timeout */ char *tcp_md5sig; /* TCP MD5 signature password (RFC2385) */ + char *cc_algo; /* TCP congestion control algorithm ("cc" argument) */ int do_check; /* temporary variable used during parsing to denote if health checks must be enabled */ int do_agent; /* temporary variable used during parsing to denote if an auxiliary agent check must be enabled */ diff --git a/src/cfgparse-tcp.c b/src/cfgparse-tcp.c index 15b8818f5..cf6b50c02 100644 --- a/src/cfgparse-tcp.c +++ b/src/cfgparse-tcp.c @@ -224,6 +224,25 @@ static int bind_parse_namespace(char **args, int cur_arg, struct proxy *px, stru } #endif +#if defined(TCP_CONGESTION) +/* parse the "cc" server keyword */ +static int srv_parse_cc(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err) +{ + if (!*args[*cur_arg + 1]) { + memprintf(err, "'%s' : missing TCP congestion control algorithm", args[*cur_arg]); + return ERR_ALERT | ERR_FATAL; + } + + ha_free(&newsrv->cc_algo); + newsrv->cc_algo = strdup(args[*cur_arg + 1]); + if (!newsrv->cc_algo) { + memprintf(err, "'%s %s' : out of memory", args[*cur_arg], args[*cur_arg + 1]); + return ERR_ALERT | ERR_FATAL; + } + return 0; +} +#endif + #if defined(__linux__) && defined(TCP_MD5SIG) /* parse the "tcp-md5sig" server keyword */ static int srv_parse_tcp_md5sig(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err) @@ -342,6 +361,9 @@ static struct bind_kw_list bind_kws = { "TCP", { }, { INITCALL1(STG_REGISTER, bind_register_keywords, &bind_kws); static struct srv_kw_list srv_kws = { "TCP", { }, { +#if defined(TCP_CONGESTION) + { "cc", srv_parse_cc, 1, 1, 0 }, /* set TCP congestion control algorithm */ +#endif #if 
defined(__linux__) && defined(TCP_MD5SIG) { "tcp-md5sig", srv_parse_tcp_md5sig, 1, 1, 0 }, /* set TCP MD5 signature password on server */ #endif diff --git a/src/proto_tcp.c b/src/proto_tcp.c index b2b9b2121..93115f42f 100644 --- a/src/proto_tcp.c +++ b/src/proto_tcp.c @@ -531,6 +531,17 @@ int tcp_connect_server(struct connection *conn, int flags) setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &zero, sizeof(zero)); #endif +#if defined(TCP_CONGESTION) + if (srv && srv->cc_algo) { + /* Changing congestion control might fail due to loaded + * algorithms or permission. In this case the default algorithm + * remains active so we silently ignore it. Note: it would be + * nice to have warning counters on servers. + */ + setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, srv->cc_algo, strlen(srv->cc_algo)); + } +#endif + #if defined(__linux__) && defined(TCP_MD5SIG) /* if it fails, the connection will fail, so reported an error */ if (srv && srv->tcp_md5sig) { diff --git a/src/server.c b/src/server.c index 6db223e9e..471a620b3 100644 --- a/src/server.c +++ b/src/server.c @@ -3028,6 +3028,8 @@ void srv_settings_cpy(struct server *srv, const struct server *src, int srv_tmpl if (src->tcp_md5sig != NULL) srv->tcp_md5sig = strdup(src->tcp_md5sig); #endif + if (src->cc_algo != NULL) + srv->cc_algo = strdup(src->cc_algo); #ifdef TCP_USER_TIMEOUT srv->tcp_ut = src->tcp_ut; #endif @@ -3168,6 +3170,7 @@ void srv_free_params(struct server *srv) free(srv->pool_conn_name); release_sample_expr(srv->pool_conn_name_expr); free(srv->resolvers_id); + free(srv->cc_algo); free(srv->tcp_md5sig); free(srv->addr_key); free(srv->lb_nodes);