From 91ed52976c0507b2c6b174ff29b7b3885686a12c Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 3 Oct 2023 15:33:46 +0200 Subject: [PATCH] MINOR: dgram: allow to set rcv/sndbuf for dgram sockets as well tune.rcvbuf.client and tune.rcvbuf.server are not suitable for shared dgram sockets because they're per connection so their units are not the same. However, QUIC's listener and log servers are not connected and take per-thread or per-process traffic where a socket log buffer might be too small, causing undesirable packet losses and retransmits in the case of QUIC. This essentially manifests in listener mode with new connections taking a lot of time to set up under heavy traffic due to the small queues causing delays. Let's add a few new settings allowing to set these shared socket sizes on the frontend and backend side (which reminds that these are per-front/back and not per client/server hence not per connection). --- doc/configuration.txt | 36 ++++++++++++++++++++++++++++ include/haproxy/global-t.h | 4 ++++ src/dgram.c | 49 ++++++++++++++++++++++++++++++++++++++ src/log.c | 3 +++ src/proto_quic.c | 6 +++++ src/proto_udp.c | 7 ++++++ 6 files changed, 105 insertions(+) diff --git a/doc/configuration.txt b/doc/configuration.txt index 54905263b..5f8d47323 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -1212,12 +1212,16 @@ The following keywords are supported in the "global" section : - tune.quic.max-frame-loss - tune.quic.retry-threshold - tune.quic.socket-owner + - tune.rcvbuf.backend - tune.rcvbuf.client + - tune.rcvbuf.frontend - tune.rcvbuf.server - tune.recv_enough - tune.runqueue-depth - tune.sched.low-latency + - tune.sndbuf.backend - tune.sndbuf.client + - tune.sndbuf.frontend - tune.sndbuf.server - tune.stick-counters - tune.ssl.cachesize @@ -3377,6 +3381,22 @@ tune.quic.socket-owner { connection | listener } is used globally, it will be forced on every listener instance, regardless of their individual configuration. 
+tune.rcvbuf.backend +tune.rcvbuf.frontend + Forces the kernel socket receive buffer size on non-connected sockets to this + size. This can be used for QUIC in listener mode and log-forward on the frontend. + The default system buffers might sometimes be too small for sockets receiving + lots of aggregated traffic, causing some losses and possibly retransmits (in + case of QUIC), possibly slowing down connection establishment under heavy + traffic. The value is expressed in bytes, applied to each socket. In listener + mode, sockets are shared between all connections, and the total number of + sockets depends on the "shards" value of the "bind" line. There's no good + value, a good one corresponds to an expected size per connection multiplied + by the expected number of connections. The kernel may trim large values. See + also "tune.rcvbuf.client" and "tune.rcvbuf.server" for their connected socket + counterparts, as well as "tune.sndbuf.backend" and "tune.sndbuf.frontend" + for the send setting. + tune.rcvbuf.client tune.rcvbuf.server Forces the kernel socket receive buffer size on the client or the server side @@ -3415,6 +3435,22 @@ tune.sched.low-latency { on | off } massive traffic, at the expense of a higher impact on this large traffic. For regular usage it is better to leave this off. The default value is off. +tune.sndbuf.backend +tune.sndbuf.frontend + Forces the kernel socket send buffer size on non-connected sockets to this size. + This can be used for UNIX socket and UDP logging on the backend side, and for + QUIC in listener mode on the frontend. The default system buffers might + sometimes be too small for sockets shared between many connections (or log + senders), causing some losses and possibly retransmits, slowing down new + connection establishment under high traffic. The value is expressed in bytes, + applied to each socket.
In listener mode, sockets are shared between all + connections, and the total number of sockets depends on the "shards" value of + the "bind" line. There's no good value, a good one corresponds to an expected + size per connection multiplied by the expected number of connections. The + kernel may trim large values. See also "tune.sndbuf.client" and + "tune.sndbuf.server" for their connected socket counterparts, as well as + "tune.rcvbuf.backend" and "tune.rcvbuf.frontend" for the receive setting. + tune.sndbuf.client tune.sndbuf.server Forces the kernel socket send buffer size on the client or the server side to diff --git a/include/haproxy/global-t.h b/include/haproxy/global-t.h index cf17c4481..e0f64d5cc 100644 --- a/include/haproxy/global-t.h +++ b/include/haproxy/global-t.h @@ -156,6 +156,10 @@ struct global { int client_rcvbuf; /* set client rcvbuf to this value if not null */ int server_sndbuf; /* set server sndbuf to this value if not null */ int server_rcvbuf; /* set server rcvbuf to this value if not null */ + int frontend_sndbuf; /* set frontend dgram sndbuf to this value if not null */ + int frontend_rcvbuf; /* set frontend dgram rcvbuf to this value if not null */ + int backend_sndbuf; /* set backend dgram sndbuf to this value if not null */ + int backend_rcvbuf; /* set backend dgram rcvbuf to this value if not null */ int pipesize; /* pipe size in bytes, system defaults if zero */ int max_http_hdr; /* max number of HTTP headers, use MAX_HTTP_HDR if zero */ int requri_len; /* max len of request URI, use REQURI_LEN if zero */ diff --git a/src/dgram.c b/src/dgram.c index 54823d157..c983c03b2 100644 --- a/src/dgram.c +++ b/src/dgram.c @@ -11,7 +11,10 @@ */ #include +#include #include +#include +#include /* datagram handler callback */ void dgram_fd_handler(int fd) @@ -28,3 +31,49 @@ void dgram_fd_handler(int fd) return; } + +/* config parser for global "tune.{rcv,snd}buf.{frontend,backend}" */ +static int dgram_parse_tune_bufs(char **args, int
section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) +{ + int *valptr; + int val; + + if (too_many_args(1, args, err, NULL)) + return -1; + + /* args[0] is one of "tune.rcvbuf.frontend", "tune.rcvbuf.backend", "tune.sndbuf.frontend", "tune.sndbuf.backend": character 5 ('r' or 's') selects rcvbuf vs sndbuf and character 12 ('f' or 'b') selects frontend vs backend, + * which together pick the global.tune field to fill. A field that is already non-zero is left untouched and 1 is returned with a message in <err>; an empty or non strictly positive value returns -1 with a message in <err> (as does a failing too_many_args() check above); 0 is returned once the value is stored. + */ + valptr = (args[0][5] == 'r' && args[0][12] == 'f') ? &global.tune.frontend_rcvbuf : + (args[0][5] == 'r' && args[0][12] == 'b') ? &global.tune.backend_rcvbuf : + (args[0][5] == 's' && args[0][12] == 'f') ? &global.tune.frontend_sndbuf : + &global.tune.backend_sndbuf; + + if (*valptr != 0) { + memprintf(err, "parsing [%s:%d] : ignoring '%s' which was already specified.\n", file, line, args[0]); + return 1; + } + + val = atoi(args[1]); + + if (*(args[1]) == 0 || val <= 0) { + memprintf(err, "parsing [%s:%d] : '%s' expects a strictly positive integer argument.\n", file, line, args[0]); + return -1; + } + + *valptr = val; + return 0; +} + +/* register the "global" section keywords "tune.{rcv,snd}buf.{frontend,backend}", all parsed by dgram_parse_tune_bufs() */ +static struct cfg_kw_list dgram_cfg_kws = {ILH, { + { CFG_GLOBAL, "tune.rcvbuf.backend", dgram_parse_tune_bufs }, + { CFG_GLOBAL, "tune.rcvbuf.frontend", dgram_parse_tune_bufs }, + { CFG_GLOBAL, "tune.sndbuf.backend", dgram_parse_tune_bufs }, + { CFG_GLOBAL, "tune.sndbuf.frontend", dgram_parse_tune_bufs }, + { 0, NULL, NULL } +}}; + +INITCALL1(STG_REGISTER, cfg_register_keywords, &dgram_cfg_kws); diff --git a/src/log.c b/src/log.c index bcae3a697..8ada05089 100644 --- a/src/log.c +++ b/src/log.c @@ -2053,6 +2053,9 @@ static inline void __do_send_log(struct log_target *target, struct log_header hd } else { /* we don't want to receive anything on this socket */ setsockopt(*plogfd, SOL_SOCKET, SO_RCVBUF, &zero, sizeof(zero)); + /* we may want to adjust the output buffer (tune.sndbuf.backend) */ + if (global.tune.backend_sndbuf) + setsockopt(*plogfd, SOL_SOCKET, SO_SNDBUF, &global.tune.backend_sndbuf, sizeof(global.tune.backend_sndbuf)); /* does nothing under Linux, maybe needed for others */
shutdown(*plogfd, SHUT_RD); fd_set_cloexec(*plogfd); diff --git a/src/proto_quic.c b/src/proto_quic.c index 73a48e44b..701dcb295 100644 --- a/src/proto_quic.c +++ b/src/proto_quic.c @@ -680,6 +680,12 @@ static int quic_bind_listener(struct listener *listener, char *errmsg, int errle global.tune.options &= ~GTUNE_QUIC_SOCK_PER_CONN; } + if (global.tune.frontend_rcvbuf) + setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &global.tune.frontend_rcvbuf, sizeof(global.tune.frontend_rcvbuf)); + + if (global.tune.frontend_sndbuf) + setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &global.tune.frontend_sndbuf, sizeof(global.tune.frontend_sndbuf)); + listener_set_state(listener, LI_LISTEN); udp_return: diff --git a/src/proto_udp.c b/src/proto_udp.c index 54f87d0ca..98559745c 100644 --- a/src/proto_udp.c +++ b/src/proto_udp.c @@ -148,6 +148,13 @@ int udp_bind_listener(struct listener *listener, char *errmsg, int errlen) goto udp_return; } + /* we may want to adjust the receive and send buffers (tune.rcvbuf.frontend, tune.sndbuf.frontend) */ + if (global.tune.frontend_rcvbuf) + setsockopt(listener->rx.fd, SOL_SOCKET, SO_RCVBUF, &global.tune.frontend_rcvbuf, sizeof(global.tune.frontend_rcvbuf)); + + if (global.tune.frontend_sndbuf) + setsockopt(listener->rx.fd, SOL_SOCKET, SO_SNDBUF, &global.tune.frontend_sndbuf, sizeof(global.tune.frontend_sndbuf)); + listener_set_state(listener, LI_LISTEN); udp_return: