MINOR: epoll: permit to mask certain specific events

A few times in the past we've seen cases where epoll was caught reporting
a wrong event that caused trouble (e.g. spuriously reporting HUP or RDHUP
after a successful connect()). The new tune.epoll.mask-events directive
permits to mask events such as ERR, HUP and RDHUP and convert them to IN
events that are processed by the regular receive path. This should help
better diagnose and troubleshoot issues such as this one, as well as rule
out such a cause when similar issues are reported:

   https://github.com/haproxy/haproxy/issues/2368
   https://www.spinics.net/lists/netdev/msg876470.html

It should be harmless to backport this if necessary.
This commit is contained in:
Willy Tarreau 2025-01-27 15:41:26 +01:00
parent e768a531b7
commit 7fa70da06d
2 changed files with 71 additions and 0 deletions

View File

@ -1639,6 +1639,7 @@ The following keywords are supported in the "global" section :
- tune.comp.maxlevel
- tune.disable-fast-forward
- tune.disable-zero-copy-forwarding
- tune.epoll.mask-events
- tune.events.max-events-at-once
- tune.fail-alloc
- tune.fd.edge-triggered
@ -3549,6 +3550,31 @@ tune.disable-zero-copy-forwarding
tune.h1.zero-copy-fwd-recv, tune.h1.zero-copy-fwd-send,
tune.h2.zero-copy-fwd-send, tune.quic.zero-copy-fwd-send
tune.epoll.mask-events <event[,...]>
Along HAProxy's history, a few complex issues were met that were caused by
bugs in the epoll mechanism in the Linux kernel. These ones usually are very
rare and unreproducible outside the reporter's environment, and may only be
worked around by disabling epoll and switching to poll instead, which is not
very satisfying for high performance environments. Each time, issues affect
only very specific (and rare) event types, and offering the ability to mask
them can constitute a more acceptable work-around. This option offers this
possibility by permitting to silently ignore a few uncommon events
and replace them with an input (which reports an unspecified incoming event).
The effect is to avoid the fast error processing paths in certain places and
only use the common paths. This should never be used unless being invited to
do so by an expert in order to diagnose or work around a kernel bug.
The option takes a single argument which is a comma-delimited list of words
each designating an event to be masked. The currently supported list of
events is:
- "err": mask the EPOLLERR events
- "hup": mask the EPOLLHUP events
- "rdhup": mask the EPOLLRDHUP events
Example:
# mask all non-traffic epoll events:
tune.epoll.mask-events err,hup,rdhup
tune.events.max-events-at-once <number>
Sets the number of events that may be processed at once by an asynchronous
task handler (from event_hdl API). <number> should be included between 1

View File

@ -16,6 +16,7 @@
#include <haproxy/activity.h>
#include <haproxy/api.h>
#include <haproxy/cfgparse.h>
#include <haproxy/clock.h>
#include <haproxy/fd.h>
#include <haproxy/global.h>
@ -28,6 +29,7 @@
/* private data */
/* thread-local pointer to an array of struct epoll_event, NULL by default
 * (presumably the buffer handed to epoll_wait(); its allocation is not
 * visible in this part of the file -- to be confirmed).
 */
static THREAD_LOCAL struct epoll_event *epoll_events = NULL;
static int epoll_fd[MAX_THREADS] __read_mostly; // per-thread epoll_fd
/* Set of EPOLL* event bits configured via "tune.epoll.mask-events". These
 * bits are removed from the events passed to epoll_ctl() in _update_fd(),
 * and when one of them is reported anyway in _do_poll(), it is cleared and
 * replaced with EPOLLIN. Zero (the default) masks nothing.
 */
static uint epoll_mask = 0; // events to be masked and turned to EPOLLIN
#ifndef EPOLLRDHUP
/* EPOLLRDHUP was defined late in libc, and it appeared in kernel 2.6.17 */
@ -150,6 +152,7 @@ static void _update_fd(int fd)
ev.events |= EPOLLOUT;
done:
ev.events &= ~epoll_mask;
ev.data.fd = fd;
epoll_ctl(epoll_fd[tid], opcode, fd, &ev);
}
@ -259,6 +262,11 @@ static void _do_poll(struct poller *p, int exp, int wake)
#ifdef DEBUG_FD
_HA_ATOMIC_INC(&fdtab[fd].event_count);
#endif
if (e & epoll_mask) {
e |= EPOLLIN;
e &= ~epoll_mask;
}
n = ((e & EPOLLIN) ? FD_EV_READY_R : 0) |
((e & EPOLLOUT) ? FD_EV_READY_W : 0) |
((e & EPOLLRDHUP) ? FD_EV_SHUT_R : 0) |
@ -404,6 +412,43 @@ static void _do_register(void)
p->fork = _do_fork;
}
/* Config parser for global "tune.epoll.mask-events". The single argument in
 * args[1] is a comma-delimited list of words among "err", "hup" and "rdhup",
 * each adding the matching EPOLLERR, EPOLLHUP or EPOLLRDHUP bit to epoll_mask.
 * The mask is reset first, so a later occurrence of the keyword replaces the
 * previous one instead of adding to it. Empty words (missing argument, leading,
 * trailing or doubled commas) are skipped so that the words following them are
 * still taken into account instead of being silently dropped.
 *
 * Note that args[1] is modified in place: each comma is replaced with a
 * trailing zero in order to isolate the words.
 *
 * Returns 0 on success, or -1 on error (too many arguments or unknown word),
 * in which case an error message is reported in <err>.
 */
static int cfg_parse_tune_epoll_mask_events(char **args, int section_type, struct proxy *curpx,
                                            const struct proxy *defpx, const char *file, int line,
                                            char **err)
{
	char *comma, *kw;

	if (too_many_args(1, args, err, NULL))
		return -1;

	epoll_mask = 0;
	for (kw = args[1]; kw; kw = comma) {
		/* cut the current word at the next comma; <comma> then points
		 * to the beginning of the next word, or is NULL after the last
		 * one, which ends the loop.
		 */
		comma = strchr(kw, ',');
		if (comma)
			*(comma++) = 0;

		/* an empty word must not stop the parsing, otherwise anything
		 * placed after it would be ignored without any warning.
		 */
		if (!*kw)
			continue;

		if (strcmp(kw, "err") == 0)
			epoll_mask |= EPOLLERR;
		else if (strcmp(kw, "hup") == 0)
			epoll_mask |= EPOLLHUP;
		else if (strcmp(kw, "rdhup") == 0)
			epoll_mask |= EPOLLRDHUP;
		else {
			memprintf(err, "'%s' expects a comma-delimited list of 'err', 'hup' and 'rdhup' but got '%s'.", args[0], kw);
			return -1;
		}
	}
	return 0;
}
/* config keyword parsers: the "tune.epoll.mask-events" keyword is declared
 * for the global section (CFG_GLOBAL) and is handled by
 * cfg_parse_tune_epoll_mask_events(). The final all-zero entry presumably
 * marks the end of the list (to be confirmed against struct cfg_kw_list).
 */
static struct cfg_kw_list cfg_kws = {ILH, {
{ CFG_GLOBAL, "tune.epoll.mask-events", cfg_parse_tune_epoll_mask_events },
{ 0, NULL, NULL }
}};
/* both the keyword list registration (cfg_register_keywords(&cfg_kws)) and
 * _do_register() are declared as initcalls of the STG_REGISTER stage.
 */
INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
INITCALL0(STG_REGISTER, _do_register);