diff --git a/doc/configuration.txt b/doc/configuration.txt index 2db2f36d6..da9d8b540 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -1639,6 +1639,7 @@ The following keywords are supported in the "global" section : - tune.comp.maxlevel - tune.disable-fast-forward - tune.disable-zero-copy-forwarding + - tune.epoll.mask-events - tune.events.max-events-at-once - tune.fail-alloc - tune.fd.edge-triggered @@ -3549,6 +3550,31 @@ tune.disable-zero-copy-forwarding tune.h1.zero-copy-fwd-recv, tune.h1.zero-copy-fwd-send, tune.h2.zero-copy-fwd-send, tune.quic.zero-copy-fwd-send +tune.epoll.mask-events + Along HAProxy's history, a few complex issues were met that were caused by + bugs in the epoll mechanism in the Linux kernel. These ones usually are very + rare and unreproducible outside the reporter's environment, and may only be + worked around by disabling epoll and switching to poll instead, which is not + very satisfying for high performance environments. Each time, issues affect + only very specific (and rare) event types, and offering the ability to mask + them can constitute a more acceptable work-around. This option offers this + possibility by permitting to silently ignore a few uncommon events + and replace them with an input (which reports an unspecified incoming event). + The effect is to avoid the fast error processing paths in certain places and + only use the common paths. This should never be used unless being invited to + do so by an expert in order to diagnose or work around a kernel bug. + + The option takes a single argument which is a comma-delimited list of words + each designating an event to be masked. 
The currently supported list of + events is: + - "err": mask the EPOLLERR event + - "hup": mask the EPOLLHUP events + - "rdhup": mask the EPOLLRDHUP events + + Example: + # mask all non-traffic epoll events: + tune.epoll.mask-events err,hup,rdhup + tune.events.max-events-at-once Sets the number of events that may be processed at once by an asynchronous task handler (from event_hdl API). should be included between 1 diff --git a/src/ev_epoll.c b/src/ev_epoll.c index 9e7050c73..11e99310a 100644 --- a/src/ev_epoll.c +++ b/src/ev_epoll.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -28,6 +29,7 @@ /* private data */ static THREAD_LOCAL struct epoll_event *epoll_events = NULL; static int epoll_fd[MAX_THREADS] __read_mostly; // per-thread epoll_fd +static uint epoll_mask = 0; // events to be masked and turned to EPOLLIN #ifndef EPOLLRDHUP /* EPOLLRDHUP was defined late in libc, and it appeared in kernel 2.6.17 */ @@ -150,6 +152,7 @@ static void _update_fd(int fd) ev.events |= EPOLLOUT; done: + ev.events &= ~epoll_mask; ev.data.fd = fd; epoll_ctl(epoll_fd[tid], opcode, fd, &ev); } @@ -259,6 +262,11 @@ static void _do_poll(struct poller *p, int exp, int wake) #ifdef DEBUG_FD _HA_ATOMIC_INC(&fdtab[fd].event_count); #endif + if (e & epoll_mask) { + e |= EPOLLIN; + e &= ~epoll_mask; + } + n = ((e & EPOLLIN) ? FD_EV_READY_R : 0) | ((e & EPOLLOUT) ? FD_EV_READY_W : 0) | ((e & EPOLLRDHUP) ? 
FD_EV_SHUT_R : 0) | @@ -404,6 +412,43 @@ static void _do_register(void) p->fork = _do_fork; } +/* config parser for global "tune.epoll.mask-events". It first resets epoll_mask to zero, then walks args[1] as a comma-delimited list of words (the string is split in place) and ORs EPOLLERR, EPOLLHUP or EPOLLRDHUP into epoll_mask for "err", "hup" and "rdhup" respectively. Returns 0 on success, or -1 when too_many_args() reports a problem or when an unknown word is met, in which case an error message is written to <err>. */ +static int cfg_parse_tune_epoll_mask_events(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) +{ + char *comma, *kw; + + if (too_many_args(1, args, err, NULL)) + return -1; + + epoll_mask = 0; + for (kw = args[1]; kw && *kw; kw = comma) { + comma = strchr(kw, ','); + if (comma) + *(comma++) = 0; /* terminate the current word; the next one starts right after the comma */ + + if (strcmp(kw, "err") == 0) + epoll_mask |= EPOLLERR; + else if (strcmp(kw, "hup") == 0) + epoll_mask |= EPOLLHUP; + else if (strcmp(kw, "rdhup") == 0) + epoll_mask |= EPOLLRDHUP; + else { + memprintf(err, "'%s' expects a comma-delimited list of 'err', 'hup' and 'rdhup' but got '%s'.", args[0], kw); + return -1; + } + } + return 0; +} + +/* config keyword parsers: maps the CFG_GLOBAL keyword "tune.epoll.mask-events" to cfg_parse_tune_epoll_mask_events(); the list is handed to cfg_register_keywords() by the INITCALL1 below */ +static struct cfg_kw_list cfg_kws = {ILH, { + { CFG_GLOBAL, "tune.epoll.mask-events", cfg_parse_tune_epoll_mask_events }, + { 0, NULL, NULL } +}}; + +INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); INITCALL0(STG_REGISTER, _do_register);