diff --git a/doc/configuration.txt b/doc/configuration.txt index 0b9776cfb..f8c29b241 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -675,6 +675,7 @@ The following keywords are supported in the "global" section : - tune.bufsize - tune.chksize - tune.comp.maxlevel + - tune.fd.edge-triggered - tune.h2.header-table-size - tune.h2.initial-window-size - tune.h2.max-concurrent-streams @@ -1874,6 +1875,13 @@ tune.fail-alloc success). This is useful to debug and make sure memory failures are handled gracefully. +tune.fd.edge-triggered { on | off } [ EXPERIMENTAL ] + Enables ('on') or disables ('off') the edge-triggered polling mode for FDs + that support it. This is currently only supported with epoll. It may noticeably + reduce the number of epoll_ctl() calls and slightly improve performance in + certain scenarios. This is still experimental, it may result in frozen + connections if bugs are still present, and is disabled by default. + tune.h2.header-table-size Sets the HTTP/2 dynamic header table size. It defaults to 4096 bytes and cannot be larger than 65536 bytes. A larger value may help certain clients diff --git a/include/haproxy/fd-t.h b/include/haproxy/fd-t.h index 5e17b6fe0..97b383ceb 100644 --- a/include/haproxy/fd-t.h +++ b/include/haproxy/fd-t.h @@ -133,6 +133,7 @@ struct fdtab { unsigned char linger_risk:1; /* 1 if we must kill lingering before closing */ unsigned char cloned:1; /* 1 if a cloned socket, requires EPOLL_CTL_DEL on close */ unsigned char initialized:1; /* 1 if init phase was done on this fd (e.g. 
set non-blocking) */ + unsigned char et_possible:1; /* 1 if edge-triggered is possible on this FD */ } THREAD_ALIGNED(64); /* polled mask, one bit per thread and per direction for each FD */ diff --git a/include/haproxy/fd.h b/include/haproxy/fd.h index 0f1799d34..f7af4e162 100644 --- a/include/haproxy/fd.h +++ b/include/haproxy/fd.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -435,6 +436,7 @@ static inline void fd_update_events(int fd, unsigned char evts) static inline void fd_insert(int fd, void *owner, void (*iocb)(int fd), unsigned long thread_mask) { int locked = fdtab[fd].running_mask != tid_bit; + extern void conn_fd_handler(int); if (locked) fd_set_running_excl(fd); @@ -443,6 +445,12 @@ static inline void fd_insert(int fd, void *owner, void (*iocb)(int fd), unsigned fdtab[fd].ev = 0; fdtab[fd].linger_risk = 0; fdtab[fd].cloned = 0; + fdtab[fd].et_possible = 0; + + /* conn_fd_handler should support edge-triggered FDs */ + if ((global.tune.options & GTUNE_FD_ET) && fdtab[fd].iocb == conn_fd_handler) + fdtab[fd].et_possible = 1; + fdtab[fd].thread_mask = thread_mask; /* note: do not reset polled_mask here as it indicates which poller * still knows this FD from a possible previous round. diff --git a/include/haproxy/global-t.h b/include/haproxy/global-t.h index f5bf21629..c7591b467 100644 --- a/include/haproxy/global-t.h +++ b/include/haproxy/global-t.h @@ -66,6 +66,7 @@ #define GTUNE_STRICT_LIMITS (1<<15) #define GTUNE_INSECURE_FORK (1<<16) #define GTUNE_INSECURE_SETUID (1<<17) +#define GTUNE_FD_ET (1<<18) /* SSL server verify mode */ enum { diff --git a/src/ev_epoll.c b/src/ev_epoll.c index 5102b1076..92c000f85 100644 --- a/src/ev_epoll.c +++ b/src/ev_epoll.c @@ -59,6 +59,20 @@ static void _update_fd(int fd) en = fdtab[fd].state; + /* Try to force EPOLLET on FDs that support it */ + if (fdtab[fd].et_possible) { + /* already done ? 
*/ + if (polled_mask[fd].poll_recv & polled_mask[fd].poll_send & tid_bit) + return; + + /* enable ET polling in both directions */ + _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, tid_bit); + _HA_ATOMIC_OR(&polled_mask[fd].poll_send, tid_bit); + opcode = EPOLL_CTL_ADD; + ev.events = EPOLLIN | EPOLLRDHUP | EPOLLOUT | EPOLLET; + goto done; + } + /* if we're already polling or are going to poll for this FD and it's * neither active nor ready, force it to be active so that we don't * needlessly unsubscribe then re-subscribe it. @@ -120,6 +134,7 @@ static void _update_fd(int fd) if (en & FD_EV_ACTIVE_W) ev.events |= EPOLLOUT; + done: ev.data.fd = fd; epoll_ctl(epoll_fd[tid], opcode, fd, &ev); } diff --git a/src/fd.c b/src/fd.c index 1e1c0cbc5..60ad69901 100644 --- a/src/fd.c +++ b/src/fd.c @@ -88,9 +88,11 @@ #endif #include +#include #include #include #include +#include struct fdtab *fdtab = NULL; /* array of all the file descriptors */ @@ -807,6 +809,33 @@ int fork_poller() return 1; } +/* config parser for global "tune.fd.edge-triggered", accepts "on" or "off": "on" sets GTUNE_FD_ET in global.tune.options and "off" clears it. Returns 0 on success, or -1 if too_many_args() returns non-zero or if the value is neither "on" nor "off" (in which case <err> is filled with a message). */ +static int cfg_parse_tune_fd_edge_triggered(char **args, int section_type, struct proxy *curpx, + struct proxy *defpx, const char *file, int line, + char **err) +{ + if (too_many_args(1, args, err, NULL)) + return -1; + + if (strcmp(args[1], "on") == 0) + global.tune.options |= GTUNE_FD_ET; + else if (strcmp(args[1], "off") == 0) + global.tune.options &= ~GTUNE_FD_ET; + else { + memprintf(err, "'%s' expects either 'on' or 'off' but got '%s'.", args[0], args[1]); + return -1; + } + return 0; +} + +/* config keyword parsers: maps the CFG_GLOBAL keyword "tune.fd.edge-triggered" to its parser; the list is passed to cfg_register_keywords() at the STG_REGISTER init stage below */ +static struct cfg_kw_list cfg_kws = {ILH, { + { CFG_GLOBAL, "tune.fd.edge-triggered", cfg_parse_tune_fd_edge_triggered }, + { 0, NULL, NULL } +}}; + +INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); + REGISTER_PER_THREAD_ALLOC(alloc_pollers_per_thread); REGISTER_PER_THREAD_INIT(init_pollers_per_thread); REGISTER_PER_THREAD_DEINIT(deinit_pollers_per_thread);