diff --git a/doc/internals/polling-states.fig b/doc/internals/polling-states.fig index a2230161d..4df93868d 100644 --- a/doc/internals/polling-states.fig +++ b/doc/internals/polling-states.fig @@ -1,12 +1,23 @@ -#FIG 3.2 +#FIG 3.2 Produced by xfig version 3.2.7a Portrait Center Metric -A4 +A4 100.00 Single -2 1200 2 +6 2520 990 4725 3645 +4 0 0 50 -1 16 10 0.0000 4 150 855 2520 1125 R=ready flag\001 +4 0 0 50 -1 16 10 0.0000 4 150 885 2520 1290 A=active flag\001 +4 0 0 50 -1 16 10 0.0000 4 150 1365 2520 2475 fd_want sets A flag\001 +4 0 0 50 -1 16 10 0.0000 4 150 1440 2520 2640 fd_stop clears A flag\001 +4 0 0 50 -1 16 10 0.0000 4 150 1995 2520 3465 fd_done does what's best to\001 +4 0 0 50 -1 16 10 0.0000 4 120 2025 2700 3630 minimize the amount of work.\001 +4 0 0 50 -1 16 10 0.0000 4 150 1905 2520 3300 update() updates the poller.\001 +4 0 0 50 -1 16 10 0.0000 4 150 2190 2520 2970 fd_cant clears R flag (EAGAIN)\001 +4 0 0 50 -1 16 10 0.0000 4 150 2115 2520 3135 fd_rdy sets R flag (poll return)\001 +-6 2 1 0 1 0 7 50 -1 -1 0.000 1 0 -1 1 0 2 1 1 1.00 90.00 180.00 1125 1350 1125 1800 @@ -16,99 +27,38 @@ Single 2 1 0 1 0 7 50 -1 -1 0.000 1 0 -1 1 0 2 1 1 1.00 90.00 180.00 1125 3150 1125 3600 -2 1 0 1 0 7 50 -1 -1 0.000 1 0 -1 1 0 2 - 1 1 1.00 90.00 180.00 - 2925 3150 2925 3600 -2 1 0 1 0 7 50 -1 -1 0.000 1 0 -1 1 0 2 - 1 1 1.00 90.00 180.00 - 2925 2250 2925 2700 -2 1 0 1 0 7 50 -1 -1 0.000 1 0 -1 1 0 2 - 1 1 1.00 90.00 180.00 - 2925 1350 2925 1800 2 1 0 1 0 7 50 -1 -1 0.000 1 0 -1 1 0 2 1 1 1.00 90.00 180.00 1575 1800 1575 1350 2 1 0 1 0 7 50 -1 -1 0.000 1 0 -1 1 0 2 1 1 1.00 90.00 180.00 1575 3600 1575 3150 -2 1 0 1 0 7 50 -1 -1 0.000 1 0 -1 1 0 2 - 1 1 1.00 90.00 180.00 - 3375 2700 3375 2250 -2 1 0 1 0 7 50 -1 -1 0.000 1 0 -1 1 0 2 - 1 1 1.00 90.00 180.00 - 3375 1800 3375 1350 -2 1 0 1 0 7 50 -1 -1 0.000 1 0 -1 1 0 2 - 1 1 1.00 90.00 180.00 - 2700 1125 1800 1125 -2 1 0 1 0 7 50 -1 -1 0.000 1 0 -1 1 0 2 - 1 1 1.00 90.00 180.00 - 2700 3825 1800 3825 -2 4 0 1 0 7 51 
-1 20 0.000 0 0 7 0 0 5 - 3600 1350 2700 1350 2700 900 3600 900 3600 1350 2 4 0 1 0 7 51 -1 20 0.000 0 0 7 0 0 5 1800 1350 900 1350 900 900 1800 900 1800 1350 2 4 0 1 0 7 51 -1 20 0.000 0 0 7 0 0 5 1800 2250 900 2250 900 1800 1800 1800 1800 2250 -2 4 0 1 0 7 51 -1 20 0.000 0 0 7 0 0 5 - 3600 2250 2700 2250 2700 1800 3600 1800 3600 2250 -2 4 0 1 0 7 51 -1 20 0.000 0 0 7 0 0 5 - 3600 3150 2700 3150 2700 2700 3600 2700 3600 3150 -2 4 0 1 0 7 51 -1 20 0.000 0 0 7 0 0 5 - 3600 4050 2700 4050 2700 3600 3600 3600 3600 4050 2 4 0 1 0 7 51 -1 20 0.000 0 0 7 0 0 5 1800 4050 900 4050 900 3600 1800 3600 1800 4050 2 4 0 1 0 7 51 -1 20 0.000 0 0 7 0 0 5 1800 3150 900 3150 900 2700 1800 2700 1800 3150 2 1 0 1 0 7 50 -1 -1 0.000 1 0 -1 1 0 2 1 1 1.00 90.00 180.00 - 1800 2925 2700 2925 + 1350 450 1350 900 2 1 0 1 0 7 50 -1 -1 0.000 1 0 -1 1 0 2 1 1 1.00 90.00 180.00 - 1350 450 1350 900 -4 1 0 50 -1 16 8 0.0000 4 120 330 2250 1080 update\001 -4 1 0 50 -1 16 8 0.0000 4 120 330 2250 3780 update\001 -4 2 0 50 -1 16 8 0.0000 4 75 240 2880 1485 want\001 -4 0 0 50 -1 16 8 0.0000 4 105 210 3420 1755 stop\001 -4 0 0 50 -1 16 8 0.0000 4 120 585 3420 2610 poll()=>rdy\001 -4 2 0 50 -1 16 8 0.0000 4 75 210 2835 2385 cant\001 -4 2 0 50 -1 16 8 0.0000 4 90 285 2835 2655 done*\001 -4 2 0 50 -1 16 8 0.0000 4 90 255 2835 2520 wait*\001 -4 2 0 50 -1 16 8 0.0000 4 75 240 1080 1485 want\001 -4 0 0 50 -1 16 8 0.0000 4 105 210 1665 1755 stop\001 -4 0 0 50 -1 16 8 0.0000 4 90 240 1665 1620 done\001 -4 2 0 50 -1 16 8 0.0000 4 75 210 1035 2385 cant\001 -4 2 0 50 -1 16 8 0.0000 4 90 255 1035 2520 wait*\001 -4 2 0 50 -1 16 8 0.0000 4 105 210 1035 3285 stop\001 -4 0 0 50 -1 16 8 0.0000 4 75 240 1665 3510 want\001 -4 2 0 50 -1 16 8 0.0000 4 105 210 2835 3285 stop\001 -4 1 0 50 -1 16 10 0.0000 4 105 735 1350 1080 STOPPED\001 -4 1 0 50 -1 16 10 0.0000 4 105 630 3150 1080 PAUSED\001 -4 1 0 50 -1 16 10 0.0000 4 105 555 1350 1980 ACTIVE\001 -4 1 0 50 -1 16 10 0.0000 4 105 525 3150 1980 READY\001 -4 1 0 50 -1 16 10 
0.0000 4 105 825 1350 2880 MUSTPOLL\001 -4 1 0 50 -1 16 10 0.0000 4 105 615 3150 2880 POLLED\001 -4 1 0 50 -1 16 10 0.0000 4 105 765 1350 3780 DISABLED\001 -4 1 0 50 -1 16 10 0.0000 4 105 525 3150 3780 ABORT\001 -4 1 0 50 -1 16 8 0.0000 4 105 360 1350 1260 R,!A,!P\001 -4 1 0 50 -1 16 8 0.0000 4 105 330 1350 2160 R,A,!P\001 -4 1 0 50 -1 16 8 0.0000 4 105 330 3150 1260 R,!A,P\001 -4 1 0 50 -1 16 8 0.0000 4 105 300 3150 2160 R,A,P\001 -4 1 0 50 -1 16 8 0.0000 4 105 330 3150 3060 !R,A,P\001 -4 1 0 50 -1 16 8 0.0000 4 105 360 1350 3060 !R,A,!P\001 -4 1 0 50 -1 16 8 0.0000 4 105 390 1350 3960 !R,!A,!P\001 -4 1 0 50 -1 16 8 0.0000 4 105 360 3150 3960 !R,!A,P\001 -4 1 0 50 -1 16 8 0.0000 4 120 330 2250 2880 update\001 -4 0 0 50 -1 16 10 0.0000 4 135 885 4275 1125 R=ready flag\001 -4 0 0 50 -1 16 10 0.0000 4 135 900 4275 1290 A=active flag\001 -4 0 0 50 -1 16 10 0.0000 4 135 915 4275 1455 P=polled flag\001 -4 0 0 50 -1 16 10 0.0000 4 135 2250 4275 1785 Transitions marked with a star (*)\001 -4 0 0 50 -1 16 10 0.0000 4 135 2505 4275 1950 are only possible with level-triggered\001 -4 0 0 50 -1 16 10 0.0000 4 135 495 4275 2115 pollers.\001 -4 0 0 50 -1 16 10 0.0000 4 135 1335 4275 2475 fd_want sets A flag\001 -4 0 0 50 -1 16 10 0.0000 4 135 1425 4275 2640 fd_stop clears A flag\001 -4 0 0 50 -1 16 10 0.0000 4 135 2340 4275 2805 fd_wait clears R flag on LT pollers\001 -4 0 0 50 -1 16 10 0.0000 4 135 1980 4275 3465 fd_done does what's best to\001 -4 0 0 50 -1 16 10 0.0000 4 105 2010 4455 3630 minimize the amount of work.\001 -4 0 0 50 -1 16 10 0.0000 4 135 1935 4275 3300 update() updates the poller.\001 -4 0 0 50 -1 16 10 0.0000 4 135 2145 4275 2970 fd_cant clears R flag (EAGAIN)\001 -4 0 0 50 -1 16 10 0.0000 4 135 2040 4275 3135 fd_rdy sets R flag (poll return)\001 + 1575 2700 1575 2250 +4 2 0 50 -1 16 8 0.0000 4 105 270 1080 1485 want\001 +4 2 0 50 -1 16 8 0.0000 4 120 255 1035 3285 stop\001 +4 0 0 50 -1 16 8 0.0000 4 105 270 1665 3510 want\001 +4 1 0 50 -1 16 10 0.0000 4 120 
735 1350 1080 STOPPED\001 +4 1 0 50 -1 16 10 0.0000 4 120 795 1350 3780 DISABLED\001 +4 1 0 50 -1 16 10 0.0000 4 120 555 1350 2880 ACTIVE\001 +4 1 0 50 -1 16 10 0.0000 4 120 540 1350 1980 READY\001 +4 0 0 50 -1 16 8 0.0000 4 90 210 1665 2565 may\001 +4 2 0 50 -1 16 8 0.0000 4 105 240 1035 2430 cant\001 +4 1 0 50 -1 16 8 0.0000 4 120 240 1350 1260 R,!A\001 +4 1 0 50 -1 16 8 0.0000 4 120 210 1350 2160 R,A\001 +4 1 0 50 -1 16 8 0.0000 4 120 240 1350 3060 !R,A\001 +4 1 0 50 -1 16 8 0.0000 4 120 270 1350 3960 !R,!A\001 +4 0 0 50 -1 16 8 0.0000 4 120 255 1665 1710 stop\001 +4 2 0 50 -1 16 8 0.0000 4 105 285 1035 2610 done\001 diff --git a/include/proto/connection.h b/include/proto/connection.h index 059d5d85a..7e4cb03bf 100644 --- a/include/proto/connection.h +++ b/include/proto/connection.h @@ -268,20 +268,6 @@ static inline void __conn_xprt_stop_recv(struct connection *c) c->flags &= ~CO_FL_XPRT_RD_ENA; } -/* this one is used only to stop speculative recv(). It doesn't stop it if the - * fd is already polled in order to avoid expensive polling status changes. - * Since it might require the upper layer to re-enable reading, we'll return 1 - * if we've really stopped something otherwise zero. 
- */ -static inline int __conn_xprt_done_recv(struct connection *c) -{ - if (!conn_ctrl_ready(c) || !fd_recv_polled(c->handle.fd)) { - c->flags &= ~CO_FL_XPRT_RD_ENA; - return 1; - } - return 0; -} - static inline void __conn_xprt_want_send(struct connection *c) { c->flags |= CO_FL_XPRT_WR_ENA; diff --git a/include/proto/fd.h b/include/proto/fd.h index 9038be511..0b56bdccc 100644 --- a/include/proto/fd.h +++ b/include/proto/fd.h @@ -183,14 +183,6 @@ static inline int fd_recv_ready(const int fd) return (unsigned)fdtab[fd].state & FD_EV_READY_R; } -/* - * returns true if the FD is polled for recv - */ -static inline int fd_recv_polled(const int fd) -{ - return (unsigned)fdtab[fd].state & FD_EV_POLLED_R; -} - /* * returns the FD's send state (FD_EV_*) */ @@ -215,14 +207,6 @@ static inline int fd_send_ready(const int fd) return (unsigned)fdtab[fd].state & FD_EV_READY_W; } -/* - * returns true if the FD is polled for send - */ -static inline int fd_send_polled(const int fd) -{ - return (unsigned)fdtab[fd].state & FD_EV_POLLED_W; -} - /* * returns true if the FD is active for recv or send */ @@ -241,11 +225,8 @@ static inline void fd_stop_recv(int fd) if (!(old & FD_EV_ACTIVE_R)) return; new = old & ~FD_EV_ACTIVE_R; - new &= ~FD_EV_POLLED_R; } while (unlikely(!_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new))); - - if ((old ^ new) & FD_EV_POLLED_R) - updt_fd_polling(fd); + updt_fd_polling(fd); } /* Disable processing send events on fd */ @@ -258,11 +239,8 @@ static inline void fd_stop_send(int fd) if (!(old & FD_EV_ACTIVE_W)) return; new = old & ~FD_EV_ACTIVE_W; - new &= ~FD_EV_POLLED_W; } while (unlikely(!_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new))); - - if ((old ^ new) & FD_EV_POLLED_W) - updt_fd_polling(fd); + updt_fd_polling(fd); } /* Disable processing of events on fd for both directions. 
*/ @@ -275,11 +253,8 @@ static inline void fd_stop_both(int fd) if (!(old & FD_EV_ACTIVE_RW)) return; new = old & ~FD_EV_ACTIVE_RW; - new &= ~FD_EV_POLLED_RW; } while (unlikely(!_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new))); - - if ((old ^ new) & FD_EV_POLLED_RW) - updt_fd_polling(fd); + updt_fd_polling(fd); } /* Report that FD cannot receive anymore without polling (EAGAIN detected). */ @@ -292,12 +267,7 @@ static inline void fd_cant_recv(const int fd) if (!(old & FD_EV_READY_R)) return; new = old & ~FD_EV_READY_R; - if (new & FD_EV_ACTIVE_R) - new |= FD_EV_POLLED_R; } while (unlikely(!_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new))); - - if ((old ^ new) & FD_EV_POLLED_R) - updt_fd_polling(fd); } /* Report that FD may receive again without polling. */ @@ -320,15 +290,11 @@ static inline void fd_done_recv(const int fd) old = fdtab[fd].state; do { - if ((old & (FD_EV_POLLED_R|FD_EV_READY_R)) != (FD_EV_POLLED_R|FD_EV_READY_R)) + if ((old & (FD_EV_ACTIVE_R|FD_EV_READY_R)) != (FD_EV_ACTIVE_R|FD_EV_READY_R)) return; new = old & ~FD_EV_READY_R; - if (new & FD_EV_ACTIVE_R) - new |= FD_EV_POLLED_R; } while (unlikely(!_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new))); - - if ((old ^ new) & FD_EV_POLLED_R) - updt_fd_polling(fd); + updt_fd_polling(fd); } /* Report that FD cannot send anymore without polling (EAGAIN detected). */ @@ -341,12 +307,7 @@ static inline void fd_cant_send(const int fd) if (!(old & FD_EV_READY_W)) return; new = old & ~FD_EV_READY_W; - if (new & FD_EV_ACTIVE_W) - new |= FD_EV_POLLED_W; } while (unlikely(!_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new))); - - if ((old ^ new) & FD_EV_POLLED_W) - updt_fd_polling(fd); } /* Report that FD may send again without polling (EAGAIN not detected). 
*/ @@ -367,11 +328,9 @@ static inline void fd_want_recv(int fd) do { if (old & FD_EV_ACTIVE_R) return; - new = old | FD_EV_ACTIVE_R | FD_EV_POLLED_R; + new = old | FD_EV_ACTIVE_R; } while (unlikely(!_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new))); - - if ((old ^ new) & FD_EV_POLLED_R) - updt_fd_polling(fd); + updt_fd_polling(fd); } /* Prepare FD to try to send */ @@ -383,11 +342,9 @@ static inline void fd_want_send(int fd) do { if (old & FD_EV_ACTIVE_W) return; - new = old | FD_EV_ACTIVE_W | FD_EV_POLLED_W; + new = old | FD_EV_ACTIVE_W; } while (unlikely(!_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new))); - - if ((old ^ new) & FD_EV_POLLED_W) - updt_fd_polling(fd); + updt_fd_polling(fd); } /* Update events seen for FD and its state if needed. This should be called diff --git a/include/types/fd.h b/include/types/fd.h index b96abc09d..6794d742d 100644 --- a/include/types/fd.h +++ b/include/types/fd.h @@ -47,22 +47,18 @@ enum { #define FD_POLL_DATA (FD_POLL_IN | FD_POLL_OUT) #define FD_POLL_STICKY (FD_POLL_ERR | FD_POLL_HUP) +/* FD bits used for different polling states in each direction */ #define FD_EV_ACTIVE 1U #define FD_EV_READY 2U -#define FD_EV_POLLED 4U /* bits positions for a few flags */ #define FD_EV_READY_R_BIT 1 #define FD_EV_READY_W_BIT 5 -#define FD_EV_STATUS (FD_EV_ACTIVE | FD_EV_POLLED | FD_EV_READY) +#define FD_EV_STATUS (FD_EV_ACTIVE | FD_EV_READY) #define FD_EV_STATUS_R (FD_EV_STATUS) #define FD_EV_STATUS_W (FD_EV_STATUS << 4) -#define FD_EV_POLLED_R (FD_EV_POLLED) -#define FD_EV_POLLED_W (FD_EV_POLLED << 4) -#define FD_EV_POLLED_RW (FD_EV_POLLED_R | FD_EV_POLLED_W) - #define FD_EV_ACTIVE_R (FD_EV_ACTIVE) #define FD_EV_ACTIVE_W (FD_EV_ACTIVE << 4) #define FD_EV_ACTIVE_RW (FD_EV_ACTIVE_R | FD_EV_ACTIVE_W) @@ -71,17 +67,6 @@ enum { #define FD_EV_READY_W (FD_EV_READY << 4) #define FD_EV_READY_RW (FD_EV_READY_R | FD_EV_READY_W) -enum fd_states { - FD_ST_DISABLED = 0, - FD_ST_MUSTPOLL, - FD_ST_STOPPED, - FD_ST_ACTIVE, - FD_ST_ABORT, - FD_ST_POLLED, - 
FD_ST_PAUSED, - FD_ST_READY -}; - /* This is the value used to mark a file descriptor as dead. This value is * negative, this is important so that tests on fd < 0 properly match. It diff --git a/src/cli.c b/src/cli.c index ddc7f4c73..231b6123a 100644 --- a/src/cli.c +++ b/src/cli.c @@ -1000,13 +1000,11 @@ static int cli_io_handler_show_fd(struct appctx *appctx) li = fdt.owner; chunk_printf(&trash, - " %5d : st=0x%02x(R:%c%c%c W:%c%c%c) ev=0x%02x(%c%c%c%c%c) [%c%c] tmask=0x%lx umask=0x%lx owner=%p iocb=%p(%s)", + " %5d : st=0x%02x(R:%c%c W:%c%c) ev=0x%02x(%c%c%c%c%c) [%c%c] tmask=0x%lx umask=0x%lx owner=%p iocb=%p(%s)", fd, fdt.state, - (fdt.state & FD_EV_POLLED_R) ? 'P' : 'p', (fdt.state & FD_EV_READY_R) ? 'R' : 'r', (fdt.state & FD_EV_ACTIVE_R) ? 'A' : 'a', - (fdt.state & FD_EV_POLLED_W) ? 'P' : 'p', (fdt.state & FD_EV_READY_W) ? 'R' : 'r', (fdt.state & FD_EV_ACTIVE_W) ? 'A' : 'a', fdt.ev, diff --git a/src/ev_epoll.c b/src/ev_epoll.c index dd3a561b2..5172aecec 100644 --- a/src/ev_epoll.c +++ b/src/ev_epoll.c @@ -69,7 +69,7 @@ static void _update_fd(int fd) en = fdtab[fd].state; if ((polled_mask[fd].poll_send | polled_mask[fd].poll_recv) & tid_bit) { - if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_POLLED_RW)) { + if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_ACTIVE_RW)) { /* fd removed from poll list */ opcode = EPOLL_CTL_DEL; if (polled_mask[fd].poll_recv & tid_bit) @@ -78,19 +78,19 @@ static void _update_fd(int fd) _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit); } else { - if (((en & FD_EV_POLLED_R) != 0) == + if (((en & FD_EV_ACTIVE_R) != 0) == ((polled_mask[fd].poll_recv & tid_bit) != 0) && - ((en & FD_EV_POLLED_W) != 0) == + ((en & FD_EV_ACTIVE_W) != 0) == ((polled_mask[fd].poll_send & tid_bit) != 0)) return; - if (en & FD_EV_POLLED_R) { + if (en & FD_EV_ACTIVE_R) { if (!(polled_mask[fd].poll_recv & tid_bit)) _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, tid_bit); } else { if (polled_mask[fd].poll_recv & tid_bit) 
_HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit); } - if (en & FD_EV_POLLED_W) { + if (en & FD_EV_ACTIVE_W) { if (!(polled_mask[fd].poll_send & tid_bit)) _HA_ATOMIC_OR(&polled_mask[fd].poll_send, tid_bit); } else { @@ -101,12 +101,12 @@ static void _update_fd(int fd) opcode = EPOLL_CTL_MOD; } } - else if ((fdtab[fd].thread_mask & tid_bit) && (en & FD_EV_POLLED_RW)) { + else if ((fdtab[fd].thread_mask & tid_bit) && (en & FD_EV_ACTIVE_RW)) { /* new fd in the poll list */ opcode = EPOLL_CTL_ADD; - if (en & FD_EV_POLLED_R) + if (en & FD_EV_ACTIVE_R) _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, tid_bit); - if (en & FD_EV_POLLED_W) + if (en & FD_EV_ACTIVE_W) _HA_ATOMIC_OR(&polled_mask[fd].poll_send, tid_bit); } else { @@ -115,10 +115,10 @@ static void _update_fd(int fd) /* construct the epoll events based on new state */ ev.events = 0; - if (en & FD_EV_POLLED_R) + if (en & FD_EV_ACTIVE_R) ev.events |= EPOLLIN | EPOLLRDHUP; - if (en & FD_EV_POLLED_W) + if (en & FD_EV_ACTIVE_W) ev.events |= EPOLLOUT; ev.data.fd = fd; diff --git a/src/ev_evports.c b/src/ev_evports.c index d9d1637d4..46edb6962 100644 --- a/src/ev_evports.c +++ b/src/ev_evports.c @@ -46,9 +46,9 @@ static inline int evports_state_to_events(int state) { int events = 0; - if (state & FD_EV_POLLED_W) + if (state & FD_EV_ACTIVE_W) events |= POLLOUT; - if (state & FD_EV_POLLED_R) + if (state & FD_EV_ACTIVE_R) events |= POLLIN; return (events); @@ -73,7 +73,7 @@ static void _update_fd(int fd) en = fdtab[fd].state; - if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_POLLED_RW)) { + if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_ACTIVE_RW)) { if (!(polled_mask[fd].poll_recv & tid_bit) && !(polled_mask[fd].poll_send & tid_bit)) { /* fd was not watched, it's still not */ @@ -89,14 +89,14 @@ static void _update_fd(int fd) else { /* OK fd has to be monitored, it was either added or changed */ events = evports_state_to_events(en); - if (en & FD_EV_POLLED_R) { + if (en & FD_EV_ACTIVE_R) { if 
(!(polled_mask[fd].poll_recv & tid_bit)) _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, tid_bit); } else { if (polled_mask[fd].poll_recv & tid_bit) _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit); } - if (en & FD_EV_POLLED_W) { + if (en & FD_EV_ACTIVE_W) { if (!(polled_mask[fd].poll_send & tid_bit)) _HA_ATOMIC_OR(&polled_mask[fd].poll_send, tid_bit); } else { diff --git a/src/ev_kqueue.c b/src/ev_kqueue.c index d634b6728..191ddce56 100644 --- a/src/ev_kqueue.c +++ b/src/ev_kqueue.c @@ -43,7 +43,7 @@ static int _update_fd(int fd, int start) en = fdtab[fd].state; - if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_POLLED_RW)) { + if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_ACTIVE_RW)) { if (!(polled_mask[fd].poll_recv & tid_bit) && !(polled_mask[fd].poll_send & tid_bit)) { /* fd was not watched, it's still not */ @@ -60,7 +60,7 @@ static int _update_fd(int fd, int start) else { /* OK fd has to be monitored, it was either added or changed */ - if (en & FD_EV_POLLED_R) { + if (en & FD_EV_ACTIVE_R) { if (!(polled_mask[fd].poll_recv & tid_bit)) { EV_SET(&kev[changes++], fd, EVFILT_READ, EV_ADD, 0, 0, NULL); _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, tid_bit); @@ -71,7 +71,7 @@ static int _update_fd(int fd, int start) HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit); } - if (en & FD_EV_POLLED_W) { + if (en & FD_EV_ACTIVE_W) { if (!(polled_mask[fd].poll_send & tid_bit)) { EV_SET(&kev[changes++], fd, EVFILT_WRITE, EV_ADD, 0, 0, NULL); _HA_ATOMIC_OR(&polled_mask[fd].poll_send, tid_bit); diff --git a/src/ev_poll.c b/src/ev_poll.c index d4a1351a2..6c5de9bea 100644 --- a/src/ev_poll.c +++ b/src/ev_poll.c @@ -57,7 +57,7 @@ static void _update_fd(int fd, int *max_add_fd) * don't check the tid_bit. First thread to see the update * takes it for every other one. 
*/ - if (!(en & FD_EV_POLLED_RW)) { + if (!(en & FD_EV_ACTIVE_RW)) { if (!(polled_mask[fd].poll_recv | polled_mask[fd].poll_send)) { /* fd was not watched, it's still not */ return; @@ -70,7 +70,7 @@ static void _update_fd(int fd, int *max_add_fd) } else { /* OK fd has to be monitored, it was either added or changed */ - if (!(en & FD_EV_POLLED_R)) { + if (!(en & FD_EV_ACTIVE_R)) { hap_fd_clr(fd, fd_evts[DIR_RD]); if (polled_mask[fd].poll_recv & tid_bit) _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit); @@ -80,7 +80,7 @@ static void _update_fd(int fd, int *max_add_fd) _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, tid_bit); } - if (!(en & FD_EV_POLLED_W)) { + if (!(en & FD_EV_ACTIVE_W)) { hap_fd_clr(fd, fd_evts[DIR_WR]); if (polled_mask[fd].poll_send & tid_bit) _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit); diff --git a/src/ev_select.c b/src/ev_select.c index 89a661c54..7ee915ea5 100644 --- a/src/ev_select.c +++ b/src/ev_select.c @@ -48,7 +48,7 @@ static void _update_fd(int fd, int *max_add_fd) * don't check the tid_bit. First thread to see the update * takes it for every other one. 
*/ - if (!(en & FD_EV_POLLED_RW)) { + if (!(en & FD_EV_ACTIVE_RW)) { if (!(polled_mask[fd].poll_recv | polled_mask[fd].poll_send)) { /* fd was not watched, it's still not */ return; @@ -61,7 +61,7 @@ static void _update_fd(int fd, int *max_add_fd) } else { /* OK fd has to be monitored, it was either added or changed */ - if (!(en & FD_EV_POLLED_R)) { + if (!(en & FD_EV_ACTIVE_R)) { hap_fd_clr(fd, fd_evts[DIR_RD]); if (polled_mask[fd].poll_recv & tid_bit) _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~tid_bit); @@ -71,7 +71,7 @@ static void _update_fd(int fd, int *max_add_fd) _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, tid_bit); } - if (!(en & FD_EV_POLLED_W)) { + if (!(en & FD_EV_ACTIVE_W)) { hap_fd_clr(fd, fd_evts[DIR_WR]); if (polled_mask[fd].poll_send & tid_bit) _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~tid_bit); diff --git a/src/fd.c b/src/fd.c index 58d73d4ab..183c57375 100644 --- a/src/fd.c +++ b/src/fd.c @@ -14,90 +14,64 @@ * be considered even if its changes are reverted in the middle or if the fd is * replaced. * - * It is important to understand that as long as all expected events are - * processed, they might starve the polled events, especially because polled - * I/O starvation quickly induces more cached I/O. One solution to this - * consists in only processing a part of the events at once, but one drawback - * is that unhandled events will still wake the poller up. Using an edge- - * triggered poller such as EPOLL_ET will solve this issue though. - * * The event state for an FD, as found in fdtab[].state, is maintained for each * direction. The state field is built this way, with R bits in the low nibble * and W bits in the high nibble for ease of access and debugging : * * 7 6 5 4 3 2 1 0 - * [ 0 | PW | RW | AW | 0 | PR | RR | AR ] + * [ 0 | 0 | RW | AW | 0 | 0 | RR | AR ] * * A* = active *R = read - * P* = polled *W = write - * R* = ready + * R* = ready *W = write * * An FD is marked "active" when there is a desire to use it. 
- * An FD is marked "polled" when it is registered in the polling. * An FD is marked "ready" when it has not faced a new EAGAIN since last wake-up - * (it is a cache of the last EAGAIN regardless of polling changes). + * (it is a cache of the last EAGAIN regardless of polling changes). Each poller + * has its own "polled" state for the same fd, as stored in the polled_mask. * - * We have 8 possible states for each direction based on these 3 flags : + * We have 4 possible states for each direction based on these 2 flags : * - * +---+---+---+----------+---------------------------------------------+ - * | P | R | A | State | Description | - * +---+---+---+----------+---------------------------------------------+ - * | 0 | 0 | 0 | DISABLED | No activity desired, not ready. | - * | 0 | 0 | 1 | MUSTPOLL | Activity desired via polling. | - * | 0 | 1 | 0 | STOPPED | End of activity without polling. | - * | 0 | 1 | 1 | ACTIVE | Activity desired without polling. | - * | 1 | 0 | 0 | ABORT | Aborted poll(). Not frequently seen. | - * | 1 | 0 | 1 | POLLED | FD is being polled. | - * | 1 | 1 | 0 | PAUSED | FD was paused while ready (eg: buffer full) | - * | 1 | 1 | 1 | READY | FD was marked ready by poll() | - * +---+---+---+----------+---------------------------------------------+ + * +---+---+----------+---------------------------------------------+ + * | R | A | State | Description | + * +---+---+----------+---------------------------------------------+ + * | 0 | 0 | DISABLED | No activity desired, not ready. | + * | 0 | 1 | ACTIVE | Activity desired. | + * | 1 | 0 | STOPPED | End of activity. | + * | 1 | 1 | READY | Activity desired and reported. 
| + * +---+---+----------+---------------------------------------------+ * * The transitions are pretty simple : * - fd_want_*() : set flag A * - fd_stop_*() : clear flag A * - fd_cant_*() : clear flag R (when facing EAGAIN) * - fd_may_*() : set flag R (upon return from poll()) - * - sync() : if (A) { if (!R) P := 1 } else { P := 0 } * - * The PAUSED, ABORT and MUSTPOLL states are transient for level-trigerred - * pollers and are fixed by the sync() which happens at the beginning of the - * poller. For event-triggered pollers, only the MUSTPOLL state will be - * transient and ABORT will lead to PAUSED. The ACTIVE state is the only stable - * one which has P != A. + * Each poller then computes its own polled state : + * if (A) { P := 1 } else { P := 0 } * - * The READY state is a bit special as activity on the FD might be notified - * both by the poller or by the cache. But it is needed for some multi-layer - * protocols (eg: SSL) where connection activity is not 100% linked to FD - * activity. Also some pollers might prefer to implement it as ACTIVE if - * enabling/disabling the FD is cheap. The READY and ACTIVE states are the - * two states for which a cache entry is allocated. + * The state transitions look like the diagram below. * - * The state transitions look like the diagram below. 
Only the 4 right states - * have polling enabled : - * - * (POLLED=0) (POLLED=1) - * - * +----------+ sync +-------+ - * | DISABLED | <----- | ABORT | (READY=0, ACTIVE=0) - * +----------+ +-------+ - * clr | ^ set | ^ - * | | | | - * v | set v | clr - * +----------+ sync +--------+ - * | MUSTPOLL | -----> | POLLED | (READY=0, ACTIVE=1) - * +----------+ +--------+ - * ^ poll | ^ - * | | | - * | EAGAIN v | EAGAIN - * +--------+ +-------+ - * | ACTIVE | | READY | (READY=1, ACTIVE=1) - * +--------+ +-------+ - * clr | ^ set | ^ - * | | | | - * v | set v | clr - * +---------+ sync +--------+ - * | STOPPED | <------ | PAUSED | (READY=1, ACTIVE=0) - * +---------+ +--------+ + * may +----------+ + * ,----| DISABLED | (READY=0, ACTIVE=0) + * | +----------+ + * | want | ^ + * | | | + * | v | stop + * | +----------+ + * | | ACTIVE | (READY=0, ACTIVE=1) + * | +----------+ + * | | ^ + * | may | | + * | v | EAGAIN (cant) + * | +--------+ + * | | READY | (READY=1, ACTIVE=1) + * | +--------+ + * | stop | ^ + * | | | + * | v | want + * | +---------+ + * `--->| STOPPED | (READY=1, ACTIVE=0) + * +---------+ */ #include