diff --git a/Makefile b/Makefile index faa37c059..d3d5c9789 100644 --- a/Makefile +++ b/Makefile @@ -11,6 +11,16 @@ TARGET = linux24 #TARGET = linux22 #TARGET = solaris +USE_POLL = 1 + +ifeq ($(TARGET),linux24e) +USE_EPOLL = 1 +endif + +ifeq ($(TARGET),linux26) +USE_EPOLL = 1 +endif + # pass CPU= to make to optimize for a particular CPU CPU = generic #CPU = i586 @@ -35,26 +45,26 @@ PCREDIR := $(shell pcre-config --prefix 2>/dev/null || :) TCPSPLICEDIR := # This is for standard Linux 2.6 with netfilter and epoll() -COPTS.linux26 = -DNETFILTER -DENABLE_POLL -DENABLE_EPOLL +COPTS.linux26 = -DNETFILTER LIBS.linux26 = # This is for enhanced Linux 2.4 with netfilter and epoll() patch. # Warning! If kernel is 2.4 with epoll-lt <= 0.21, then you must add # -DEPOLL_CTL_MOD_WORKAROUND to workaround a very rare bug. -#COPTS.linux24e = -DNETFILTER -DENABLE_POLL -DENABLE_EPOLL -DUSE_MY_EPOLL -DEPOLL_CTL_MOD_WORKAROUND -COPTS.linux24e = -DNETFILTER -DENABLE_POLL -DENABLE_EPOLL -DUSE_MY_EPOLL +#COPTS.linux24e = -DNETFILTER -DUSE_MY_EPOLL -DEPOLL_CTL_MOD_WORKAROUND +COPTS.linux24e = -DNETFILTER -DUSE_MY_EPOLL LIBS.linux24e = # This is for standard Linux 2.4 with netfilter but without epoll() -COPTS.linux24 = -DNETFILTER -DENABLE_POLL +COPTS.linux24 = -DNETFILTER LIBS.linux24 = # This is for Linux 2.2 -COPTS.linux22 = -DUSE_GETSOCKNAME -DENABLE_POLL +COPTS.linux22 = -DUSE_GETSOCKNAME LIBS.linux22 = # This is for Solaris 8 -COPTS.solaris = -fomit-frame-pointer -DENABLE_POLL -DFD_SETSIZE=65536 +COPTS.solaris = -fomit-frame-pointer -DFD_SETSIZE=65536 LIBS.solaris = -lnsl -lsocket # CPU dependant optimizations @@ -92,7 +102,6 @@ ADDINC = ADDLIB = # set some defines when needed. -# Known ones are -DENABLE_POLL, -DENABLE_EPOLL, and -DUSE_MY_EPOLL # - use -DTPROXY to compile with transparent proxy support. 
DEFINE = -DTPROXY @@ -136,10 +145,12 @@ endif ifneq ($(USE_POLL),) OPTIONS += -DENABLE_POLL +OPT_OBJS += src/ev_poll.o endif ifneq ($(USE_EPOLL),) OPTIONS += -DENABLE_EPOLL +OPT_OBJS += src/ev_epoll.o endif ifneq ($(USE_MY_EPOLL),) @@ -199,7 +210,7 @@ OBJS = src/haproxy.o src/list.o src/chtbl.o src/hashpjw.o src/base64.o \ src/time.o src/fd.o src/regex.o src/cfgparse.o src/server.o \ src/checks.o src/queue.o src/capture.o src/client.o src/proxy.o \ src/proto_http.o src/stream_sock.o src/appsession.o src/backend.o \ - src/session.o src/hdr_idx.o src/rbtree.o + src/session.o src/hdr_idx.o src/rbtree.o src/ev_select.o haproxy: $(OBJS) $(OPT_OBJS) $(LD) $(LDFLAGS) -o $@ $^ $(LIBS) diff --git a/Makefile.bsd b/Makefile.bsd index 67f51d9ca..043ff04dc 100644 --- a/Makefile.bsd +++ b/Makefile.bsd @@ -87,7 +87,7 @@ OBJS = src/haproxy.o src/list.o src/chtbl.o src/hashpjw.o src/base64.o \ src/time.o src/fd.o src/regex.o src/cfgparse.o src/server.o \ src/checks.o src/queue.o src/capture.o src/client.o src/proxy.o \ src/proto_http.o src/stream_sock.o src/appsession.o src/backend.o \ - src/session.o src/hdr_idx.o src/rbtree.o + src/session.o src/hdr_idx.o src/rbtree.o src/ev_select.o src/ev_poll.o all: haproxy diff --git a/Makefile.osx b/Makefile.osx index a4fda97ad..cb79d5779 100644 --- a/Makefile.osx +++ b/Makefile.osx @@ -87,7 +87,7 @@ OBJS = src/haproxy.o src/list.o src/chtbl.o src/hashpjw.o src/base64.o \ src/time.o src/fd.o src/regex.o src/cfgparse.o src/server.o \ src/checks.o src/queue.o src/capture.o src/client.o src/proxy.o \ src/proto_http.o src/stream_sock.o src/appsession.o src/backend.o \ - src/session.o src/hdr_idx.o src/rbtree.o + src/session.o src/hdr_idx.o src/rbtree.o src/ev_select.o src/ev_poll.o all: haproxy diff --git a/include/proto/fd.h b/include/proto/fd.h index e41fcd151..03a7add8e 100644 --- a/include/proto/fd.h +++ b/include/proto/fd.h @@ -2,7 +2,7 @@ include/proto/fd.h File descriptors states. 
- Copyright (C) 2000-2006 Willy Tarreau - w@1wt.eu + Copyright (C) 2000-2007 Willy Tarreau - w@1wt.eu This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -34,29 +34,41 @@ */ void fd_delete(int fd); +/* registers all known pollers */ +void register_pollers(); + +/* disable the specified poller */ +void disable_poller(const char *poller_name); /* - * Benchmarks performed on a Pentium-M notebook show that using functions - * instead of the usual macros improve the FD_* performance by about 80%, - * and that marking them regparm(2) adds another 20%. + * Initialize the pollers till the best one is found. + * If none works, returns 0, otherwise 1. */ -#if defined(CONFIG_HAP_INLINE_FD_SET) +int init_pollers(); -# define MY_FD_SET FD_SET -# define MY_FD_CLR FD_CLR -# define MY_FD_ISSET FD_ISSET +/* + * Runs the polling loop. NOTE(review): the haproxy.c part of this patch defines and calls run_poll_loop(); no definition of run_poller() is visible here - confirm which name is intended. + */ +void run_poller(); -#else -# define MY_FD_SET my_fd_set -# define MY_FD_CLR my_fd_clr -# define MY_FD_ISSET my_fd_isset +/* FIXME: dirty hack during code transition: the macros below paste dir_ onto their second argument, so both the legacy fd_set names (StaticReadEvent, StaticWriteEvent) and DIR_RD/DIR_WR expand to a direction constant */ +#define dir_StaticWriteEvent DIR_WR +#define dir_StaticReadEvent DIR_RD +#define dir_DIR_RD DIR_RD +#define dir_DIR_WR DIR_WR -REGPRM2 void my_fd_set(const int fd, fd_set *ev); -REGPRM2 void my_fd_clr(const int fd, fd_set *ev); -REGPRM2 int my_fd_isset(const int fd, const fd_set *ev); +#define MY_FD_SET(fd, ev) (cur_poller.set((fd), dir_##ev)) +#define MY_FD_CLR(fd, ev) (cur_poller.clr((fd), dir_##ev)) +#define MY_FD_ISSET(fd, ev) (cur_poller.isset((fd), dir_##ev)) -#endif +#define EV_FD_SET(fd, ev) (cur_poller.set((fd), dir_##ev)) +#define EV_FD_CLR(fd, ev) (cur_poller.clr((fd), dir_##ev)) +#define EV_FD_ISSET(fd, ev) (cur_poller.isset((fd), dir_##ev)) +#define EV_FD_COND_S(fd, ev) (cur_poller.cond_s((fd), dir_##ev)) +#define EV_FD_COND_C(fd, ev) (cur_poller.cond_c((fd), dir_##ev)) +#define EV_FD_REM(fd) (cur_poller.rem(fd)) +#define EV_FD_CLO(fd) (cur_poller.clo(fd)) /* recomputes the maxfd limit from the fd 
*/ diff --git a/include/types/fd.h b/include/types/fd.h index ae8872bee..6d8b31fb7 100644 --- a/include/types/fd.h +++ b/include/types/fd.h @@ -53,6 +53,43 @@ struct fdtab { int state; /* the state of this fd */ }; +/* + * Poller descriptors. + * - name is initialized by the poller's register() function, and should not + * be allocated, just linked to. + * - pref is initialized by the poller's register() function. It is set to 0 + * by default, meaning the poller is disabled. init() should set it to 0 in + * case of failure. term() must set it to 0. A generic unoptimized select() + * poller should set it to 100. + * - private is initialized by the poller's init() function, and cleaned by + * the term() function. + * - cond_s() checks if fd was not set then sets it and returns 1. Otherwise 0. + * - cond_c() checks if fd was set then clears it and returns 1. Otherwise 0. + * - clo() should be used to indicate to the poller that fd will be closed. It + * may be the same as rem() on some pollers. + * - poll() calls the poller, waiting at most wait_time ms. 
+ */ +struct poller { + void *private; /* any private data for the poller */ + REGPRM2 int (*isset)(const int fd, const int dir); /* check if fd is being polled for dir */ + REGPRM2 void (*set)(const int fd, const int dir); /* set polling on fd for dir */ + REGPRM2 void (*clr)(const int fd, const int dir); /* clear polling on fd for dir */ + REGPRM2 int (*cond_s)(const int fd, const int dir); /* set polling on fd for dir if unset */ + REGPRM2 int (*cond_c)(const int fd, const int dir); /* clear polling on fd for dir if set */ + REGPRM1 void (*rem)(const int fd); /* remove any polling on fd */ + REGPRM1 void (*clo)(const int fd); /* mark fd as closed */ + REGPRM2 void (*poll)(struct poller *p, int wait_time); /* the poller itself */ + REGPRM1 int (*init)(struct poller *p); /* poller initialization */ + REGPRM1 void (*term)(struct poller *p); /* termination of this poller */ + const char *name; /* poller name */ + int pref; /* try pollers with higher preference first */ +}; + +extern struct poller cur_poller; /* the current poller */ +extern int nbpollers; +#define MAX_POLLERS 10 +extern struct poller pollers[MAX_POLLERS]; /* all registered pollers */ + extern struct fdtab *fdtab; /* array of all the file descriptors */ extern int maxfd; /* # of the highest fd + 1 */ extern int totalconn; /* total # of terminated sessions */ diff --git a/include/types/polling.h b/include/types/polling.h index 821698ee7..ed3cf64dc 100644 --- a/include/types/polling.h +++ b/include/types/polling.h @@ -2,7 +2,7 @@ include/types/polling.h File descriptors and polling definitions. 
- Copyright (C) 2000-2006 Willy Tarreau - w@1wt.eu + Copyright (C) 2000-2007 Willy Tarreau - w@1wt.eu This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -53,8 +53,6 @@ #define POLL_USE_POLL (1<<1) #define POLL_USE_EPOLL (1<<2) -/* fd states */ -extern fd_set *StaticReadEvent, *StaticWriteEvent; extern int cfg_polling_mechanism; /* POLL_USE_{SELECT|POLL|EPOLL} */ diff --git a/src/ev_epoll.c b/src/ev_epoll.c new file mode 100644 index 000000000..ff49505e2 --- /dev/null +++ b/src/ev_epoll.c @@ -0,0 +1,355 @@ +/* + * FD polling functions for linux epoll() + * + * Copyright 2000-2007 Willy Tarreau + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include + +#if defined(USE_MY_EPOLL) +#include +#include +_syscall1 (int, epoll_create, int, size); +_syscall4 (int, epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event *, event); +_syscall4 (int, epoll_wait, int, epfd, struct epoll_event *, events, int, maxevents, int, timeout); +#endif + + +static fd_set *StaticReadEvent, *StaticWriteEvent; +static fd_set *PrevReadEvent, *PrevWriteEvent; + +/* private data */ +static struct epoll_event *epoll_events; +static int epoll_fd; + + +/* + * Benchmarks performed on a Pentium-M notebook show that using functions + * instead of the usual macros improve the FD_* performance by about 80%, + * and that marking them regparm(2) adds another 20%. 
+ */ +REGPRM2 static int __fd_isset(const int fd, const int dir) +{ /* returns FD_ISSET() for fd in the read set when dir == DIR_RD, in the write set otherwise */ + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + return FD_ISSET(fd, ev); +} + +REGPRM2 static void __fd_set(const int fd, const int dir) +{ /* marks fd in the read set when dir == DIR_RD, in the write set otherwise */ + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + FD_SET(fd, ev); +} + +REGPRM2 static void __fd_clr(const int fd, const int dir) +{ /* unmarks fd in the read set when dir == DIR_RD, in the write set otherwise */ + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + FD_CLR(fd, ev); +} + +REGPRM2 static int __fd_cond_s(const int fd, const int dir) +{ /* marks fd only if it was not marked yet; returns 1 when it changed the set, 0 otherwise */ + int ret; + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + ret = !FD_ISSET(fd, ev); + if (ret) + FD_SET(fd, ev); + return ret; +} + +REGPRM2 static int __fd_cond_c(const int fd, const int dir) +{ /* unmarks fd only if it was marked; returns non-zero when it changed the set, 0 otherwise */ + int ret; + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + ret = FD_ISSET(fd, ev); + if (ret) + FD_CLR(fd, ev); + return ret; +} + +REGPRM1 static void __fd_rem(const int fd) +{ /* unmarks fd in both the read and the write set */ + FD_CLR(fd, StaticReadEvent); + FD_CLR(fd, StaticWriteEvent); +} + +REGPRM1 static void __fd_clo(const int fd) +{ /* same as __fd_rem(), and additionally unmarks fd in the two Prev* sets */ + FD_CLR(fd, StaticReadEvent); + FD_CLR(fd, StaticWriteEvent); + FD_CLR(fd, PrevReadEvent); + FD_CLR(fd, PrevWriteEvent); +} + + + +/* + * Initialization of the epoll() poller. + * Returns 0 in case of failure, non-zero in case of success. If it fails, it + * disables the poller by setting its pref to 0. 
+ */ +REGPRM1 static int epoll_init(struct poller *p) +{ /* allocates the epoll descriptor, the event array and the four fd sets; on any failure releases what was obtained, sets p->pref to 0 and returns 0 */ + __label__ fail_pwevt, fail_prevt, fail_swevt, fail_srevt, fail_ee, fail_fd; + int fd_set_bytes; + + p->private = NULL; + fd_set_bytes = sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE; + + epoll_fd = epoll_create(global.maxsock + 1); + if (epoll_fd < 0) + goto fail_fd; + + epoll_events = (struct epoll_event*) + calloc(1, sizeof(struct epoll_event) * global.maxsock); + + if (epoll_events == NULL) + goto fail_ee; + + if ((PrevReadEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL) + goto fail_prevt; + + if ((PrevWriteEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL) + goto fail_pwevt; + + if ((StaticReadEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL) + goto fail_srevt; + + if ((StaticWriteEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL) + goto fail_swevt; + + return 1; + + fail_swevt: /* each freed pointer is reset so that the NULL checks in epoll_term() never see a stale address */ + free(StaticReadEvent); StaticReadEvent = NULL; + fail_srevt: + free(PrevWriteEvent); PrevWriteEvent = NULL; + fail_pwevt: + free(PrevReadEvent); PrevReadEvent = NULL; + fail_prevt: + free(epoll_events); epoll_events = NULL; + fail_ee: + close(epoll_fd); + epoll_fd = -1; /* not 0: 0 is a valid descriptor and epoll_term() calls close(epoll_fd) */ + fail_fd: + p->pref = 0; + return 0; +} + +/* + * Termination of the epoll() poller. + * Memory is released and the poller is marked as unselectable. 
+ */ +REGPRM1 static void epoll_term(struct poller *p) +{ + if (StaticWriteEvent) + free(StaticWriteEvent); + + if (StaticReadEvent) + free(StaticReadEvent); + + if (PrevWriteEvent) + free(PrevWriteEvent); + + if (PrevReadEvent) + free(PrevReadEvent); + + if (epoll_events) + free(epoll_events); + + close(epoll_fd); + epoll_fd = 0; + + p->private = NULL; + p->pref = 0; +} + +/* + * epoll() poller + */ +REGPRM2 static void epoll_poll(struct poller *p, int wait_time) +{ + int status; + int fd; + + int fds, count; + int pr, pw, sr, sw; + unsigned rn, ro, wn, wo; /* read new, read old, write new, write old */ + struct epoll_event ev; + + for (fds = 0; (fds << INTBITS) < maxfd; fds++) { + + rn = ((int*)StaticReadEvent)[fds]; ro = ((int*)PrevReadEvent)[fds]; + wn = ((int*)StaticWriteEvent)[fds]; wo = ((int*)PrevWriteEvent)[fds]; + + if ((ro^rn) | (wo^wn)) { + for (count = 0, fd = fds << INTBITS; count < (1<> count) & 1; + pw = (wo >> count) & 1; + sr = (rn >> count) & 1; + sw = (wn >> count) & 1; +#else + pr = FD_ISSET(fd&((1<name = "epoll"; + p->pref = 300; + p->private = NULL; + + p->init = epoll_init; + p->term = epoll_term; + p->poll = epoll_poll; + p->isset = __fd_isset; + p->set = __fd_set; + p->clr = __fd_clr; + p->rem = __fd_rem; + p->clo = __fd_clo; + p->cond_s = __fd_cond_s; + p->cond_c = __fd_cond_c; + return 1; +} + + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/src/ev_poll.c b/src/ev_poll.c new file mode 100644 index 000000000..2d6d98474 --- /dev/null +++ b/src/ev_poll.c @@ -0,0 +1,264 @@ +/* + * FD polling functions for generic poll() + * + * Copyright 2000-2007 Willy Tarreau + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include + + +static fd_set *StaticReadEvent, *StaticWriteEvent; + +/* private data */ +static struct pollfd *poll_events = NULL; + + +/* + * Benchmarks performed on a Pentium-M notebook show that using functions + * instead of the usual macros improves the FD_* performance by about 80%, + * and that marking them regparm(2) adds another 20%. + */ +REGPRM2 static int __fd_isset(const int fd, const int dir) +{ /* returns FD_ISSET() for fd in the read set when dir == DIR_RD, in the write set otherwise */ + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + return FD_ISSET(fd, ev); +} + +REGPRM2 static void __fd_set(const int fd, const int dir) +{ /* marks fd in the read set when dir == DIR_RD, in the write set otherwise */ + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + FD_SET(fd, ev); +} + +REGPRM2 static void __fd_clr(const int fd, const int dir) +{ /* unmarks fd in the read set when dir == DIR_RD, in the write set otherwise */ + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + FD_CLR(fd, ev); +} + +REGPRM2 static int __fd_cond_s(const int fd, const int dir) +{ /* marks fd only if it was not marked yet; returns 1 when it changed the set, 0 otherwise */ + int ret; + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + ret = !FD_ISSET(fd, ev); + if (ret) + FD_SET(fd, ev); + return ret; +} + +REGPRM2 static int __fd_cond_c(const int fd, const int dir) +{ /* unmarks fd only if it was marked; returns non-zero when it changed the set, 0 otherwise */ + int ret; + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + ret = FD_ISSET(fd, ev); + if (ret) + FD_CLR(fd, ev); + return ret; +} + +REGPRM1 static void __fd_rem(const int fd) +{ /* unmarks fd in both the read and the write set */ + FD_CLR(fd, StaticReadEvent); + FD_CLR(fd, StaticWriteEvent); +} + + + +/* + * Initialization of the poll() poller. + * Returns 0 in case of failure, non-zero in case of success. If it fails, it + * disables the poller by setting its pref to 0. 
+ */ +REGPRM1 static int poll_init(struct poller *p) +{ + __label__ fail_swevt, fail_srevt, fail_pe; + int fd_set_bytes; + + p->private = NULL; + fd_set_bytes = sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE; + + poll_events = (struct pollfd*) + calloc(1, sizeof(struct pollfd) * global.maxsock); + + if (poll_events == NULL) + goto fail_pe; + + if ((StaticReadEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL) + goto fail_srevt; + + if ((StaticWriteEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL) + goto fail_swevt; + + return 1; + + fail_swevt: + free(StaticReadEvent); + fail_srevt: + free(poll_events); + fail_pe: + p->pref = 0; + return 0; +} + +/* + * Termination of the poll() poller. + * Memory is released and the poller is marked as unselectable. + */ +REGPRM1 static void poll_term(struct poller *p) +{ + if (StaticWriteEvent) + free(StaticWriteEvent); + if (StaticReadEvent) + free(StaticReadEvent); + if (poll_events) + free(poll_events); + p->private = NULL; + p->pref = 0; +} + +/* + * Poll() poller + */ +REGPRM2 static void poll_poll(struct poller *p, int wait_time) +{ + int status; + int fd, nbfd; + + int fds, count; + int sr, sw; + unsigned rn, wn; /* read new, write new */ + + nbfd = 0; + for (fds = 0; (fds << INTBITS) < maxfd; fds++) { + + rn = ((int*)StaticReadEvent)[fds]; + wn = ((int*)StaticWriteEvent)[fds]; + + if ((rn|wn)) { + for (count = 0, fd = fds << INTBITS; count < (1<> count) & 1; + sw = (wn >> count) & 1; +#else + sr = FD_ISSET(fd&((1< 0 && count < nbfd; count++) { + fd = poll_events[count].fd; + + if (!(poll_events[count].revents & ( POLLOUT | POLLIN | POLLERR | POLLHUP ))) + continue; + + /* ok, we found one active fd */ + status--; + + if (FD_ISSET(fd, StaticReadEvent)) { + if (fdtab[fd].state == FD_STCLOSE) + continue; + if (poll_events[count].revents & ( POLLIN | POLLERR | POLLHUP )) + fdtab[fd].cb[DIR_RD].f(fd); + } + + if (FD_ISSET(fd, StaticWriteEvent)) { + if (fdtab[fd].state == FD_STCLOSE) + continue; + if 
(poll_events[count].revents & ( POLLOUT | POLLERR | POLLHUP )) + fdtab[fd].cb[DIR_WR].f(fd); + } + } + +} + +/* + * The only exported function. Returns 1. + */ +int poll_register(struct poller *p) +{ + p->name = "poll"; + p->pref = 200; + p->private = NULL; + + p->init = poll_init; + p->term = poll_term; + p->poll = poll_poll; + p->isset = __fd_isset; + p->set = __fd_set; + p->clr = __fd_clr; + p->clo = p->rem = __fd_rem; + p->cond_s = __fd_cond_s; + p->cond_c = __fd_cond_c; + return 1; +} + + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/src/ev_select.c b/src/ev_select.c new file mode 100644 index 000000000..b1cd44ef3 --- /dev/null +++ b/src/ev_select.c @@ -0,0 +1,264 @@ +/* + * FD polling functions for generic select() + * + * Copyright 2000-2007 Willy Tarreau + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include + + +static fd_set *ReadEvent, *WriteEvent; +static fd_set *StaticReadEvent, *StaticWriteEvent; + + +/* + * Benchmarks performed on a Pentium-M notebook show that using functions + * instead of the usual macros improve the FD_* performance by about 80%, + * and that marking them regparm(2) adds another 20%. 
+ */ +REGPRM2 static int __fd_isset(const int fd, const int dir) +{ /* returns FD_ISSET() for fd in the read set when dir == DIR_RD, in the write set otherwise */ + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + return FD_ISSET(fd, ev); +} + +REGPRM2 static void __fd_set(const int fd, const int dir) +{ /* marks fd in the read set when dir == DIR_RD, in the write set otherwise */ + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + FD_SET(fd, ev); +} + +REGPRM2 static void __fd_clr(const int fd, const int dir) +{ /* unmarks fd in the read set when dir == DIR_RD, in the write set otherwise */ + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + FD_CLR(fd, ev); +} + +REGPRM2 static int __fd_cond_s(const int fd, const int dir) +{ /* marks fd only if it was not marked yet; returns 1 when it changed the set, 0 otherwise */ + int ret; + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + ret = !FD_ISSET(fd, ev); + if (ret) + FD_SET(fd, ev); + return ret; +} + +REGPRM2 static int __fd_cond_c(const int fd, const int dir) +{ /* unmarks fd only if it was marked; returns non-zero when it changed the set, 0 otherwise */ + int ret; + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + ret = FD_ISSET(fd, ev); + if (ret) + FD_CLR(fd, ev); + return ret; +} + +REGPRM1 static void __fd_rem(const int fd) +{ /* unmarks fd in both the read and the write set */ + FD_CLR(fd, StaticReadEvent); + FD_CLR(fd, StaticWriteEvent); +} + + +/* + * Initialization of the select() poller. + * Returns 0 in case of failure, non-zero in case of success. If it fails, it + * disables the poller by setting its pref to 0. 
+ */ +REGPRM1 static int select_init(struct poller *p) +{ + __label__ fail_swevt, fail_srevt, fail_wevt, fail_revt; + int fd_set_bytes; + + p->private = NULL; + fd_set_bytes = sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE; + + if ((ReadEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL) + goto fail_revt; + + if ((WriteEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL) + goto fail_wevt; + + if ((StaticReadEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL) + goto fail_srevt; + + if ((StaticWriteEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL) + goto fail_swevt; + + return 1; + + fail_swevt: + free(StaticReadEvent); + fail_srevt: + free(WriteEvent); + fail_wevt: + free(ReadEvent); + fail_revt: + p->pref = 0; + return 0; +} + +/* + * Termination of the select() poller. + * Memory is released and the poller is marked as unselectable. + */ +REGPRM1 static void select_term(struct poller *p) +{ + if (StaticWriteEvent) + free(StaticWriteEvent); + if (StaticReadEvent) + free(StaticReadEvent); + if (WriteEvent) + free(WriteEvent); + if (ReadEvent) + free(ReadEvent); + p->private = NULL; + p->pref = 0; +} + +/* + * Select() poller + */ +REGPRM2 static void select_poll(struct poller *p, int wait_time) +{ + int status; + int fd, i; + struct timeval delta; + int readnotnull, writenotnull; + int fds; + char count; + + /* allow select to return immediately when needed */ + delta.tv_sec = delta.tv_usec = 0; + if (wait_time > 0) { /* FIXME */ + /* Convert to timeval */ + /* to avoid eventual select loops due to timer precision */ + wait_time += SCHEDULER_RESOLUTION; + delta.tv_sec = wait_time / 1000; + delta.tv_usec = (wait_time % 1000) * 1000; + } + + /* let's restore fdset state */ + + readnotnull = 0; writenotnull = 0; + for (i = 0; i < (maxfd + FD_SETSIZE - 1)/(8*sizeof(int)); i++) { + readnotnull |= (*(((int*)ReadEvent)+i) = *(((int*)StaticReadEvent)+i)) != 0; + writenotnull |= (*(((int*)WriteEvent)+i) = *(((int*)StaticWriteEvent)+i)) != 0; + } + + // /* 
just a verification code, needs to be removed for performance */ + // for (i=0; i= 0) ? &delta : NULL); + + tv_now(&now); + + if (status <= 0) + return; + + for (fds = 0; (fds << INTBITS) < maxfd; fds++) { + if ((((int *)(ReadEvent))[fds] | ((int *)(WriteEvent))[fds]) == 0) + continue; + + for (count = 1<name = "select"; + p->pref = 150; + p->private = NULL; + + p->init = select_init; + p->term = select_term; + p->poll = select_poll; + p->isset = __fd_isset; + p->set = __fd_set; + p->clr = __fd_clr; + p->clo = p->rem = __fd_rem; + p->cond_s = __fd_cond_s; + p->cond_c = __fd_cond_c; + return 1; +} + + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/src/fd.c b/src/fd.c index 5fd3c27c0..18ccb2456 100644 --- a/src/fd.c +++ b/src/fd.c @@ -1,7 +1,7 @@ /* * File descriptors management functions. * - * Copyright 2000-2006 Willy Tarreau + * Copyright 2000-2007 Willy Tarreau * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -10,501 +10,49 @@ * */ -/* - * FIXME: - * - we still use 'listeners' to check whether we want to stop or not. - * - the various pollers should be moved to other external files, possibly - * dynamic libs. 
- */ - +#include #include -#include #include #include #include -#include #include #include #include -#include -#include struct fdtab *fdtab = NULL; /* array of all the file descriptors */ int maxfd; /* # of the highest fd + 1 */ int totalconn; /* total # of terminated sessions */ int actconn; /* # of active sessions */ -fd_set *StaticReadEvent, *StaticWriteEvent; int cfg_polling_mechanism = 0; /* POLL_USE_{SELECT|POLL|EPOLL} */ - -/****************************** - * pollers - ******************************/ - - -#if !defined(CONFIG_HAP_INLINE_FD_SET) -/* - * Benchmarks performed on a Pentium-M notebook show that using functions - * instead of the usual macros improve the FD_* performance by about 80%, - * and that marking them regparm(2) adds another 20%. - */ -REGPRM2 void my_fd_set(const int fd, fd_set *ev) -{ - FD_SET(fd, ev); -} - -REGPRM2 void my_fd_clr(const int fd, fd_set *ev) -{ - FD_CLR(fd, ev); -} - -REGPRM2 int my_fd_isset(const int fd, const fd_set *ev) -{ - return FD_ISSET(fd, ev); -} -#endif - - -/* - * FIXME: this is dirty, but at the moment, there's no other solution to remove - * the old FDs from outside the loop. Perhaps we should export a global 'poll' - * structure with pointers to functions such as init_fd() and close_fd(), plus - * a private structure with several pointers to places such as below. - */ - -#if defined(ENABLE_EPOLL) -fd_set *PrevReadEvent = NULL, *PrevWriteEvent = NULL; - -#if defined(USE_MY_EPOLL) -#include -#include -_syscall1 (int, epoll_create, int, size); -_syscall4 (int, epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event *, event); -_syscall4 (int, epoll_wait, int, epfd, struct epoll_event *, events, int, maxevents, int, timeout); -#endif - -/* - * Main epoll() loop. - * does 3 actions : - * 0 (POLL_LOOP_ACTION_INIT) : initializes necessary private structures - * 1 (POLL_LOOP_ACTION_RUN) : runs the loop - * 2 (POLL_LOOP_ACTION_CLEAN) : cleans up - * - * returns 0 if initialization failed, !0 otherwise. 
- */ - -int epoll_loop(int action) -{ - int next_time; - int status; - int fd; - - int fds, count; - int pr, pw, sr, sw; - unsigned rn, ro, wn, wo; /* read new, read old, write new, write old */ - struct epoll_event ev; - - /* private data */ - static struct epoll_event *epoll_events = NULL; - static int epoll_fd; - - if (action == POLL_LOOP_ACTION_INIT) { - epoll_fd = epoll_create(global.maxsock + 1); - if (epoll_fd < 0) - return 0; - else { - epoll_events = (struct epoll_event*) - calloc(1, sizeof(struct epoll_event) * global.maxsock); - PrevReadEvent = (fd_set *) - calloc(1, sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE); - PrevWriteEvent = (fd_set *) - calloc(1, sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE); - } - return 1; - } - else if (action == POLL_LOOP_ACTION_CLEAN) { - if (PrevWriteEvent) free(PrevWriteEvent); - if (PrevReadEvent) free(PrevReadEvent); - if (epoll_events) free(epoll_events); - close(epoll_fd); - epoll_fd = 0; - return 1; - } - - /* OK, it's POLL_LOOP_ACTION_RUN */ - - tv_now(&now); - - while (1) { - next_time = process_runnable_tasks(); - - /* stop when there's no connection left and we don't allow them anymore */ - if (!actconn && listeners == 0) - break; - - for (fds = 0; (fds << INTBITS) < maxfd; fds++) { - - rn = ((int*)StaticReadEvent)[fds]; ro = ((int*)PrevReadEvent)[fds]; - wn = ((int*)StaticWriteEvent)[fds]; wo = ((int*)PrevWriteEvent)[fds]; - - if ((ro^rn) | (wo^wn)) { - for (count = 0, fd = fds << INTBITS; count < (1<> count) & 1; - pw = (wo >> count) & 1; - sr = (rn >> count) & 1; - sw = (wn >> count) & 1; -#else - pr = FD_ISSET(fd&((1<> count) & 1; - sw = (wn >> count) & 1; -#else - sr = FD_ISSET(fd&((1< 0 && count < nbfd; count++) { - fd = poll_events[count].fd; - - if (!(poll_events[count].revents & ( POLLOUT | POLLIN | POLLERR | POLLHUP ))) - continue; - - /* ok, we found one active fd */ - status--; - - if (FD_ISSET(fd, StaticReadEvent)) { - if (fdtab[fd].state == FD_STCLOSE) - 
continue; - if (poll_events[count].revents & ( POLLIN | POLLERR | POLLHUP )) - fdtab[fd].cb[DIR_RD].f(fd); - } - - if (FD_ISSET(fd, StaticWriteEvent)) { - if (fdtab[fd].state == FD_STCLOSE) - continue; - if (poll_events[count].revents & ( POLLOUT | POLLERR | POLLHUP )) - fdtab[fd].cb[DIR_WR].f(fd); - } - } - } - return 1; -} -#endif - - - -/* - * Main select() loop. - * does 3 actions : - * 0 (POLL_LOOP_ACTION_INIT) : initializes necessary private structures - * 1 (POLL_LOOP_ACTION_RUN) : runs the loop - * 2 (POLL_LOOP_ACTION_CLEAN) : cleans up - * - * returns 0 if initialization failed, !0 otherwise. - */ - - -int select_loop(int action) -{ - int next_time; - int status; - int fd,i; - struct timeval delta; - int readnotnull, writenotnull; - static fd_set *ReadEvent = NULL, *WriteEvent = NULL; - - if (action == POLL_LOOP_ACTION_INIT) { - ReadEvent = (fd_set *) - calloc(1, sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE); - WriteEvent = (fd_set *) - calloc(1, sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE); - return 1; - } - else if (action == POLL_LOOP_ACTION_CLEAN) { - if (WriteEvent) free(WriteEvent); - if (ReadEvent) free(ReadEvent); - return 1; - } - - /* OK, it's POLL_LOOP_ACTION_RUN */ - - tv_now(&now); - - while (1) { - next_time = process_runnable_tasks(); - - /* stop when there's no connection left and we don't allow them anymore */ - if (!actconn && listeners == 0) - break; - - if (next_time > 0) { /* FIXME */ - /* Convert to timeval */ - /* to avoid eventual select loops due to timer precision */ - next_time += SCHEDULER_RESOLUTION; - delta.tv_sec = next_time / 1000; - delta.tv_usec = (next_time % 1000) * 1000; - } - else if (next_time == 0) { /* allow select to return immediately when needed */ - delta.tv_sec = delta.tv_usec = 0; - } - - - /* let's restore fdset state */ - - readnotnull = 0; writenotnull = 0; - for (i = 0; i < (maxfd + FD_SETSIZE - 1)/(8*sizeof(int)); i++) { - readnotnull |= (*(((int*)ReadEvent)+i) = 
*(((int*)StaticReadEvent)+i)) != 0; - writenotnull |= (*(((int*)WriteEvent)+i) = *(((int*)StaticWriteEvent)+i)) != 0; - } - - // /* just a verification code, needs to be removed for performance */ - // for (i=0; i= 0) ? &delta : NULL); - - /* this is an experiment on the separation of the select work */ - // status = (readnotnull ? select(maxfd, ReadEvent, NULL, NULL, (next_time >= 0) ? &delta : NULL) : 0); - // status |= (writenotnull ? select(maxfd, NULL, WriteEvent, NULL, (next_time >= 0) ? &delta : NULL) : 0); - - tv_now(&now); - - if (status > 0) { /* must proceed with events */ - - int fds; - char count; - - for (fds = 0; (fds << INTBITS) < maxfd; fds++) - if ((((int *)(ReadEvent))[fds] | ((int *)(WriteEvent))[fds]) != 0) - for (count = 1< bp->pref)) + bp = &pollers[p]; + + if (!bp || bp->pref == 0) + break; + + if (bp->init(bp)) { + memcpy(&cur_poller, bp, sizeof(*bp)); + return 1; + } + } while (!bp || bp->pref == 0); + return 0; +} + /* * Local variables: * c-indent-level: 8 diff --git a/src/haproxy.c b/src/haproxy.c index 5b3ade2b2..7b23e2fa5 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -507,18 +507,34 @@ void init(int argc, char **argv) if (global.nbproc < 1) global.nbproc = 1; - StaticReadEvent = (fd_set *)calloc(1, - sizeof(fd_set) * - (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE); - StaticWriteEvent = (fd_set *)calloc(1, - sizeof(fd_set) * - (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE); - fdtab = (struct fdtab *)calloc(1, sizeof(struct fdtab) * (global.maxsock)); for (i = 0; i < global.maxsock; i++) { fdtab[i].state = FD_STCLOSE; } + + register_pollers(); + /* Note: we could register external pollers here */ + + if (!(cfg_polling_mechanism & POLL_USE_EPOLL)) + disable_poller("epoll"); + + if (!(cfg_polling_mechanism & POLL_USE_POLL)) + disable_poller("poll"); + + if (!(cfg_polling_mechanism & POLL_USE_SELECT)) + disable_poller("select"); + + /* Note: we could disable any poller by name here */ + + if (!init_pollers()) { + Alert("No polling 
mechanism available\n"); + exit(1); + } + if (global.mode & MODE_DEBUG) { + printf("Note: using %s() as the polling mechanism.\n", cur_poller.name); + } + } void deinit(void) @@ -603,8 +619,6 @@ void deinit(void) if (global.chroot) free(global.chroot); if (global.pidfile) free(global.pidfile); - if (StaticReadEvent) free(StaticReadEvent); - if (StaticWriteEvent) free(StaticWriteEvent); if (fdtab) free(fdtab); pool_destroy(pool_session); @@ -628,6 +642,30 @@ static void tell_old_pids(int sig) kill(oldpids[p], sig); } +/* + * Runs the polling loop + * + * FIXME: + * - we still use 'listeners' to check whether we want to stop or not. + * + */ +void run_poll_loop() +{ + int next_time; + tv_now(&now); + + while (1) { + next_time = process_runnable_tasks(); + + /* stop when there's no connection left and we don't allow them anymore */ + if (!actconn && listeners == 0) + break; + + cur_poller.poll(&cur_poller, next_time); + } +} + + int main(int argc, char **argv) { int err, retry; @@ -860,41 +898,10 @@ int main(int argc, char **argv) setsid(); } -#if defined(ENABLE_EPOLL) - if (cfg_polling_mechanism & POLL_USE_EPOLL) { - if (epoll_loop(POLL_LOOP_ACTION_INIT)) { - epoll_loop(POLL_LOOP_ACTION_RUN); - epoll_loop(POLL_LOOP_ACTION_CLEAN); - cfg_polling_mechanism &= POLL_USE_EPOLL; - } - else { - Warning("epoll() is not available. Using poll()/select() instead.\n"); - cfg_polling_mechanism &= ~POLL_USE_EPOLL; - } - } -#endif - -#if defined(ENABLE_POLL) - if (cfg_polling_mechanism & POLL_USE_POLL) { - if (poll_loop(POLL_LOOP_ACTION_INIT)) { - poll_loop(POLL_LOOP_ACTION_RUN); - poll_loop(POLL_LOOP_ACTION_CLEAN); - cfg_polling_mechanism &= POLL_USE_POLL; - } - else { - Warning("poll() is not available. 
Using select() instead.\n"); - cfg_polling_mechanism &= ~POLL_USE_POLL; - } - } -#endif - if (cfg_polling_mechanism & POLL_USE_SELECT) { - if (select_loop(POLL_LOOP_ACTION_INIT)) { - select_loop(POLL_LOOP_ACTION_RUN); - select_loop(POLL_LOOP_ACTION_CLEAN); - cfg_polling_mechanism &= POLL_USE_SELECT; - } - } - + /* + * That's it : the central polling loop. Run until we stop. + */ + run_poll_loop(); /* Free all Hash Keys and all Hash elements */ appsession_cleanup();