diff --git a/Makefile b/Makefile index c068c5cf9..da5bee503 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,6 @@ # USE_REGPARM : enable regparm optimization. Recommended on x86. # USE_SEPOLL : enable speculative epoll(). Automatic. # USE_STATIC_PCRE : enable static libpcre. Recommended. -# USE_TCPSPLICE : enable tcp_splice() on Linux (needs kernel patch). # USE_TPROXY : enable transparent proxy. Automatic. # USE_LINUX_TPROXY : enable full transparent proxy (need kernel patch). # USE_LINUX_SPLICE : enable kernel 2.6 splicing (broken on old kernels) @@ -301,11 +300,7 @@ BUILD_OPTIONS = ignore_implicit = $(patsubst %=implicit,,$(1)=$($(1))) ifneq ($(USE_TCPSPLICE),) -# This is the directory hosting libtcpsplice.[ah] -TCPSPLICEDIR := -OPTIONS_CFLAGS += -DCONFIG_HAP_TCPSPLICE -I$(TCPSPLICEDIR) -OPTIONS_LDFLAGS += -L$(TCPSPLICEDIR) -ltcpsplice -BUILD_OPTIONS += $(call ignore_implicit,USE_TCPSPLICE) +$(error experimental option USE_TCPSPLICE has been removed, check USE_LINUX_SPLICE) endif ifneq ($(USE_LINUX_SPLICE),) diff --git a/doc/configuration.txt b/doc/configuration.txt index 03f4e7ab7..50f82e577 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -710,7 +710,6 @@ option ssl-hello-chk X - X X accept X X X - option tcpka X X X X option tcplog X X X X -[no] option tcpsplice X X X X [no] option transparent X - X X persist rdp-cookie X - X X rate-limit sessions X X X - @@ -2907,31 +2906,6 @@ option tcplog See also : "option httplog", and section 8 about logging. -option tcpsplice [ experimental ] - Enable linux kernel-based acceleration of data relaying - May be used in sections : defaults | frontend | listen | backend - yes | yes | yes | yes - Arguments : none - - This option is only available when HAProxy has been built for use on Linux - with USE_TCPSPLICE=1. This option requires a kernel patch which is available - on http://www.linux-l7sw.org/. - - When "option tcpsplice" is set, as soon as the server's response headers have - been transferred, the session handling is transferred to the kernel which - will forward all subsequent data from the server to the client untill the - session closes. This leads to much faster data transfers between client and - server since the data is not copied twice between kernel and user space, but - there are some limitations such as the lack of information about the number - of bytes transferred and the total transfer time. - - This is an experimental feature. It happens to reliably work but issues - caused by corner cases are to be expected. - - Note that this option requires that the process permanently runs with - CAP_NETADMIN privileges, which most often translates into running as root. - - option transparent no option transparent Enable client-side transparent proxying diff --git a/doc/tcp-splicing.txt b/doc/tcp-splicing.txt deleted file mode 100644 index ffdb2565e..000000000 --- a/doc/tcp-splicing.txt +++ /dev/null @@ -1,196 +0,0 @@ - Using Linux TCP Splicing with HAProxy - Willy Tarreau - - 2007/01/06 - - - -Alexandre Cassen has started a project called Linux Layer7 Switching (L7SW), -whose goal is to provide kernel services to help userland proxies achieving -very high performance. Right now, the project consists in a loadable kernel -module providing TCP Splicing under Linux. - -TCP Splicing is a method by which a userland proxy can tell the kernel that -it considers it has no added value on the data part of a connection, and that -the kernel can perform the transfers it itself, thus relieving the proxy from -a potentially heavy job. There are two advantages to this method : - - - it reduces the number of process wakeups - - it reduces the number of data copies between user-space and kernel buffers - -This method is particularly suited to protocols in which data is sent till -the end of the session. This is the case for FTP data for instance, and it -is also the case for the BODY part of HTTP/1.0. - -The great news is that haproxy has been designed from the beginning with a -clear distinction between the headers and the DATA phase, so it was a child's -game to add hooks to Alex's library in it - -Be careful! Both versions are to be considered BETA software ! Run them on -your systems if you want, but do not complain if it crashes twice a day ! -Anyway, it seems stable on our test machines. - -In order to use TCP Splicing on haproxy, you need : - - - Linux Layer7 Switching code version 0.1.1 : [ http://linux-l7sw.sf.net/ ] - - Haproxy version 1.3.5 : [ http://haproxy.1wt.eu/download/1.3/src/ ] - -Then, you must untar both packages in any location, let's assume you'll -be using /tmp. First extract l7sw and : - - $ cd /tmp - $ tar zxf layer7switch-0.1.1.tar.gz - $ cd layer7switch-0.1.1 - -L7SW currently only supports Linux kernel 2.6.19+. If you prefer to use it -on a more stable kernel, such as 2.6.16.X, you can apply this patch to the -L7SW directory : - - [ http://haproxy.1wt.eu/download/patches/tcp_splice-0.1.1-linux-2.6.16.diff ] - - $ patch -p1 -d kernel < tcp_splice-0.1.1-linux-2.6.16.diff - -Alternatively, if you prefer to run it on 2.4.33+, you can apply this patch -to the L7SW directory : - - [ http://haproxy.1wt.eu/download/patches/tcp_splice-0.1.1-linux-2.4.33.diff ] - - $ patch -p1 -d kernel < tcp_splice-0.1.1-linux-2.4.33.diff - -Then build the kernel module as described in the L7SW README. Basically, you -just have to do this once your tree has been patched : - - $ cd kernel - $ make - -You can either install the resulting module (tcp_splice) or load it now. During -early testing periods, it might be preferable to avoid installing anything and -just load it manually : - - $ sudo insmod tcp_splice.*o - $ cd .. - -Now that the module is loaded, you need to build the libtcpsplice library on -which haproxy currently relies : - - $ cd userland/libtcpsplice - $ make - $ cd .. - -For the adventurous, there's also a proof of concept in the userlan/switchd -directory, it may be useful if you encounter problems with haproxy for -instance. But it is not needed at all here. - -OK, L7SW is ready. Now you have to extract haproxy and tell it to build using -libtcpsplice : - - $ cd /tmp - $ tar zxf haproxy-1.3.5.tar.gz - $ cd haproxy-1.3.5 - $ make USE_TCPSPLICE=1 TCPSPLICEDIR=/tmp/layer7switch-0.1.1/userland/libtcpsplice - -There are other options to make, which are hugely recommended, such as -CPU=, REGEX=, and above all, TARGET= so that you use the best syscalls and -functions for your system. Generally you will use TARGET=linux26, but 2.4 users -with an epoll-patched kernel will use TARGET=linux24e. This is very important -because failing to specify those options will disable important optimizations -which might hide the tcpsplice benefits ! Please consult the haproxy's README. - -Now that you have haproxy built with support for tcpsplice, and that the module -is loaded, you have to write a config. There is an example in the 'examples' -directory. Basically, you just have to add the "option tcpsplice" keyword BOTH -in the frontend AND in the backend sections that you want to accelerate. - -If the option is specified only in the frontend or in the backend, then no -acceleration will be used. It is designed this way to allow some front-back -combinations to use it without forcing others to use it. Of course, if you use -a single "listen" section, you just have to specify it once. - -As of now (l7sw-0.1.1 and haproxy-1.3.5), you need the CAP_NETADMIN capability -to START and to RUN. For human beings, it means that you have to start haproxy -as root and keep it running as root, so it must not drop its priviledges. This -is somewhat annoying, but we'll try to find a solution later. - -Also, l7sw-0.1.1 does not yet support TCP window scaling nor SACK. So you have -to disable both features on the proxy : - - $ sudo sysctl -w net.ipv4.tcp_window_scaling=0 - $ sudo sysctl -w net.ipv4.tcp_sack=0 - $ sudo sysctl -w net.ipv4.tcp_dsack=0 - $ sudo sysctl -w net.ipv4.tcp_tw_recycle=1 - -You can now check that everything works as expected. Run "vmstat 1" or "top" -in one terminal, and haproxy in another one : - - $ sudo ./haproxy -f examples/tcp-splicing-sample.cfg - -Transfering large file through it should not affect it much. You should observe -something like 10% CPU instead of 95% when transferring 1 MB files at full -speed. You can play with the tcpsplice option in the configuration to see the -effects. - - -Troubleshooting ---------------- - -This software is still beta, and you will probably encounter some caveats. -I personnally ran into a few issues that we'll try to address with Alex. First -of all, I had occasionnal lockups on my SMP machine which I never had on an UP -one. So if you get problems on an SMP machine, please reboot it in UP and do -not lose your time on this. - -I also noticed that sometimes, some sessions remained established even after -the end of the program. You might also see some situtations where even after -the proxy's exit, the traffic still passes through the system. It may happen -when you have a limited source port range and that you reuse a TIME_WAIT -session matching exactly the same source and destinations. This will need -to be addressed too. - -You can play with tcp_splice variables and timeouts here in /proc/sys/net/ : - - $ ls /proc/sys/net/tcp_splice/ - debug_level timeout_established timeout_listen timeout_synsent - timeout_close timeout_finwait timeout_synack timeout_timewait - timeout_closewait timeout_lastack timeout_synrecv - - $ sysctl net/tcp_splice - net.tcp_splice.debug_level = 0 - net.tcp_splice.timeout_synack = 120 - net.tcp_splice.timeout_listen = 120 - net.tcp_splice.timeout_lastack = 30 - net.tcp_splice.timeout_closewait = 60 - net.tcp_splice.timeout_close = 10 - net.tcp_splice.timeout_timewait = 120 - net.tcp_splice.timeout_finwait = 120 - net.tcp_splice.timeout_synrecv = 60 - net.tcp_splice.timeout_synsent = 120 - net.tcp_splice.timeout_established = 900 - -You can also consult the full session list here : - -$ head /proc/net/tcp_splice_conn -FromIP FPrt ToIP TPrt LocalIP LPrt DestIP DPrt State Expires -0A000301 4EBB 0A000302 1F40 0A000302 817B 0A000301 0050 CLOSE 7 -0A000301 4E9B 0A000302 1F40 0A000302 8165 0A000301 0050 CLOSE 7 - -Since a session exists at least in CLOSE state for 10 seconds, you just have -to consult this entry less than 10 seconds after a test to see a session. - -Please report your successes, failures, suggestions or fixes to the L7SW -mailing list here (do not use the list to report other haproxy bugs) : - - https://lists.sourceforge.net/lists/listinfo/linux-l7sw-devel - - -Motivations ------------ - -I've always wanted haproxy to be the fastest and most reliable software load -balancer available. L7SW is an opportunity to make get a huge performance boost -on high traffic sites (eg: photo sharing, streaming, ...). In turn, I find it a -shame that Alex wastes his time redevelopping a proxy as a proof of concept for -his kernel code. While it is a fun game to enter into, it really becomes harder -when you need to get close to customers' needs. So by porting haproxy early to -L7SW, I get both the opportunity to get an idea of what it will soon be capable -of, and help Alex spend more time on the complex kernel part. - -Have fun ! -Willy diff --git a/examples/tcp-splicing-sample.cfg b/examples/tcp-splicing-sample.cfg deleted file mode 100644 index 84d55a307..000000000 --- a/examples/tcp-splicing-sample.cfg +++ /dev/null @@ -1,82 +0,0 @@ -# -# This is a sample configuration -# haproxy >= 1.3.5 required. -# -# It listens on 192.168.1.10:80, and directs all requests for Host 'img' or -# URIs starting with /img or /css to a dedicated group of servers. URIs -# starting with /admin/stats are directed to a backend dedicated to statistics. -# TCP splicing is used on static objects to relieve the process from the heavy -# job. -# - -global - maxconn 10000 - log 127.0.0.1 local0 - uid 200 - gid 200 - chroot /var/empty - daemon - - -# The public 'www' address in the DMZ -frontend public - bind 192.168.1.10:80 - mode http - log global - option httplog - option dontlognull - option httpclose - option tcpsplice - monitor-uri /monitoruri - maxconn 8000 - clitimeout 30000 - - # Host: will use a specific keyword soon - reqisetbe ^Host:\ img static - - # The URI will use a specific keyword soon - reqisetbe ^[^\ ]*\ /(img|css)/ static - reqisetbe ^[^\ ]*\ /admin/stats stats - - default_backend dynamic - - -# The static backend backend for 'Host: img', /img and /css. -# TCP splicing is enabled on this backend because we don't expect to do -# anything interesting with static objects, but we know they can eat much -# bandwidth. -backend static - mode http - balance roundrobin - option tcpsplice - contimeout 5000 - srvtimeout 5000 - redispatch - retries 2 - option httpchk HEAD /favicon.ico - server statsrv1 192.168.1.8:80 check inter 1000 - server statsrv2 192.168.1.9:80 check inter 1000 - - -backend dynamic - mode http - balance roundrobin - contimeout 30000 - srvtimeout 30000 - redispatch - retries 2 - option httpchk HEAD /login.php - cookie DYNSRV insert indirect nocache - fullconn 4000 # the servers will be used at full load above this number of connections - server dynsrv1 192.168.1.1:80 minconn 50 maxconn 500 cookie s1 check inter 1000 - server dynsrv2 192.168.1.2:80 minconn 50 maxconn 500 cookie s2 check inter 1000 - server dynsrv3 192.168.1.3:80 minconn 50 maxconn 500 cookie s3 check inter 1000 - server dynsrv4 192.168.1.4:80 minconn 50 maxconn 500 cookie s4 check inter 1000 - - -backend stats - log global - mode http - stats uri / - balance roundrobin - diff --git a/include/types/global.h b/include/types/global.h index 542abfe35..5c6e98217 100644 --- a/include/types/global.h +++ b/include/types/global.h @@ -42,7 +42,6 @@ #define LSTCHK_CAP_BIND 0x00000001 /* check that we can bind to any port */ #define LSTCHK_CTTPROXY 0x00000002 /* check that tproxy is enabled */ #define LSTCHK_NETADM 0x00000004 /* check that we have CAP_NET_ADMIN */ -#define LSTCHK_TCPSPLICE 0x00000008 /* check that linux tcp_splice is enabled */ /* Global tuning options */ /* available polling mechanisms */ diff --git a/include/types/proxy.h b/include/types/proxy.h index c9bd0a9bb..19d1b044b 100644 --- a/include/types/proxy.h +++ b/include/types/proxy.h @@ -101,7 +101,7 @@ #define PR_O_TPXY_CLI 0x06000000 /* bind to the client's IP+port when connect()ing */ #define PR_O_TPXY_MASK 0x06000000 /* bind to a non-local address when connect()ing */ -#define PR_O_TCPSPLICE 0x08000000 /* delegate data transfer to linux kernel's tcp_splice */ +/* unused : tcpsplice 0x08000000 */ #define PR_O_CONTSTATS 0x10000000 /* continous counters */ #define PR_O_HTTP_PROXY 0x20000000 /* Enable session to use HTTP proxy operations */ #define PR_O_DISABLE404 0x40000000 /* Disable a server on a 404 response to a health-check */ diff --git a/src/backend.c b/src/backend.c index d6d47faee..a67d64846 100644 --- a/src/backend.c +++ b/src/backend.c @@ -44,10 +44,6 @@ #include #include -#ifdef CONFIG_HAP_TCPSPLICE -#include -#endif - static inline void fwrr_remove_from_tree(struct server *s); static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s); static inline void fwrr_dequeue_srv(struct server *s); @@ -1831,14 +1827,6 @@ int connect_server(struct session *s) return SN_ERR_PRXCOND; /* it is a configuration limit */ } -#ifdef CONFIG_HAP_TCPSPLICE - if ((global.tune.options & GTUNE_USE_SPLICE) && - (s->fe->options & s->be->options) & PR_O_TCPSPLICE) { - /* TCP splicing supported by both FE and BE */ - tcp_splice_initfd(s->req->prod->fd, fd); - } -#endif - if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1) || (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) == -1)) { qfprintf(stderr,"Cannot set client socket to non blocking mode.\n"); diff --git a/src/cfgparse.c b/src/cfgparse.c index 8b3d3f183..996ddc1bf 100644 --- a/src/cfgparse.c +++ b/src/cfgparse.c @@ -116,9 +116,6 @@ static const struct cfg_opt cfg_opts[] = { "persist", PR_O_PERSIST, PR_CAP_BE, 0 }, { "redispatch", PR_O_REDISP, PR_CAP_BE, 0 }, { "srvtcpka", PR_O_TCP_SRV_KA, PR_CAP_BE, 0 }, -#ifdef CONFIG_HAP_TCPSPLICE - { "tcpsplice", PR_O_TCPSPLICE, PR_CAP_BE|PR_CAP_FE, LSTCHK_TCPSPLICE|LSTCHK_NETADM }, -#endif #ifdef TPROXY { "transparent", PR_O_TRANSP, PR_CAP_BE, 0 }, #endif diff --git a/src/haproxy.c b/src/haproxy.c index 607ff155f..a592d46f8 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -93,10 +93,6 @@ #include #include -#ifdef CONFIG_HAP_TCPSPLICE -#include -#endif - #ifdef CONFIG_HAP_CTTPROXY #include #endif @@ -218,7 +214,7 @@ void usage(char *name) #if defined(ENABLE_POLL) " -dp disables poll() usage even when available\n" #endif -#if defined(CONFIG_HAP_LINUX_SPLICE) || defined(CONFIG_HAP_TCPSPLICE) +#if defined(CONFIG_HAP_LINUX_SPLICE) " -dS disables splice usage (broken on old kernels)\n" #endif " -sf/-st [pid ]* finishes/terminates old pids. Must be last arguments.\n" @@ -420,7 +416,7 @@ void init(int argc, char **argv) #if defined(ENABLE_KQUEUE) global.tune.options |= GTUNE_USE_KQUEUE; #endif -#if defined(CONFIG_HAP_LINUX_SPLICE) || defined(CONFIG_HAP_TCPSPLICE) +#if defined(CONFIG_HAP_LINUX_SPLICE) global.tune.options |= GTUNE_USE_SPLICE; #endif @@ -459,7 +455,7 @@ void init(int argc, char **argv) else if (*flag == 'd' && flag[1] == 'k') global.tune.options &= ~GTUNE_USE_KQUEUE; #endif -#if defined(CONFIG_HAP_LINUX_SPLICE) || defined(CONFIG_HAP_TCPSPLICE) +#if defined(CONFIG_HAP_LINUX_SPLICE) else if (*flag == 'd' && flag[1] == 'S') global.tune.options &= ~GTUNE_USE_SPLICE; #endif @@ -1042,20 +1038,6 @@ int main(int argc, char **argv) #endif } -#ifdef CONFIG_HAP_TCPSPLICE - if ((global.tune.options & GTUNE_USE_SPLICE) && (global.last_checks & LSTCHK_TCPSPLICE)) { - if (tcp_splice_start() < 0) { - Alert("[%s.main()] Cannot enable tcp_splice.\n" - " Make sure you have enough permissions and that the module is loadable.\n" - " Alternatively, you may disable the 'tcpsplice' options in the configuration\n" - " or add 'nosplice' in the global section, or start with '-dS'.\n" - "", argv[0]); - protocol_unbind_all(); - exit(1); - } - } -#endif - #ifdef CONFIG_HAP_CTTPROXY if (global.last_checks & LSTCHK_CTTPROXY) { int ret; diff --git a/src/proto_http.c b/src/proto_http.c index a3948f51f..0fb759a32 100644 --- a/src/proto_http.c +++ b/src/proto_http.c @@ -56,10 +56,6 @@ #include #include -#ifdef CONFIG_HAP_TCPSPLICE -#include -#endif - /* This is used by remote monitoring */ const char HTTP_200[] = "HTTP/1.0 200 OK\r\n" @@ -3194,12 +3190,6 @@ int process_response(struct session *t) buffer_set_rlim(rep, BUFSIZE); /* no more rewrite needed */ t->logs.t_data = tv_ms_elapsed(&t->logs.tv_accept, &now); -#ifdef CONFIG_HAP_TCPSPLICE - if ((t->fe->options & t->be->options) & PR_O_TCPSPLICE) { - /* TCP splicing supported by both FE and BE */ - tcp_splice_splicefd(rep->cons->fd, rep->prod->fd, 0); - } -#endif /* if the user wants to log as soon as possible, without counting * bytes from the server, then this is the right moment. We have * to temporarily assign bytes_out to log what we currently have. diff --git a/src/session.c b/src/session.c index d116ef216..5213dcdc2 100644 --- a/src/session.c +++ b/src/session.c @@ -35,10 +35,6 @@ #include #include -#ifdef CONFIG_HAP_TCPSPLICE -#include -#endif - struct pool_head *pool2_session; struct list sessions; @@ -331,13 +327,6 @@ void sess_establish(struct session *s, struct stream_interface *si) s->logs.t_close = s->logs.t_connect; /* to get a valid end date */ s->do_log(s); } -#ifdef CONFIG_HAP_TCPSPLICE - if ((global.tune.options & GTUNE_USE_SPLICE) && - (s->fe->options & s->be->options) & PR_O_TCPSPLICE) { - /* TCP splicing supported by both FE and BE */ - tcp_splice_splicefd(req->prod->fd, si->fd, 0); - } -#endif } else { rep->analysers |= AN_RTR_HTTP_HDR;