Mirror of https://git.haproxy.org/git/haproxy.git/ (synced 2025-09-24 15:21:29 +02:00)
MEDIUM: listener: make the accept function more robust against pauses
During some tests in multi-process mode under Linux, it appeared that issuing "disable frontend foo" on the CLI to pause a listener would make the shutdown(read) of certain processes disturb another process listening on the same socket, resulting in a 100% CPU loop. What happens is that accept() returns EAGAIN without accepting anything.

Fortunately, we see that epoll_wait() reports EPOLLIN+EPOLLRDHUP (likely because the FD points to the same file in the kernel), so we can use that to stop the other process from trying to accept connections for a short time and try again later, hoping for the situation to change. We must not disable the FD otherwise there's no way to re-enable it.

Additionally, during these tests, a loop was encountered on EINVAL which was not caught. Now if we catch an EINVAL, we proceed the same way, in case the socket is re-enabled later.
commit bb66030a30
parent 2a83111cee
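For context, the failure mode described in the commit message can be sketched outside HAProxy. The standalone program below is not part of this commit and is not HAProxy code; it is only an illustration under the stated assumptions. A child process inherits the listening FD (the same file in the kernel, as in multi-process mode), the parent then issues shutdown(SHUT_RD) on the listener the way pausing a frontend does, and the child prints what epoll and accept() observe. The exact event mask is kernel-dependent; the interesting outcome is accept() failing with EAGAIN while the poller still signals the FD, which is the busy loop the patch below breaks out of.

/* illustration only -- not HAProxy code */
#define _GNU_SOURCE
#include <errno.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in sin = { .sin_family = AF_INET };
	int fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0);

	sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); /* port 0 = ephemeral */
	if (fd < 0 || bind(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0 ||
	    listen(fd, 10) < 0) {
		perror("socket/bind/listen");
		return 1;
	}

	if (fork() == 0) {
		/* child: poll the inherited listener, then try to accept */
		struct epoll_event ev = { .events = EPOLLIN | EPOLLRDHUP };
		int ep = epoll_create1(0);
		int n, cfd;

		epoll_ctl(ep, EPOLL_CTL_ADD, fd, &ev);
		n = epoll_wait(ep, &ev, 1, 3000); /* bounded wait so the demo always ends */
		if (n > 0) {
			cfd = accept(fd, NULL, NULL);
			printf("events=0x%x accept()=%d (%s)\n", (unsigned)ev.events,
			       cfd, cfd < 0 ? strerror(errno) : "ok");
		} else {
			printf("no event within 3s (n=%d)\n", n);
		}
		_exit(0);
	}

	sleep(1);              /* let the child reach epoll_wait() */
	shutdown(fd, SHUT_RD); /* the shutdown(read) mentioned in the commit message */
	wait(NULL);
	return 0;
}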
@@ -257,6 +257,7 @@ void listener_accept(int fd)
 	struct listener *l = fdtab[fd].owner;
 	struct proxy *p = l->frontend;
 	int max_accept = l->maxaccept ? l->maxaccept : 1;
+	int expire;
 	int cfd;
 	int ret;
 #ifdef USE_ACCEPT4
@@ -270,14 +271,11 @@ void listener_accept(int fd)
 
 	if (!(l->options & LI_O_UNLIMITED) && global.sps_lim) {
 		int max = freq_ctr_remain(&global.sess_per_sec, global.sps_lim, 0);
-		int expire;
 
 		if (unlikely(!max)) {
 			/* frontend accept rate limit was reached */
 			limit_listener(l, &global_listener_queue);
 			expire = tick_add(now_ms, next_event_delay(&global.sess_per_sec, global.sps_lim, 0));
-			task_schedule(global_listener_queue_task, tick_first(expire, global_listener_queue_task->expire));
-			return;
+			goto wait_expire;
 		}
 
 		if (max_accept > max)
@@ -286,14 +284,11 @@ void listener_accept(int fd)
 
 	if (!(l->options & LI_O_UNLIMITED) && global.cps_lim) {
 		int max = freq_ctr_remain(&global.conn_per_sec, global.cps_lim, 0);
-		int expire;
 
 		if (unlikely(!max)) {
 			/* frontend accept rate limit was reached */
 			limit_listener(l, &global_listener_queue);
 			expire = tick_add(now_ms, next_event_delay(&global.conn_per_sec, global.cps_lim, 0));
-			task_schedule(global_listener_queue_task, tick_first(expire, global_listener_queue_task->expire));
-			return;
+			goto wait_expire;
 		}
 
 		if (max_accept > max)
@@ -302,14 +297,11 @@ void listener_accept(int fd)
 #ifdef USE_OPENSSL
 	if (!(l->options & LI_O_UNLIMITED) && global.ssl_lim && l->bind_conf && l->bind_conf->is_ssl) {
 		int max = freq_ctr_remain(&global.ssl_per_sec, global.ssl_lim, 0);
-		int expire;
 
 		if (unlikely(!max)) {
 			/* frontend accept rate limit was reached */
 			limit_listener(l, &global_listener_queue);
 			expire = tick_add(now_ms, next_event_delay(&global.ssl_per_sec, global.ssl_lim, 0));
-			task_schedule(global_listener_queue_task, tick_first(expire, global_listener_queue_task->expire));
-			return;
+			goto wait_expire;
 		}
 
 		if (max_accept > max)
@@ -365,8 +357,20 @@ void listener_accept(int fd)
 		if (unlikely(cfd == -1)) {
 			switch (errno) {
 			case EAGAIN:
+				if (fdtab[fd].ev & FD_POLL_HUP) {
+					/* the listening socket might have been disabled in a shared
+					 * process and we're a collateral victim. We'll just pause for
+					 * a while in case it comes back. In the mean time, we need to
+					 * clear this sticky flag.
+					 */
+					fdtab[fd].ev &= ~FD_POLL_HUP;
+					goto transient_error;
+				}
 				fd_cant_recv(fd);
 				return;   /* nothing more to accept */
+			case EINVAL:
+				/* might be trying to accept on a shut fd (eg: soft stop) */
+				goto transient_error;
 			case EINTR:
 			case ECONNABORTED:
 				continue;
@@ -375,26 +379,20 @@ void listener_accept(int fd)
 					send_log(p, LOG_EMERG,
 						 "Proxy %s reached system FD limit at %d. Please check system tunables.\n",
 						 p->id, maxfd);
-				limit_listener(l, &global_listener_queue);
-				task_schedule(global_listener_queue_task, tick_add(now_ms, 100)); /* try again in 100 ms */
-				return;
+				goto transient_error;
 			case EMFILE:
 				if (p)
 					send_log(p, LOG_EMERG,
 						 "Proxy %s reached process FD limit at %d. Please check 'ulimit-n' and restart.\n",
 						 p->id, maxfd);
-				limit_listener(l, &global_listener_queue);
-				task_schedule(global_listener_queue_task, tick_add(now_ms, 100)); /* try again in 100 ms */
-				return;
+				goto transient_error;
 			case ENOBUFS:
 			case ENOMEM:
 				if (p)
 					send_log(p, LOG_EMERG,
 						 "Proxy %s reached system memory limit at %d sockets. Please check system tunables.\n",
 						 p->id, maxfd);
-				limit_listener(l, &global_listener_queue);
-				task_schedule(global_listener_queue_task, tick_add(now_ms, 100)); /* try again in 100 ms */
-				return;
+				goto transient_error;
 			default:
 				/* unexpected result, let's give up and let other tasks run */
 				goto stop;
@@ -442,9 +440,7 @@ void listener_accept(int fd)
 		if (ret == 0) /* successful termination */
 			continue;
 
-		limit_listener(l, &global_listener_queue);
-		task_schedule(global_listener_queue_task, tick_add(now_ms, 100)); /* try again in 100 ms */
-		return;
+		goto transient_error;
 	}
 
 	if (l->nbconn >= l->maxconn) {
@@ -473,6 +469,15 @@ void listener_accept(int fd)
  stop:
 	fd_done_recv(fd);
 	return;
+
+ transient_error:
+	/* pause the listener and try again in 100 ms */
+	expire = tick_add(now_ms, 100);
+
+ wait_expire:
+	limit_listener(l, &global_listener_queue);
+	task_schedule(global_listener_queue_task, tick_first(expire, global_listener_queue_task->expire));
+	return;
 }
 
 /*