mirror of
https://git.haproxy.org/git/haproxy.git/
synced 2025-09-24 15:21:29 +02:00
MEDIUM: listener: make the accept function more robust against pauses
During some tests in multi-process mode under Linux, it appeared that issuing "disable frontend foo" on the CLI to pause a listener would make the shutdown(read) performed by certain processes disturb another process listening on the same socket, resulting in a 100% CPU loop. What happens is that accept() returns EAGAIN without accepting anything. Fortunately, we see that epoll_wait() reports EPOLLIN+EPOLLRDHUP (likely because the FD points to the same file in the kernel), so we can use that to stop the other process from trying to accept connections for a short time and try again later, hoping for the situation to change. We must not disable the FD, otherwise there's no way to re-enable it. Additionally, during these tests an uncaught EINVAL from accept() also caused a busy loop. Now, if we catch an EINVAL, we proceed the same way, in case the socket is re-enabled later.
This commit is contained in:
parent
2a83111cee
commit
bb66030a30
@ -257,6 +257,7 @@ void listener_accept(int fd)
|
|||||||
struct listener *l = fdtab[fd].owner;
|
struct listener *l = fdtab[fd].owner;
|
||||||
struct proxy *p = l->frontend;
|
struct proxy *p = l->frontend;
|
||||||
int max_accept = l->maxaccept ? l->maxaccept : 1;
|
int max_accept = l->maxaccept ? l->maxaccept : 1;
|
||||||
|
int expire;
|
||||||
int cfd;
|
int cfd;
|
||||||
int ret;
|
int ret;
|
||||||
#ifdef USE_ACCEPT4
|
#ifdef USE_ACCEPT4
|
||||||
@ -270,14 +271,11 @@ void listener_accept(int fd)
|
|||||||
|
|
||||||
if (!(l->options & LI_O_UNLIMITED) && global.sps_lim) {
|
if (!(l->options & LI_O_UNLIMITED) && global.sps_lim) {
|
||||||
int max = freq_ctr_remain(&global.sess_per_sec, global.sps_lim, 0);
|
int max = freq_ctr_remain(&global.sess_per_sec, global.sps_lim, 0);
|
||||||
int expire;
|
|
||||||
|
|
||||||
if (unlikely(!max)) {
|
if (unlikely(!max)) {
|
||||||
/* frontend accept rate limit was reached */
|
/* frontend accept rate limit was reached */
|
||||||
limit_listener(l, &global_listener_queue);
|
|
||||||
expire = tick_add(now_ms, next_event_delay(&global.sess_per_sec, global.sps_lim, 0));
|
expire = tick_add(now_ms, next_event_delay(&global.sess_per_sec, global.sps_lim, 0));
|
||||||
task_schedule(global_listener_queue_task, tick_first(expire, global_listener_queue_task->expire));
|
goto wait_expire;
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (max_accept > max)
|
if (max_accept > max)
|
||||||
@ -286,14 +284,11 @@ void listener_accept(int fd)
|
|||||||
|
|
||||||
if (!(l->options & LI_O_UNLIMITED) && global.cps_lim) {
|
if (!(l->options & LI_O_UNLIMITED) && global.cps_lim) {
|
||||||
int max = freq_ctr_remain(&global.conn_per_sec, global.cps_lim, 0);
|
int max = freq_ctr_remain(&global.conn_per_sec, global.cps_lim, 0);
|
||||||
int expire;
|
|
||||||
|
|
||||||
if (unlikely(!max)) {
|
if (unlikely(!max)) {
|
||||||
/* frontend accept rate limit was reached */
|
/* frontend accept rate limit was reached */
|
||||||
limit_listener(l, &global_listener_queue);
|
|
||||||
expire = tick_add(now_ms, next_event_delay(&global.conn_per_sec, global.cps_lim, 0));
|
expire = tick_add(now_ms, next_event_delay(&global.conn_per_sec, global.cps_lim, 0));
|
||||||
task_schedule(global_listener_queue_task, tick_first(expire, global_listener_queue_task->expire));
|
goto wait_expire;
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (max_accept > max)
|
if (max_accept > max)
|
||||||
@ -302,14 +297,11 @@ void listener_accept(int fd)
|
|||||||
#ifdef USE_OPENSSL
|
#ifdef USE_OPENSSL
|
||||||
if (!(l->options & LI_O_UNLIMITED) && global.ssl_lim && l->bind_conf && l->bind_conf->is_ssl) {
|
if (!(l->options & LI_O_UNLIMITED) && global.ssl_lim && l->bind_conf && l->bind_conf->is_ssl) {
|
||||||
int max = freq_ctr_remain(&global.ssl_per_sec, global.ssl_lim, 0);
|
int max = freq_ctr_remain(&global.ssl_per_sec, global.ssl_lim, 0);
|
||||||
int expire;
|
|
||||||
|
|
||||||
if (unlikely(!max)) {
|
if (unlikely(!max)) {
|
||||||
/* frontend accept rate limit was reached */
|
/* frontend accept rate limit was reached */
|
||||||
limit_listener(l, &global_listener_queue);
|
|
||||||
expire = tick_add(now_ms, next_event_delay(&global.ssl_per_sec, global.ssl_lim, 0));
|
expire = tick_add(now_ms, next_event_delay(&global.ssl_per_sec, global.ssl_lim, 0));
|
||||||
task_schedule(global_listener_queue_task, tick_first(expire, global_listener_queue_task->expire));
|
goto wait_expire;
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (max_accept > max)
|
if (max_accept > max)
|
||||||
@ -365,8 +357,20 @@ void listener_accept(int fd)
|
|||||||
if (unlikely(cfd == -1)) {
|
if (unlikely(cfd == -1)) {
|
||||||
switch (errno) {
|
switch (errno) {
|
||||||
case EAGAIN:
|
case EAGAIN:
|
||||||
|
if (fdtab[fd].ev & FD_POLL_HUP) {
|
||||||
|
/* the listening socket might have been disabled in a shared
|
||||||
|
* process and we're a collateral victim. We'll just pause for
|
||||||
|
* a while in case it comes back. In the mean time, we need to
|
||||||
|
* clear this sticky flag.
|
||||||
|
*/
|
||||||
|
fdtab[fd].ev &= ~FD_POLL_HUP;
|
||||||
|
goto transient_error;
|
||||||
|
}
|
||||||
fd_cant_recv(fd);
|
fd_cant_recv(fd);
|
||||||
return; /* nothing more to accept */
|
return; /* nothing more to accept */
|
||||||
|
case EINVAL:
|
||||||
|
/* might be trying to accept on a shut fd (eg: soft stop) */
|
||||||
|
goto transient_error;
|
||||||
case EINTR:
|
case EINTR:
|
||||||
case ECONNABORTED:
|
case ECONNABORTED:
|
||||||
continue;
|
continue;
|
||||||
@ -375,26 +379,20 @@ void listener_accept(int fd)
|
|||||||
send_log(p, LOG_EMERG,
|
send_log(p, LOG_EMERG,
|
||||||
"Proxy %s reached system FD limit at %d. Please check system tunables.\n",
|
"Proxy %s reached system FD limit at %d. Please check system tunables.\n",
|
||||||
p->id, maxfd);
|
p->id, maxfd);
|
||||||
limit_listener(l, &global_listener_queue);
|
goto transient_error;
|
||||||
task_schedule(global_listener_queue_task, tick_add(now_ms, 100)); /* try again in 100 ms */
|
|
||||||
return;
|
|
||||||
case EMFILE:
|
case EMFILE:
|
||||||
if (p)
|
if (p)
|
||||||
send_log(p, LOG_EMERG,
|
send_log(p, LOG_EMERG,
|
||||||
"Proxy %s reached process FD limit at %d. Please check 'ulimit-n' and restart.\n",
|
"Proxy %s reached process FD limit at %d. Please check 'ulimit-n' and restart.\n",
|
||||||
p->id, maxfd);
|
p->id, maxfd);
|
||||||
limit_listener(l, &global_listener_queue);
|
goto transient_error;
|
||||||
task_schedule(global_listener_queue_task, tick_add(now_ms, 100)); /* try again in 100 ms */
|
|
||||||
return;
|
|
||||||
case ENOBUFS:
|
case ENOBUFS:
|
||||||
case ENOMEM:
|
case ENOMEM:
|
||||||
if (p)
|
if (p)
|
||||||
send_log(p, LOG_EMERG,
|
send_log(p, LOG_EMERG,
|
||||||
"Proxy %s reached system memory limit at %d sockets. Please check system tunables.\n",
|
"Proxy %s reached system memory limit at %d sockets. Please check system tunables.\n",
|
||||||
p->id, maxfd);
|
p->id, maxfd);
|
||||||
limit_listener(l, &global_listener_queue);
|
goto transient_error;
|
||||||
task_schedule(global_listener_queue_task, tick_add(now_ms, 100)); /* try again in 100 ms */
|
|
||||||
return;
|
|
||||||
default:
|
default:
|
||||||
/* unexpected result, let's give up and let other tasks run */
|
/* unexpected result, let's give up and let other tasks run */
|
||||||
goto stop;
|
goto stop;
|
||||||
@ -442,9 +440,7 @@ void listener_accept(int fd)
|
|||||||
if (ret == 0) /* successful termination */
|
if (ret == 0) /* successful termination */
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
limit_listener(l, &global_listener_queue);
|
goto transient_error;
|
||||||
task_schedule(global_listener_queue_task, tick_add(now_ms, 100)); /* try again in 100 ms */
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (l->nbconn >= l->maxconn) {
|
if (l->nbconn >= l->maxconn) {
|
||||||
@ -473,6 +469,15 @@ void listener_accept(int fd)
|
|||||||
stop:
|
stop:
|
||||||
fd_done_recv(fd);
|
fd_done_recv(fd);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
transient_error:
|
||||||
|
/* pause the listener and try again in 100 ms */
|
||||||
|
expire = tick_add(now_ms, 100);
|
||||||
|
|
||||||
|
wait_expire:
|
||||||
|
limit_listener(l, &global_listener_queue);
|
||||||
|
task_schedule(global_listener_queue_task, tick_first(expire, global_listener_queue_task->expire));
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
Loading…
x
Reference in New Issue
Block a user