BUG/MEDIUM: checks: don't needlessly take the server lock in health_adjust()

The server lock was taken preventively for anything in health_adjust(),
including the static config checks needed to detect that the lock was not
needed, while the function is always called on the response path to update
a server's status. This was responsible for huge contention causing a
performance drop of about 17% on 16 threads. Let's move the lock only
where it should be, i.e. inside the function around the critical sections
only. By doing this, a 16-thread process jumped back from 575 to 675 krps.

This should be backported to 2.3 as the situation degraded there, and
maybe later to 2.2.
This commit is contained in:
Willy Tarreau 2021-02-17 15:20:19 +01:00
parent 64ba5ebadc
commit 4e9df2737d
2 changed files with 14 additions and 13 deletions

View File

@ -60,15 +60,11 @@ void set_srv_agent_port(struct server *srv, int port);
*/ */
static inline void health_adjust(struct server *s, short status) static inline void health_adjust(struct server *s, short status)
{ {
HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
/* return now if observing nor health check is not enabled */ /* return now if observing nor health check is not enabled */
if (!s->observe || !s->check.task) { if (!s->observe || !s->check.task)
HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
return; return;
}
__health_adjust(s, status); __health_adjust(s, status);
HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
} }
#endif /* _HAPROXY_CHECKS_H */ #endif /* _HAPROXY_CHECKS_H */

View File

@ -404,7 +404,7 @@ void check_notify_stopping(struct check *check)
} }
/* note: use health_adjust() only, which first checks that the observe mode is /* note: use health_adjust() only, which first checks that the observe mode is
* enabled. * enabled. This will take the server lock if needed.
*/ */
void __health_adjust(struct server *s, short status) void __health_adjust(struct server *s, short status)
{ {
@ -444,6 +444,13 @@ void __health_adjust(struct server *s, short status)
chunk_printf(&trash, "Detected %d consecutive errors, last one was: %s", chunk_printf(&trash, "Detected %d consecutive errors, last one was: %s",
s->consecutive_errors, get_analyze_status(status)); s->consecutive_errors, get_analyze_status(status));
if (s->check.fastinter)
expire = tick_add(now_ms, MS_TO_TICKS(s->check.fastinter));
else
expire = TICK_ETERNITY;
HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
switch (s->onerror) { switch (s->onerror) {
case HANA_ONERR_FASTINTER: case HANA_ONERR_FASTINTER:
/* force fastinter - nothing to do here as all modes force it */ /* force fastinter - nothing to do here as all modes force it */
@ -476,16 +483,14 @@ void __health_adjust(struct server *s, short status)
break; break;
} }
HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
s->consecutive_errors = 0; s->consecutive_errors = 0;
_HA_ATOMIC_ADD(&s->counters.failed_hana, 1); _HA_ATOMIC_ADD(&s->counters.failed_hana, 1);
if (s->check.fastinter) { if (tick_is_lt(expire, s->check.task->expire)) {
expire = tick_add(now_ms, MS_TO_TICKS(s->check.fastinter)); /* requeue check task with new expire */
if (tick_is_lt(expire, s->check.task->expire)) { task_schedule(s->check.task, expire);
s->check.task->expire = expire;
/* requeue check task with new expire */
task_queue(s->check.task);
}
} }
} }