mirror of
https://git.haproxy.org/git/haproxy.git/
synced 2025-09-20 13:21:29 +02:00
OPTIM: sink: reduce contention on sink_announce_dropped()
perf top shows that sink_announce_dropped() consumes most of the CPU on a 128-thread x86 system. Digging further reveals that the atomic fetch_or() on the dropped field, which is used to detect the presence of another thread, is entirely responsible for this. Indeed, the compiler implements it using a CAS that loops without relaxing and makes all threads wait until they can synchronize on it, only to discover later that another thread is already there and that they need to give up. Let's just replace this with a hand-crafted CAS loop that detects *before* attempting the CAS whether another thread is there. Doing so achieves the same goal without forcing threads to agree. With this simple change, the sustained request rate on h1 with all traces enabled jumped from 110k/s to 244k/s! This should be backported to stable releases, where traces are often needed to help with debugging.
This commit is contained in:
parent
361c227465
commit
4431e3bd26
@ -298,11 +298,13 @@ int sink_announce_dropped(struct sink *sink, struct log_header hdr)
|
|||||||
* another thread is already on them and we can just pass and
|
* another thread is already on them and we can just pass and
|
||||||
* count another drop (hence add 2).
|
* count another drop (hence add 2).
|
||||||
*/
|
*/
|
||||||
dropped = HA_ATOMIC_FETCH_OR(&sink->ctx.dropped, 1);
|
dropped = HA_ATOMIC_LOAD(&sink->ctx.dropped);
|
||||||
|
do {
|
||||||
if (dropped & 1) {
|
if (dropped & 1) {
|
||||||
/* another thread was already on it */
|
/* another thread was already on it */
|
||||||
goto leave;
|
goto leave;
|
||||||
}
|
}
|
||||||
|
} while (!_HA_ATOMIC_CAS(&sink->ctx.dropped, &dropped, dropped | 1));
|
||||||
|
|
||||||
last_dropped = 0;
|
last_dropped = 0;
|
||||||
dropped >>= 1;
|
dropped >>= 1;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user