mirror of https://git.haproxy.org/git/haproxy.git/
MEDIUM: ring: improve speed in the queue waiting loop on x86_64
x86_64 doesn't have a native atomic FETCH_OR(); it's implemented using a CAS, which always causes a write cycle. Here we know we can simply wait as long as the lock bit is held, so it's better to spin on a plain load and only attempt the CAS once the load shows the lock bit is clear. This requires a tiny ifdef and brings nice benefits: performance goes back from 3.33M to 3.75M at 24C48T, with no change at 3C6T.
commit 573bbbe127
parent 30a659c355
src/ring.c | 14 +++++++++-----
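Before the hunk itself, here is the idea in miniature: a self-contained sketch of the load-before-FETCH_OR spin, written with GCC/Clang __atomic builtins. lock_tail(), TAIL_LOCK_BIT and cpu_relax() are illustrative stand-ins, not HAProxy's names (the real code uses the HA_ATOMIC_* macros and RING_TAIL_LOCK shown below), and the acquire ordering is this sketch's assumption:

#include <stdint.h>

#if defined(__x86_64__)
#include <immintrin.h>                  /* _mm_pause() */
#define cpu_relax() _mm_pause()
#else
#define cpu_relax() do { } while (0)    /* no-op fallback for the sketch */
#endif

#define TAIL_LOCK_BIT 1ULL              /* stand-in for RING_TAIL_LOCK */

/* Spin until the tail lock bit is grabbed; returns the previous word.
 * On x86_64 we spin on a plain load (read-only, no exclusive cache-line
 * acquisition) and only issue the FETCH_OR -- a CAS loop there, hence a
 * write cycle -- once the bit has been observed clear. On other
 * architectures the FETCH_OR is attempted directly, as in the patch.
 */
static uint64_t lock_tail(uint64_t *tail_ptr)
{
	uint64_t tail_ofs;

	while (1) {
#if defined(__x86_64__)
		/* x86 prefers a read first */
		if (!(__atomic_load_n(tail_ptr, __ATOMIC_ACQUIRE) & TAIL_LOCK_BIT))
#endif
		{
			tail_ofs = __atomic_fetch_or(tail_ptr, TAIL_LOCK_BIT, __ATOMIC_ACQUIRE);
			if (!(tail_ofs & TAIL_LOCK_BIT))
				return tail_ofs;        /* got it: bit was clear */
		}
		cpu_relax();
	}
}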
@@ -281,11 +281,15 @@ ssize_t ring_write(struct ring *ring, size_t maxlen, const struct ist pfx[], siz
 		if (next_cell != &cell)
 			goto wait_for_flush; // FIXME: another thread arrived, we should go to wait now
 		__ha_cpu_relax_for_read();
-
-		tail_ofs = HA_ATOMIC_FETCH_OR(tail_ptr, RING_TAIL_LOCK);
-		if (!(tail_ofs & RING_TAIL_LOCK))
-			break;
-
+#if defined(__x86_64__)
+		/* x86 prefers a read first */
+		if (!(HA_ATOMIC_LOAD(tail_ptr) & RING_TAIL_LOCK))
+#endif
+		{
+			tail_ofs = HA_ATOMIC_FETCH_OR(tail_ptr, RING_TAIL_LOCK);
+			if (!(tail_ofs & RING_TAIL_LOCK))
+				break;
+		}
 		__ha_cpu_relax_for_read();
 	}
 	/* OK the queue is locked, let's attempt to get the tail lock */
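Two things about the hunk are worth spelling out. The braces and the trailing __ha_cpu_relax_for_read() sit outside the #ifdef, so non-x86_64 builds compile the exact same block, just without the read in front of it: one code path, only the guard differs. And the reason FETCH_OR turns into a CAS on x86_64 is that a generic fetch-or must return the whole previous word; x86_64 has no such instruction (lock bts only returns the prior state of one bit), so compilers fall back to a lock cmpxchg retry loop. A minimal illustration of both forms, assuming GCC/Clang __atomic builtins (not part of the patch):

#include <stdint.h>

/* A generic fetch-or must return the whole previous word. x86_64 has no
 * single instruction for that, so the compiler emits a load followed by
 * a "lock cmpxchg" retry loop -- a write cycle even when the bit was
 * already set, which is exactly what the read-first spin avoids.
 */
uint64_t fetch_or_full(uint64_t *p, uint64_t bit)
{
	return __atomic_fetch_or(p, bit, __ATOMIC_SEQ_CST);
}

/* When only the OR'ed bit itself is tested, recent GCC/Clang can lower
 * the whole expression to a single "lock bts". The ring code keeps the
 * whole previous tail word in tail_ofs, so it cannot use this form.
 */
int fetch_or_bit_only(uint64_t *p, unsigned shift)
{
	return (__atomic_fetch_or(p, 1ULL << shift, __ATOMIC_SEQ_CST) >> shift) & 1;
}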