Mirror of https://git.haproxy.org/git/haproxy.git/, synced 2025-09-20 13:21:29 +02:00
OPTIM: ring: avoid reloading the tail_ofs value before the CAS in ring_write()
The load followed by the CAS seems to cause two bus cycles: one to retrieve the cache line in shared state, and a second one to get exclusive ownership of it. Tests show that on x86 it's much better to just rely on the value left by the previous CAS and to preset it to zero before entering the loop. We simply mask off the ring lock bit in case of failure, so as to challenge it again on the next iteration, and that's all. This little change brings 2.3% extra performance (11.34M msg/s) on a 64-core AMD.
parent a727c6eaa5
commit 79ef362d9e
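Before the diff, here is a minimal standalone sketch of the pattern described in the commit message, written with C11 atomics instead of HAProxy's HA_ATOMIC_* macros; the take_tail_lock() helper, the TAIL_LOCK bit value and the bare atomic word are hypothetical stand-ins rather than HAProxy's actual ring layout:

#include <stdatomic.h>

#define TAIL_LOCK 0x80000000u	/* hypothetical stand-in for RING_TAIL_LOCK */

/* Acquire the tail lock without a separate initial load: preset the
 * expected value to zero and let each failed CAS hand back the value it
 * observed. The failed CAS has already pulled the cache line in exclusive
 * state, so an explicit reload would only add a second bus transaction.
 */
static unsigned int take_tail_lock(_Atomic unsigned int *tail_ptr)
{
	unsigned int tail_ofs = 0;	/* preset; no load before the loop */

	while (1) {
		/* on failure, the value observed in *tail_ptr is written
		 * back into tail_ofs
		 */
		if (atomic_compare_exchange_weak(tail_ptr, &tail_ofs,
						 tail_ofs | TAIL_LOCK))
			break;
		/* if the word was locked, clear the lock bit so that the
		 * next attempt challenges the lock once it is released
		 */
		tail_ofs &= ~TAIL_LOCK;
		/* __ha_cpu_relax() would sit here in HAProxy */
	}
	return tail_ofs;	/* tail offset observed when taking the lock */
}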
@@ -274,6 +274,7 @@ ssize_t ring_write(struct ring *ring, size_t maxlen, const struct ist pfx[], siz
 	 * threads check the tail.
 	 */
 
+	tail_ofs = 0;
 	while (1) {
 #if defined(__x86_64__)
 		/* read using a CAS on x86, as it will keep the cache line
@@ -300,11 +301,9 @@ ssize_t ring_write(struct ring *ring, size_t maxlen, const struct ist pfx[], siz
 		if (!(tail_ofs & RING_TAIL_LOCK))
 			break;
 #else
-		tail_ofs = HA_ATOMIC_LOAD(tail_ptr);
-		if (likely(!(tail_ofs & RING_TAIL_LOCK))) {
-			if (HA_ATOMIC_CAS(tail_ptr, &tail_ofs, tail_ofs | RING_TAIL_LOCK))
-				break;
-		}
+		if (HA_ATOMIC_CAS(tail_ptr, &tail_ofs, tail_ofs | RING_TAIL_LOCK))
+			break;
+		tail_ofs &= ~RING_TAIL_LOCK;
 #endif
 		__ha_cpu_relax();
 	}
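Note on the failure path: this relies on the usual compare-and-swap convention (as with gcc's __atomic_compare_exchange_n, on which HA_ATOMIC_CAS appears to be built) that a failed CAS writes the observed value back into the expected operand. After a failure, tail_ofs therefore already holds the current word, so clearing RING_TAIL_LOCK from it is enough to challenge the lock again on the next iteration without issuing another load.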