From 1e56c70cc963177b8990edcb88a2896077de4c51 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 7 Mar 2019 18:44:12 +0100 Subject: [PATCH] OPTIM: task: limit the impact of memory barriers in task_remove_from_task_list() In this function we end up with successive locked operations then a store barrier, and in addition the compiler has to emit less efficient code due to a longer jump. There's no absolute need to update the tasks_run_queue counter before clearing the task's leaf pointer, so let's swap the two operations and benefit from a single barrier as much as possible. This code is on the hot path and shows about half a percent of improvement with 8 threads. --- include/proto/task.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/include/proto/task.h b/include/proto/task.h index c90a36982..f11b445a0 100644 --- a/include/proto/task.h +++ b/include/proto/task.h @@ -273,11 +273,9 @@ static inline void task_remove_from_task_list(struct task *t) { LIST_DEL_INIT(&((struct tasklet *)t)->list); task_per_thread[tid].task_list_size--; + if (!TASK_IS_TASKLET(t)) + HA_ATOMIC_STORE(&t->rq.node.leaf_p, NULL); // was 0x1 HA_ATOMIC_SUB(&tasks_run_queue, 1); - if (!TASK_IS_TASKLET(t)) { - t->rq.node.leaf_p = NULL; // was 0x1 - __ha_barrier_store(); - } } /*