CLEANUP: task: split the large tasklet_wakeup_on() function in two

This function has become large with the multi-queue scheduler. We need to keep the fast path and the debugging parts inlined, but the rest now moves to task.c, just as was done for task_wakeup(). This reduces the code size by 6kB thanks to less inlining of large parts that are always context-dependent and, as a side effect, increases the overall performance by 1%.
2025-11-25 21:01:09 +01:00 · 2021-02-24 17:51:38 +01:00 · 2021-02-24 17:51:38 +01:00 · 9c6dbf0eea
commit 9c6dbf0eea
parent 955a11ebfa
2 changed files with 40 additions and 30 deletions
--- a/include/haproxy/task.h
+++ b/include/haproxy/task.h
@ -105,6 +105,7 @@ extern struct task_per_thread task_per_thread[MAX_THREADS];
 __decl_thread(extern HA_SPINLOCK_T rq_lock);  /* spin lock related to run queue */
 __decl_thread(extern HA_RWLOCK_T wq_lock);    /* RW lock related to the wait queue */

+void __tasklet_wakeup_on(struct tasklet *tl, int thr);
 void task_kill(struct task *t);
 void __task_wakeup(struct task *t);
 void __task_queue(struct task *task, struct eb_root *wq);
@ -375,36 +376,7 @@ static inline void _tasklet_wakeup_on(struct tasklet *tl, int thr, const char *f
 	tl->debug.caller_file[tl->debug.caller_idx] = file;
 	tl->debug.caller_line[tl->debug.caller_idx] = line;
 #endif
-
-	if (likely(thr < 0)) {
-		/* this tasklet runs on the caller thread */
-		if (tl->state & TASK_SELF_WAKING) {
-			LIST_ADDQ(&sched->tasklets[TL_BULK], &tl->list);
-			sched->tl_class_mask |= 1 << TL_BULK;
-		}
-		else if ((struct task *)tl == sched->current) {
-			_HA_ATOMIC_OR(&tl->state, TASK_SELF_WAKING);
-			LIST_ADDQ(&sched->tasklets[TL_BULK], &tl->list);
-			sched->tl_class_mask |= 1 << TL_BULK;
-		}
-		else if (sched->current_queue < 0) {
-			LIST_ADDQ(&sched->tasklets[TL_URGENT], &tl->list);
-			sched->tl_class_mask |= 1 << TL_URGENT;
-		}
-		else {
-			LIST_ADDQ(&sched->tasklets[sched->current_queue], &tl->list);
-			sched->tl_class_mask |= 1 << sched->current_queue;
-		}
-		_HA_ATOMIC_ADD(&sched->rq_total, 1);
-	} else {
-		/* this tasklet runs on a specific thread. */
-		MT_LIST_ADDQ(&task_per_thread[thr].shared_tasklet_list, (struct mt_list *)&tl->list);
-		_HA_ATOMIC_ADD(&task_per_thread[thr].rq_total, 1);
-		if (sleeping_thread_mask & (1UL << thr)) {
-			_HA_ATOMIC_AND(&sleeping_thread_mask, ~(1UL << thr));
-			wake_thread(thr);
-		}
-	}
+	__tasklet_wakeup_on(tl, thr);
 }

 /* schedules tasklet <tl> to run onto the thread designated by tl->tid, which
--- a/src/task.c
+++ b/src/task.c
@ -105,6 +105,44 @@ void task_kill(struct task *t)
 	}
 }

+/* Slow path of tasklet_wakeup_on(): do not call this function directly, always
+ * go through tasklet_wakeup_on(), which performs some preliminary checks and
+ * sets TASK_IN_LIST before calling this one. A negative <thr> designates the
+ * current thread.
+ */
+void __tasklet_wakeup_on(struct tasklet *tl, int thr)
+{
+	if (likely(thr < 0)) {
+		/* this tasklet runs on the caller thread */
+		if (tl->state & TASK_SELF_WAKING) { /* already flagged self-waking: goes to the TL_BULK class */
+			LIST_ADDQ(&sched->tasklets[TL_BULK], &tl->list);
+			sched->tl_class_mask |= 1 << TL_BULK;
+		}
+		else if ((struct task *)tl == sched->current) { /* waking itself while running: flag it, then TL_BULK too */
+			_HA_ATOMIC_OR(&tl->state, TASK_SELF_WAKING);
+			LIST_ADDQ(&sched->tasklets[TL_BULK], &tl->list);
+			sched->tl_class_mask |= 1 << TL_BULK;
+		}
+		else if (sched->current_queue < 0) { /* no valid current class index: use TL_URGENT */
+			LIST_ADDQ(&sched->tasklets[TL_URGENT], &tl->list);
+			sched->tl_class_mask |= 1 << TL_URGENT;
+		}
+		else { /* otherwise append to the class designated by current_queue */
+			LIST_ADDQ(&sched->tasklets[sched->current_queue], &tl->list);
+			sched->tl_class_mask |= 1 << sched->current_queue;
+		}
+		_HA_ATOMIC_ADD(&sched->rq_total, 1); /* account for the newly queued tasklet */
+	} else {
+		/* this tasklet runs on a specific thread. */
+		MT_LIST_ADDQ(&task_per_thread[thr].shared_tasklet_list, (struct mt_list *)&tl->list);
+		_HA_ATOMIC_ADD(&task_per_thread[thr].rq_total, 1); /* accounted on the target thread, not the caller */
+		if (sleeping_thread_mask & (1UL << thr)) { /* target is marked sleeping: clear its bit and wake it */
+			_HA_ATOMIC_AND(&sleeping_thread_mask, ~(1UL << thr));
+			wake_thread(thr);
+		}
+	}
+}
+
 /* Puts the task <t> in run queue at a position depending on t->nice. <t> is
 * returned. The nice value assigns boosts in 32th of the run queue size. A
 * nice value of -1024 sets the task to -tasks_run_queue*32, while a nice value