diff --git a/include/haproxy/task-t.h b/include/haproxy/task-t.h index c46cdcc65..632f4613a 100644 --- a/include/haproxy/task-t.h +++ b/include/haproxy/task-t.h @@ -58,7 +58,8 @@ enum { TL_URGENT = 0, /* urgent tasklets (I/O callbacks) */ TL_NORMAL = 1, /* normal tasks */ - TL_BULK = 2, /* bulk task/tasklets, streaming I/Os */ + TL_BULK = 2, /* bulk task/tasklets, streaming I/Os */ + TL_HEAVY = 3, /* heavy computational tasklets (e.g. TLS handshakes) */ TL_CLASSES /* must be last */ }; @@ -73,18 +74,25 @@ struct notification { /* force to split per-thread stuff into separate cache lines */ struct task_per_thread { + // first and second cache lines on 64 bits: thread-local operations only. struct eb_root timers; /* tree constituting the per-thread wait queue */ struct eb_root rqueue; /* tree constituting the per-thread run queue */ - struct mt_list shared_tasklet_list; /* Tasklet to be run, woken up by other threads */ - struct list tasklets[TL_CLASSES]; /* tasklets (and/or tasks) to run, by class */ - unsigned int rqueue_ticks; /* Insertion counter for the run queue */ - int tasks_in_list; /* Number of tasks in the per-thread tasklets list */ - int current_queue; /* points to current tasklet list being run, -1 if none */ - unsigned int rq_total; /* total size of the run queue, prio_tree + tasklets */ struct task *current; /* current task (not tasklet) */ + unsigned int rqueue_ticks; /* Insertion counter for the run queue */ + int current_queue; /* points to current tasklet list being run, -1 if none */ unsigned int nb_tasks; /* number of tasks allocated on this thread */ uint8_t tl_class_mask; /* bit mask of non-empty tasklets classes */ - __attribute__((aligned(64))) char end[0]; + + // 11 bytes hole here + ALWAYS_ALIGN(2*sizeof(void*)); + struct list tasklets[TL_CLASSES]; /* tasklets (and/or tasks) to run, by class */ + + // third cache line here on 64 bits: accessed mostly using atomic ops. NOTE(review): TL_CLASSES is now 4, so tasklets[] is one entry longer than with 3 classes; if that pushes the thread-local part past 128 bytes this starts on the fourth line instead - confirm with pahole. + ALWAYS_ALIGN(64); + struct mt_list shared_tasklet_list; /* Tasklet to 
be run, woken up by other threads */ + unsigned int rq_total; /* total size of the run queue, prio_tree + tasklets */ + int tasks_in_list; /* Number of tasks in the per-thread tasklets list */ + ALWAYS_ALIGN(128); }; diff --git a/src/task.c b/src/task.c index 1cb8352f9..1b1738def 100644 --- a/src/task.c +++ b/src/task.c @@ -635,6 +635,7 @@ void process_runnable_tasks() [TL_URGENT] = 64, // ~50% of CPU bandwidth for I/O [TL_NORMAL] = 48, // ~37% of CPU bandwidth for tasks [TL_BULK] = 16, // ~13% of CPU bandwidth for self-wakers + [TL_HEAVY] = 1, // never more than 1 heavy task at once }; unsigned int max[TL_CLASSES]; // max to be run per class unsigned int max_total; // sum of max above @@ -673,6 +674,14 @@ void process_runnable_tasks() if ((tt->tl_class_mask & (1 << TL_BULK))) max[TL_BULK] = default_weights[TL_BULK]; + /* heavy tasks are processed only once and never refilled in a + * call round. + */ + if ((tt->tl_class_mask & (1 << TL_HEAVY))) + max[TL_HEAVY] = default_weights[TL_HEAVY]; + else + max[TL_HEAVY] = 0; + /* Now compute a fair share of the weights. Total may slightly exceed * 100% due to rounding, this is not a problem. Note that while in * theory the sum cannot be NULL as we cannot get there without tasklets @@ -681,7 +690,7 @@ void process_runnable_tasks() * a first MT_LIST_ISEMPTY() to succeed for thread_has_task() and the * one above to finally fail. This is extremely rare and not a problem. 
*/ - max_total = max[TL_URGENT] + max[TL_NORMAL] + max[TL_BULK]; + max_total = max[TL_URGENT] + max[TL_NORMAL] + max[TL_BULK] + max[TL_HEAVY]; if (!max_total) return; @@ -881,7 +890,7 @@ void mworker_cleantasks() /* perform minimal intializations */ static void init_task() { - int i; + int i, q; #ifdef USE_THREAD memset(&timers, 0, sizeof(timers)); @@ -889,9 +898,8 @@ static void init_task() #endif memset(&task_per_thread, 0, sizeof(task_per_thread)); for (i = 0; i < MAX_THREADS; i++) { - LIST_INIT(&task_per_thread[i].tasklets[TL_URGENT]); - LIST_INIT(&task_per_thread[i].tasklets[TL_NORMAL]); - LIST_INIT(&task_per_thread[i].tasklets[TL_BULK]); + for (q = 0; q < TL_CLASSES; q++) + LIST_INIT(&task_per_thread[i].tasklets[q]); MT_LIST_INIT(&task_per_thread[i].shared_tasklet_list); } }