diff --git a/doc/configuration.txt b/doc/configuration.txt
index a2ba7cf93..780d27fd9 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -697,6 +697,7 @@ The following keywords are supported in the "global" section :
   - tune.rcvbuf.server
   - tune.recv_enough
   - tune.runqueue-depth
+  - tune.sched.low-latency
   - tune.sndbuf.client
   - tune.sndbuf.server
   - tune.ssl.cachesize
@@ -2095,7 +2096,20 @@ tune.recv_enough
 tune.runqueue-depth
   Sets the maximum amount of task that can be processed at once when running
   tasks. The default value is 200. Increasing it may incur latency when
-  dealing with I/Os, making it too small can incur extra overhead.
+  dealing with I/Os, while making it too small can incur extra overhead. When
+  experimenting with much larger values, it may be useful to also enable
+  tune.sched.low-latency to keep the maximum latency as low as possible.
+
+tune.sched.low-latency { on | off }
+  Enables ('on') or disables ('off') the low-latency task scheduler. By default
+  haproxy processes tasks from several classes one class at a time, as this is
+  the most efficient, but when running with large values of tune.runqueue-depth
+  this can have a measurable effect on request or connection latency. When this
+  low-latency setting is enabled, tasks of lower-priority classes will always
+  be executed before other ones if any exist. This makes it possible to lower
+  the maximum latency experienced by new requests or connections in the middle
+  of massive traffic, at the expense of a higher impact on that bulk traffic.
+  For regular usage it is better to leave this off. The default value is off.
 
 tune.sndbuf.client
 tune.sndbuf.server
@@ -15838,11 +15852,12 @@ lat_ns_avg : integer
   the value low, it is possible to reduce the scheduler's run queue depth using
   "tune.runqueue-depth", to reduce the number of concurrent events processed at
   once using "tune.maxpollevents", to decrease the stream's nice value using
-  the "nice" option on the "bind" lines or in the frontend, or to look for
-  other heavy requests in logs (those exhibiting large values of "cpu_ns_avg"),
-  whose processing needs to be adjusted or fixed. Compression of large buffers
-  could be a culprit, like heavy regex or long lists of regex.
-  Note: this value is exactly lat_ns_tot divided by cpu_calls.
+  the "nice" option on the "bind" lines or in the frontend, to enable low
+  latency scheduling using "tune.sched.low-latency", or to look for other heavy
+  requests in logs (those exhibiting large values of "cpu_ns_avg"), whose
+  processing needs to be adjusted or fixed. Compression of large buffers could
+  be a culprit, like heavy regex or long lists of regex. Note: this value is
+  exactly lat_ns_tot divided by cpu_calls.
 
 lat_ns_tot : integer
   Returns the total number of nanoseconds spent between the moment the task
@@ -15854,10 +15869,11 @@ lat_ns_tot : integer
   the value low, it is possible to reduce the scheduler's run queue depth using
   "tune.runqueue-depth", to reduce the number of concurrent events processed at
   once using "tune.maxpollevents", to decrease the stream's nice value using
-  the "nice" option on the "bind" lines or in the frontend, or to look for
-  other heavy requests in logs (those exhibiting large values of "cpu_ns_avg"),
-  whose processing needs to be adjusted or fixed. Compression of large buffers
-  could be a culprit, like heavy regex or long lists of regex. Note: while it
+  the "nice" option on the "bind" lines or in the frontend, to enable low
+  latency scheduling using "tune.sched.low-latency", or to look for other heavy
+  requests in logs (those exhibiting large values of "cpu_ns_avg"), whose
+  processing needs to be adjusted or fixed. Compression of large buffers could
+  be a culprit, like heavy regex or long lists of regex. Note: while it
   may intuitively seem that the total latency adds to a transfer time, it is
   almost never true because while a task waits for the CPU, network buffers
   continue to fill up and the next call will process more at once. The value
diff --git a/include/haproxy/global-t.h b/include/haproxy/global-t.h
index c7591b467..0da246dde 100644
--- a/include/haproxy/global-t.h
+++ b/include/haproxy/global-t.h
@@ -67,6 +67,7 @@
 #define GTUNE_INSECURE_FORK      (1<<16)
 #define GTUNE_INSECURE_SETUID    (1<<17)
 #define GTUNE_FD_ET              (1<<18)
+#define GTUNE_SCHED_LOW_LATENCY  (1<<19)
 
 /* SSL server verify mode */
 enum {
diff --git a/src/task.c b/src/task.c
index 22954adc2..6079956ba 100644
--- a/src/task.c
+++ b/src/task.c
@@ -16,6 +16,7 @@
 #include
 #include
 
+#include <haproxy/cfgparse.h>
 #include
 #include
 #include
@@ -328,6 +329,7 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
 	struct task *(*process)(struct task *t, void *ctx, unsigned short state);
 	struct list *tl_queues = sched->tasklets;
 	struct task *t;
+	uint8_t budget_mask = (1 << TL_CLASSES) - 1;
 	unsigned int done = 0;
 	unsigned int queue;
 	unsigned short state;
@@ -336,6 +338,33 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
 	for (queue = 0; queue < TL_CLASSES;) {
 		sched->current_queue = queue;
 
+		/* global.tune.sched.low-latency is set */
+		if (global.tune.options & GTUNE_SCHED_LOW_LATENCY) {
+			if (unlikely(sched->tl_class_mask & budget_mask & ((1 << queue) - 1))) {
+				/* a lower queue index has tasks again and still has a
+				 * budget to run them. Let's switch to it now.
+				 */
+				queue = (sched->tl_class_mask & 1) ? 0 :
+					(sched->tl_class_mask & 2) ? 1 : 2;
+				continue;
+			}
+
+			if (unlikely(queue > TL_URGENT &&
+				     budget_mask & (1 << TL_URGENT) &&
+				     !MT_LIST_ISEMPTY(&sched->shared_tasklet_list))) {
+				/* an urgent tasklet arrived from another thread */
+				break;
+			}
+
+			if (unlikely(queue > TL_NORMAL &&
+				     budget_mask & (1 << TL_NORMAL) &&
+				     ((sched->rqueue_size > 0) ||
+				      (global_tasks_mask & tid_bit)))) {
+				/* a task was woken up by a bulk tasklet or another thread */
+				break;
+			}
+		}
+
 		if (LIST_ISEMPTY(&tl_queues[queue])) {
 			sched->tl_class_mask &= ~(1 << queue);
 			queue++;
@@ -343,6 +372,7 @@
 		}
 
 		if (!budgets[queue]) {
+			budget_mask &= ~(1 << queue);
 			queue++;
 			continue;
 		}
@@ -687,6 +717,32 @@ static void init_task()
 	}
 }
 
+/* config parser for global "tune.sched.low-latency", accepts "on" or "off" */
+static int cfg_parse_tune_sched_low_latency(char **args, int section_type, struct proxy *curpx,
+                                            struct proxy *defpx, const char *file, int line,
+                                            char **err)
+{
+	if (too_many_args(1, args, err, NULL))
+		return -1;
+
+	if (strcmp(args[1], "on") == 0)
+		global.tune.options |= GTUNE_SCHED_LOW_LATENCY;
+	else if (strcmp(args[1], "off") == 0)
+		global.tune.options &= ~GTUNE_SCHED_LOW_LATENCY;
+	else {
+		memprintf(err, "'%s' expects either 'on' or 'off' but got '%s'.", args[0], args[1]);
+		return -1;
+	}
+	return 0;
+}
+
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {ILH, {
+	{ CFG_GLOBAL, "tune.sched.low-latency", cfg_parse_tune_sched_low_latency },
+	{ 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
 INITCALL0(STG_PREPARE, init_task);
 
 /*
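
A quick usage sketch of the new keyword, following the updated tune.runqueue-depth
text above which suggests pairing a much larger run queue with the low-latency
scheduler. The figures are illustrative only, not a tuning recommendation:

  Example :
        global
            # experiment with a run queue well above the default of 200, and
            # enable the low-latency scheduler so that urgent work queued in
            # the middle of a large batch is not made to wait for the batch
            tune.runqueue-depth 1000
            tune.sched.low-latency on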
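
The core of the run_tasks_from_lists() change is the test deciding when to abandon
the class currently being drained and jump back to a lower-index (higher-priority)
one. Below is a standalone sketch of just that decision, outside HAProxy: the enum
values mirror the patch, but the helper name pick_lower_class() is invented for
illustration and the budget accounting done by the real loop is not modelled.

#include <stdio.h>

/* same 3-class layout as the patched scheduler */
enum { TL_URGENT = 0, TL_NORMAL = 1, TL_BULK = 2, TL_CLASSES = 3 };

/* returns the class to switch to, or -1 to keep draining 'queue' */
static int pick_lower_class(unsigned int tl_class_mask, unsigned int budget_mask, int queue)
{
	/* guard from the patch: is any class below 'queue' both non-empty
	 * (tl_class_mask) and still holding part of its budget (budget_mask)?
	 */
	if (!(tl_class_mask & budget_mask & ((1U << queue) - 1)))
		return -1;

	/* same selection as the patch: lowest-index non-empty class wins */
	return (tl_class_mask & 1) ? TL_URGENT :
	       (tl_class_mask & 2) ? TL_NORMAL : TL_BULK;
}

int main(void)
{
	/* while BULK is being drained, an URGENT tasklet shows up and the
	 * URGENT class still has budget: switch back to it (prints 0)
	 */
	printf("%d\n", pick_lower_class(1U << TL_URGENT, (1U << TL_CLASSES) - 1, TL_BULK));

	/* URGENT has work again but its budget is exhausted: stay on BULK
	 * and finish the batch (prints -1)
	 */
	printf("%d\n", pick_lower_class(1U << TL_URGENT, 1U << TL_BULK, TL_BULK));
	return 0;
}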
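
To see whether enabling the option actually helps on a given workload, the
lat_ns_avg and cpu_ns_avg fetches from the documentation hunks above can be
logged per request and compared before and after. A sketch only: the frontend
name, port and backend are placeholders, a log target is assumed to be
configured elsewhere, and these fetches report meaningful values only while
task profiling is active:

        global
            # lat_ns_avg / cpu_ns_avg are only meaningful with profiling on
            profiling.tasks on

        frontend fe_main
            bind :8080
            # append average scheduling latency and CPU time, in nanoseconds,
            # so delayed or heavy streams stand out in the logs
            log-format "%ci:%cp [%tr] %ft %b/%s %ST %B lat=%[lat_ns_avg] cpu=%[cpu_ns_avg]"
            default_backend be_app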