From 75f72338dfe9c41f063c5d3d03febdba71020a60 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Fri, 29 Jan 2021 15:04:16 +0100 Subject: [PATCH] BUG/MINOR: activity: take care of late wakeups in "show tasks" During the call to thread_isolate(), some other threads might have performed a task_wakeup(), whose call date will then be later than the date we retrieved, resulting in a negative latency. This could be avoided by taking the current date once we're alone, but this would significantly affect the latency measurements by adding the isolation time. Instead we now only account for positive times, so that late wakeups normally appear with a zero latency. No backport is needed, this is 2.4. --- src/activity.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/activity.c b/src/activity.c index 96f5008f3..0d945ac96 100644 --- a/src/activity.c +++ b/src/activity.c @@ -209,7 +209,10 @@ static int cli_io_handler_show_tasks(struct appctx *appctx) * possible, using a sched_activity array to collect metrics with * limited collision, then we'll report statistics only. The tasks' * #calls will reflect the number of occurrences, and the lat_time will - * reflect the latency when set. + * reflect the latency when set. We prefer to take the time before + * calling thread_isolate() so that the wait time doesn't impact the + * measurement accuracy. However this requires to take care of negative + * times since tasks might be queued after we retrieve it.
*/ now_ns = now_mono_time(); @@ -226,7 +229,8 @@ static int cli_io_handler_show_tasks(struct appctx *appctx) entry = sched_activity_entry(tmp_activity, t->process); if (t->call_date) { lat = now_ns - t->call_date; - entry->lat_time += lat; + if ((int64_t)lat > 0) + entry->lat_time += lat; } entry->calls++; rqnode = eb32sc_next(rqnode, ~0UL); @@ -241,7 +245,8 @@ static int cli_io_handler_show_tasks(struct appctx *appctx) entry = sched_activity_entry(tmp_activity, t->process); if (t->call_date) { lat = now_ns - t->call_date; - entry->lat_time += lat; + if ((int64_t)lat > 0) + entry->lat_time += lat; } entry->calls++; rqnode = eb32sc_next(rqnode, ~0UL); @@ -253,7 +258,8 @@ static int cli_io_handler_show_tasks(struct appctx *appctx) entry = sched_activity_entry(tmp_activity, t->process); if (!TASK_IS_TASKLET(t) && t->call_date) { lat = now_ns - t->call_date; - entry->lat_time += lat; + if ((int64_t)lat > 0) + entry->lat_time += lat; } entry->calls++; } @@ -265,7 +271,8 @@ static int cli_io_handler_show_tasks(struct appctx *appctx) entry = sched_activity_entry(tmp_activity, t->process); if (!TASK_IS_TASKLET(t) && t->call_date) { lat = now_ns - t->call_date; - entry->lat_time += lat; + if ((int64_t)lat > 0) + entry->lat_time += lat; } entry->calls++; }