MINOR: sched: add a new function is_sched_alive() to report scheduler's health

This verifies that the scheduler is still ticking without having to
access the activity[] array or keep local copies of the ctxsw
counter. It just tests and sets a flag that is reset after each
return from a ->process() function.
This commit is contained in:
Willy Tarreau 2025-04-17 15:24:08 +02:00
parent 874ba2afed
commit 36ec70c526
2 changed files with 26 additions and 0 deletions

View File

@ -123,6 +123,12 @@ void wake_expired_tasks(void);
*/
int next_timer_expiry(void);
/* Pings the scheduler to verify that tasks continue running.
 * The check relies on a thread-local flag, so the result only concerns
 * the calling thread. Each call that returns 1 re-arms the detection, so
 * the following call reports whether progress was made in between.
 * Returns 1 if the scheduler made progress since last call,
 * 0 if it looks stuck.
 */
int is_sched_alive(void);
/*
* Delete every task before running the master polling loop
*/

View File

@ -42,6 +42,9 @@ DECLARE_POOL(pool_head_notification, "notification", sizeof(struct notification)
*/
__decl_aligned_rwlock(wq_lock);
/* used to detect if the scheduler looks stuck (for warnings). This
 * per-thread flag is set to 1 by is_sched_alive() and reset to 0 by
 * run_tasks_from_lists() once a task has been processed or freed; finding
 * it still set on the next check means no progress was seen.
 */
static THREAD_LOCAL int sched_stuck;
/* Flags the task <t> for immediate destruction and puts it into its first
* thread's shared tasklet list if not yet queued/running. This will bypass
* the priority scheduling and make the task show up as fast as possible in
@ -605,6 +608,7 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
else {
done++;
th_ctx->current = NULL;
sched_stuck = 0; // scheduler is not stuck (don't warn)
/* signal barrier to prevent thread dump helpers
* from dumping a task currently being freed.
*/
@ -646,6 +650,7 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
task_unlink_wq(t);
__task_free(t);
th_ctx->current = NULL;
sched_stuck = 0; // scheduler is not stuck (don't warn)
__ha_barrier_store();
/* We don't want max_processed to be decremented if
* we're just freeing a destroyed task, we should only
@ -671,6 +676,7 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
}
th_ctx->current = NULL;
sched_stuck = 0; // scheduler is not stuck (don't warn)
__ha_barrier_store();
/* stats are only registered for non-zero wake dates */
@ -894,6 +900,20 @@ void process_runnable_tasks()
activity[tid].long_rq++;
}
/* Probes the calling thread's scheduler for signs of life. The check is
 * based on the thread-local <sched_stuck> flag: when it is found cleared,
 * progress was made since the previous probe, and the flag is armed again
 * so that the next probe can tell whether anything ran in between. When it
 * is found still armed, nothing cleared it since the last probe.
 * Returns 1 if the scheduler made progress since the last call, or 0 if it
 * looks stuck.
 */
int is_sched_alive(void)
{
	int alive = (sched_stuck == 0);

	if (alive) {
		/* arm the flag so that the next call can detect progress */
		sched_stuck = 1;
	}
	return alive;
}
/*
* Delete every task before running the master polling loop
*/