MINOR: memprof: also permit to sort output by calling context

By passing "byctx" to "show profiling memory", it's possible to sort by
the calling context first, which could help group certain calls by
subsystem and ease the interpretation of the output.
This commit is contained in:
Willy Tarreau 2026-03-06 19:51:08 +01:00
parent 2dfc8417cf
commit 13c89bf20d
2 changed files with 38 additions and 11 deletions

View File

@ -3359,7 +3359,7 @@ show pools [byname|bysize|byusage] [detailed] [match <pfx>] [<nb>]
- Pool quic_conn_c (152 bytes) : 1337 allocated (203224 bytes), ...
Total: 15 pools, 109578176 bytes allocated, 109578176 used ...
show profiling [{all | status | tasks | memory}] [byaddr|bytime|aggr|<max_lines>]*
show profiling [{all | status | tasks | memory}] [byaddr|bytime|byctx|aggr|<max_lines>]*
Dumps the current profiling settings, one per line, as well as the command
needed to change them. When tasks profiling is enabled, some per-function
statistics collected by the scheduler will also be emitted, with a summary
@ -3368,14 +3368,15 @@ show profiling [{all | status | tasks | memory}] [byaddr|bytime|aggr|<max_lines>
allocations/releases and their sizes will be reported. It is possible to
limit the dump to only the profiling status, the tasks, or the memory
profiling by specifying the respective keywords; by default all profiling
information are dumped. It is also possible to limit the number of lines
information is dumped. It is also possible to limit the number of lines of
of output of each category by specifying a numeric limit. It is possible to
request that the output is sorted by address or by total execution time
instead of usage, e.g. to ease comparisons between subsequent calls or to
check what needs to be optimized, and to aggregate task activity by called
function instead of seeing the details. Please note that profiling is
essentially aimed at developers since it gives hints about where CPU cycles
or memory are wasted in the code. There is nothing useful to monitor there.
request that the output is sorted by address, by total execution time, or by
calling context instead of usage, e.g. to ease comparisons between subsequent
calls or to check what needs to be optimized, and to aggregate task activity
by called function instead of seeing the details. Please note that profiling
is essentially aimed at developers since it gives hints about where CPU
cycles or memory are wasted in the code. There is nothing useful to monitor
there.
show resolvers [<resolvers section id>]
Dump statistics for the given resolvers section, or all resolvers sections

View File

@ -29,7 +29,7 @@ struct show_prof_ctx {
int dump_step; /* 0,1,2,4,5,6; see cli_iohandler_show_profiling() */
int linenum; /* next line to be dumped (starts at 0) */
int maxcnt; /* max line count per step (0=not set) */
int by_what; /* 0=sort by usage, 1=sort by address, 2=sort by time */
int by_what; /* 0=sort by usage, 1=sort by address, 2=sort by time, 3=sort by ctx */
int aggr; /* 0=dump raw, 1=aggregate on callee */
/* 4-byte hole here */
struct sched_activity *tmp_activity; /* dynamically allocated during dumps */
@ -959,6 +959,27 @@ static int cmp_memprof_addr(const void *a, const void *b)
else
return 0;
}
/* qsort() comparator for memprof_stats: orders entries by execution context
 * first (pointer, then type), then by caller address, each key descending.
 * Used by "show profiling memory byctx" to group allocations by the calling
 * context before falling back to the caller for ties.
 */
static int cmp_memprof_ctx(const void *a, const void *b)
{
	const struct memprof_stats *l = a;
	const struct memprof_stats *r = b;

	/* each key sorts descending: larger values come first */
	if (l->exec_ctx.pointer != r->exec_ctx.pointer)
		return (l->exec_ctx.pointer > r->exec_ctx.pointer) ? -1 : 1;

	if (l->exec_ctx.type != r->exec_ctx.type)
		return (l->exec_ctx.type > r->exec_ctx.type) ? -1 : 1;

	/* tie-break on caller address, also descending */
	if (l->caller != r->caller)
		return (l->caller > r->caller) ? -1 : 1;

	return 0;
}
#endif // USE_MEMORY_PROFILING
/* Computes the index of function pointer <func> and caller <caller> for use
@ -1188,8 +1209,10 @@ static int cli_io_handler_show_profiling(struct appctx *appctx)
ctx->tmp_memstats = tmp_memstats;
memcpy(tmp_memstats, memprof_stats, sizeof(memprof_stats));
if (ctx->by_what)
if (ctx->by_what == 1)
qsort(tmp_memstats, MEMPROF_HASH_BUCKETS+1, sizeof(tmp_memstats[0]), cmp_memprof_addr);
else if (ctx->by_what == 3)
qsort(tmp_memstats, MEMPROF_HASH_BUCKETS+1, sizeof(tmp_memstats[0]), cmp_memprof_ctx);
else
qsort(tmp_memstats, MEMPROF_HASH_BUCKETS+1, sizeof(tmp_memstats[0]), cmp_memprof_stats);
@ -1424,6 +1447,9 @@ static int cli_parse_show_profiling(char **args, char *payload, struct appctx *a
else if (strcmp(args[arg], "bytime") == 0) {
ctx->by_what = 2; // sort output by total time instead of usage
}
else if (strcmp(args[arg], "byctx") == 0) {
ctx->by_what = 3; // sort output by caller context instead of usage
}
else if (strcmp(args[arg], "aggr") == 0) {
ctx->aggr = 1; // aggregate output by callee
}
@ -1431,7 +1457,7 @@ static int cli_parse_show_profiling(char **args, char *payload, struct appctx *a
ctx->maxcnt = atoi(args[arg]); // number of entries to dump
}
else
return cli_err(appctx, "Expects either 'all', 'status', 'tasks', 'memory', 'byaddr', 'bytime', 'aggr' or a max number of output lines.\n");
return cli_err(appctx, "Expects either 'all', 'status', 'tasks', 'memory', 'byaddr', 'bytime', 'byctx', 'aggr' or a max number of output lines.\n");
}
return 0;
}