From 13c89bf20de07e32d570f48f7465119bb53e332c Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Fri, 6 Mar 2026 19:51:08 +0100 Subject: [PATCH] MINOR: memprof: also permit to sort output by calling context By passing "byctx" to "show profiling memory", it's possible to sort by the calling context first, which could help group certain calls by subsystem and ease the interpretation of the output. --- doc/management.txt | 17 +++++++++-------- src/activity.c | 32 +++++++++++++++++++++++++++++--- 2 files changed, 38 insertions(+), 11 deletions(-) diff --git a/doc/management.txt b/doc/management.txt index 9cae22aae..218994356 100644 --- a/doc/management.txt +++ b/doc/management.txt @@ -3359,7 +3359,7 @@ show pools [byname|bysize|byusage] [detailed] [match ] [] - Pool quic_conn_c (152 bytes) : 1337 allocated (203224 bytes), ... Total: 15 pools, 109578176 bytes allocated, 109578176 used ... -show profiling [{all | status | tasks | memory}] [byaddr|bytime|aggr|]* +show profiling [{all | status | tasks | memory}] [byaddr|bytime|byctx|aggr|]* Dumps the current profiling settings, one per line, as well as the command needed to change them. When tasks profiling is enabled, some per-function statistics collected by the scheduler will also be emitted, with a summary @@ -3368,14 +3368,15 @@ show profiling [{all | status | tasks | memory}] [byaddr|bytime|aggr| allocations/releases and their sizes will be reported. It is possible to limit the dump to only the profiling status, the tasks, or the memory profiling by specifying the respective keywords; by default all profiling - information are dumped. It is also possible to limit the number of lines + information are dumped. It is also possible to limit the number of lines of output of each category by specifying a numeric limit. It is possible to - request that the output is sorted by address or by total execution time - instead of usage, e.g.
to ease comparisons between subsequent calls or to - check what needs to be optimized, and to aggregate task activity by called - function instead of seeing the details. Please note that profiling is - essentially aimed at developers since it gives hints about where CPU cycles - or memory are wasted in the code. There is nothing useful to monitor there. + request that the output is sorted by address, by total execution time, or by + calling context instead of usage, e.g. to ease comparisons between subsequent + calls or to check what needs to be optimized, and to aggregate task activity + by called function instead of seeing the details. Please note that profiling + is essentially aimed at developers since it gives hints about where CPU + cycles or memory are wasted in the code. There is nothing useful to monitor + there. show resolvers [] Dump statistics for the given resolvers section, or all resolvers sections diff --git a/src/activity.c b/src/activity.c index 61648ba40..225a39b64 100644 --- a/src/activity.c +++ b/src/activity.c @@ -29,7 +29,7 @@ struct show_prof_ctx { int dump_step; /* 0,1,2,4,5,6; see cli_iohandler_show_profiling() */ int linenum; /* next line to be dumped (starts at 0) */ int maxcnt; /* max line count per step (0=not set) */ - int by_what; /* 0=sort by usage, 1=sort by address, 2=sort by time */ + int by_what; /* 0=sort by usage, 1=sort by address, 2=sort by time, 3=sort by ctx */ int aggr; /* 0=dump raw, 1=aggregate on callee */ /* 4-byte hole here */ struct sched_activity *tmp_activity; /* dynamically allocated during dumps */ @@ -959,6 +959,27 @@ static int cmp_memprof_addr(const void *a, const void *b) else return 0; } + +static int cmp_memprof_ctx(const void *a, const void *b) +{ + const struct memprof_stats *l = (const struct memprof_stats *)a; + const struct memprof_stats *r = (const struct memprof_stats *)b; + + if (l->exec_ctx.pointer > r->exec_ctx.pointer) + return -1; + else if (l->exec_ctx.pointer < r->exec_ctx.pointer) + return
1; + else if (l->exec_ctx.type > r->exec_ctx.type) + return -1; + else if (l->exec_ctx.type < r->exec_ctx.type) + return 1; + else if (l->caller > r->caller) + return -1; + else if (l->caller < r->caller) + return 1; + else + return 0; +} #endif // USE_MEMORY_PROFILING /* Computes the index of function pointer and caller for use @@ -1188,8 +1209,10 @@ static int cli_io_handler_show_profiling(struct appctx *appctx) ctx->tmp_memstats = tmp_memstats; memcpy(tmp_memstats, memprof_stats, sizeof(memprof_stats)); - if (ctx->by_what) + if (ctx->by_what == 1) qsort(tmp_memstats, MEMPROF_HASH_BUCKETS+1, sizeof(tmp_memstats[0]), cmp_memprof_addr); + else if (ctx->by_what == 3) + qsort(tmp_memstats, MEMPROF_HASH_BUCKETS+1, sizeof(tmp_memstats[0]), cmp_memprof_ctx); else qsort(tmp_memstats, MEMPROF_HASH_BUCKETS+1, sizeof(tmp_memstats[0]), cmp_memprof_stats); @@ -1424,6 +1447,9 @@ static int cli_parse_show_profiling(char **args, char *payload, struct appctx *a else if (strcmp(args[arg], "bytime") == 0) { ctx->by_what = 2; // sort output by total time instead of usage } + else if (strcmp(args[arg], "byctx") == 0) { + ctx->by_what = 3; // sort output by caller context instead of usage + } else if (strcmp(args[arg], "aggr") == 0) { ctx->aggr = 1; // aggregate output by callee } @@ -1431,7 +1457,7 @@ static int cli_parse_show_profiling(char **args, char *payload, struct appctx *a ctx->maxcnt = atoi(args[arg]); // number of entries to dump } else - return cli_err(appctx, "Expects either 'all', 'status', 'tasks', 'memory', 'byaddr', 'bytime', 'aggr' or a max number of output lines.\n"); + return cli_err(appctx, "Expects either 'all', 'status', 'tasks', 'memory', 'byaddr', 'bytime', 'byctx', 'aggr' or a max number of output lines.\n"); } return 0; }