MINOR: memprof: also permit to sort output by calling context

By passing "byctx" to "show profiling memory", it's possible to sort by
the calling context first, which could help group certain calls by
subsystem and ease the interpretation of the output.
This commit is contained in:
Willy Tarreau 2026-03-06 19:51:08 +01:00
parent 2dfc8417cf
commit 13c89bf20d
2 changed files with 38 additions and 11 deletions

View File

@ -3359,7 +3359,7 @@ show pools [byname|bysize|byusage] [detailed] [match <pfx>] [<nb>]
- Pool quic_conn_c (152 bytes) : 1337 allocated (203224 bytes), ...
Total: 15 pools, 109578176 bytes allocated, 109578176 used ...
show profiling [{all | status | tasks | memory}] [byaddr|bytime|aggr|<max_lines>]*
show profiling [{all | status | tasks | memory}] [byaddr|bytime|byctx|aggr|<max_lines>]*
Dumps the current profiling settings, one per line, as well as the command
needed to change them. When tasks profiling is enabled, some per-function
statistics collected by the scheduler will also be emitted, with a summary
@ -3368,14 +3368,15 @@ show profiling [{all | status | tasks | memory}] [byaddr|bytime|aggr|<max_lines>
allocations/releases and their sizes will be reported. It is possible to
limit the dump to only the profiling status, the tasks, or the memory
profiling by specifying the respective keywords; by default all profiling
information are dumped. It is also possible to limit the number of lines
information is dumped. It is also possible to limit the number of lines of
of output of each category by specifying a numeric limit. It is possible to
request that the output is sorted by address or by total execution time
instead of usage, e.g. to ease comparisons between subsequent calls or to
check what needs to be optimized, and to aggregate task activity by called
function instead of seeing the details. Please note that profiling is
essentially aimed at developers since it gives hints about where CPU cycles
or memory are wasted in the code. There is nothing useful to monitor there.
request that the output is sorted by address, by total execution time, or by
calling context instead of usage, e.g. to ease comparisons between subsequent
calls or to check what needs to be optimized, and to aggregate task activity
by called function instead of seeing the details. Please note that profiling
is essentially aimed at developers since it gives hints about where CPU
cycles or memory are wasted in the code. There is nothing useful to monitor
there.
show resolvers [<resolvers section id>]
Dump statistics for the given resolvers section, or all resolvers sections

View File

@ -29,7 +29,7 @@ struct show_prof_ctx {
int dump_step; /* 0,1,2,4,5,6; see cli_iohandler_show_profiling() */
int linenum; /* next line to be dumped (starts at 0) */
int maxcnt; /* max line count per step (0=not set) */
int by_what; /* 0=sort by usage, 1=sort by address, 2=sort by time */
int by_what; /* 0=sort by usage, 1=sort by address, 2=sort by time, 3=sort by ctx */
int aggr; /* 0=dump raw, 1=aggregate on callee */
/* 4-byte hole here */
struct sched_activity *tmp_activity; /* dynamically allocated during dumps */
@ -959,6 +959,27 @@ static int cmp_memprof_addr(const void *a, const void *b)
else
return 0;
}
/* qsort() comparator for memprof_stats: orders entries by execution context
 * first (pointer, then type), then by caller address, each key descending.
 * Used by "show profiling memory byctx" to group allocations by the calling
 * context before falling back to the caller for ties.
 */
static int cmp_memprof_ctx(const void *a, const void *b)
{
	const struct memprof_stats *l = a;
	const struct memprof_stats *r = b;

	/* each key sorts descending: larger values come first */
	if (l->exec_ctx.pointer != r->exec_ctx.pointer)
		return (l->exec_ctx.pointer > r->exec_ctx.pointer) ? -1 : 1;

	if (l->exec_ctx.type != r->exec_ctx.type)
		return (l->exec_ctx.type > r->exec_ctx.type) ? -1 : 1;

	/* tie-break on caller address, also descending */
	if (l->caller != r->caller)
		return (l->caller > r->caller) ? -1 : 1;

	return 0;
}
#endif // USE_MEMORY_PROFILING
/* Computes the index of function pointer <func> and caller <caller> for use
@ -1188,8 +1209,10 @@ static int cli_io_handler_show_profiling(struct appctx *appctx)
ctx->tmp_memstats = tmp_memstats;
memcpy(tmp_memstats, memprof_stats, sizeof(memprof_stats));
if (ctx->by_what)
if (ctx->by_what == 1)
qsort(tmp_memstats, MEMPROF_HASH_BUCKETS+1, sizeof(tmp_memstats[0]), cmp_memprof_addr);
else if (ctx->by_what == 3)
qsort(tmp_memstats, MEMPROF_HASH_BUCKETS+1, sizeof(tmp_memstats[0]), cmp_memprof_ctx);
else
qsort(tmp_memstats, MEMPROF_HASH_BUCKETS+1, sizeof(tmp_memstats[0]), cmp_memprof_stats);
@ -1424,6 +1447,9 @@ static int cli_parse_show_profiling(char **args, char *payload, struct appctx *a
else if (strcmp(args[arg], "bytime") == 0) {
ctx->by_what = 2; // sort output by total time instead of usage
}
else if (strcmp(args[arg], "byctx") == 0) {
ctx->by_what = 3; // sort output by caller context instead of usage
}
else if (strcmp(args[arg], "aggr") == 0) {
ctx->aggr = 1; // aggregate output by callee
}
@ -1431,7 +1457,7 @@ static int cli_parse_show_profiling(char **args, char *payload, struct appctx *a
ctx->maxcnt = atoi(args[arg]); // number of entries to dump
}
else
return cli_err(appctx, "Expects either 'all', 'status', 'tasks', 'memory', 'byaddr', 'bytime', 'aggr' or a max number of output lines.\n");
return cli_err(appctx, "Expects either 'all', 'status', 'tasks', 'memory', 'byaddr', 'bytime', 'byctx', 'aggr' or a max number of output lines.\n");
}
return 0;
}