DEBUG: pools: detect that malloc_trim() is in progress

Now when ha_panic() is called while a thread is still inside malloc_trim(),
a new tainted flag (TAINTED_MEM_TRIMMING_STUCK) is set so that the condition
is easy to report. The output trace will mention that it happened and will
suggest using 'no-memory-trimming' to avoid it in the future.
This commit is contained in:
Willy Tarreau 2023-10-25 15:42:27 +02:00
parent 26a6481f00
commit 96bb99a87d
4 changed files with 21 additions and 0 deletions

View File

@ -251,6 +251,7 @@ enum tainted_flags {
TAINTED_PANIC = 0x00000200, /* a panic dump has started */
TAINTED_LUA_STUCK = 0x00000400, /* stuck in a Lua context */
TAINTED_LUA_STUCK_SHARED = 0x00000800, /* stuck in a shared Lua context */
TAINTED_MEM_TRIMMING_STUCK = 0x00001000, /* stuck while trimming memory */
};
/* this is a bit field made of TAINTED_*, and is declared in haproxy.c */

View File

@ -100,6 +100,9 @@
/* poison each newly allocated area with this byte if >= 0 */
extern int mem_poison_byte;
/* trim() in progress: counter raised by malloc_trim() while it runs and
 * lowered before it returns; non-zero means a trim is currently ongoing.
 */
extern int pool_trim_in_progress;
/* set of POOL_DBG_* flags */
extern uint pool_debugging;

View File

@ -240,6 +240,10 @@ void ha_thread_dump_one(int thr, int from_signal)
}
}
#endif
if (HA_ATOMIC_LOAD(&pool_trim_in_progress))
mark_tainted(TAINTED_MEM_TRIMMING_STUCK);
/* We only emit the backtrace for stuck threads in order not to
* waste precious output buffer space with non-interesting data.
* Please leave this as the last instruction in this function
@ -468,6 +472,14 @@ void ha_panic()
DISGUISE(write(2, trash.area, trash.data));
}
#endif
/* If the TAINTED_MEM_TRIMMING_STUCK flag was raised, append a note to the
 * panic output on fd 2 explaining the likely cause and naming the
 * configuration keyword that disables memory trimming.
 */
if (get_tainted() & TAINTED_MEM_TRIMMING_STUCK) {
	chunk_printf(&trash,
	             "### Note: one thread was found stuck under malloc_trim(), which can run for a\n"
	             " very long time on large memory systems. You may want to disable this\n"
	             " memory reclaiming feature by setting 'no-memory-trimming' in the\n"
	             " 'global' section of your configuration to avoid this in the future.\n");
	DISGUISE(write(2, trash.area, trash.data));
}
for (;;)
abort();

View File

@ -38,6 +38,7 @@ THREAD_LOCAL size_t pool_cache_count = 0; /* #cache objects */
static struct list pools __read_mostly = LIST_HEAD_INIT(pools);
int mem_poison_byte __read_mostly = 'P';
int pool_trim_in_progress = 0; /* non-zero while malloc_trim() is running (inc on entry, dec before return) */
uint pool_debugging __read_mostly = /* set of POOL_DBG_* flags */
#ifdef DEBUG_FAIL_ALLOC
POOL_DBG_FAIL_ALLOC |
@ -218,6 +219,8 @@ int malloc_trim(size_t pad)
if (disable_trim)
return ret;
HA_ATOMIC_INC(&pool_trim_in_progress);
if (my_mallctl) {
/* here we're on jemalloc and malloc_trim() is called either
* by haproxy or another dependency (the worst case that
@ -263,6 +266,8 @@ int malloc_trim(size_t pad)
}
}
#endif
HA_ATOMIC_DEC(&pool_trim_in_progress);
/* here we have ret=0 if nothing was released, or 1 if some were */
return ret;
}