MEDIUM: pools: replace CONFIG_HAP_POOLS with a runtime "NO_CACHE" flag.

Like previous patches, this replaces the build-time code paths that were
conditioned by CONFIG_HAP_POOLS with runtime paths conditioned by
!POOL_DBG_NO_CACHE. One trivial test had to be added in the hot path of
__pool_alloc() to refrain from calling pool_get_from_cache(), and another
one in __pool_free() to avoid calling pool_put_to_cache().
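
For illustration only (this sketch is not part of the patch), the hot paths
now reduce to a single runtime bit test instead of a compile-time #ifdef;
a condensed paraphrase using the names from the diff below:

    /* allocation path, see __pool_alloc() */
    if (likely(!(pool_debugging & POOL_DBG_NO_CACHE)) && !p)
        p = pool_get_from_cache(pool, caller); /* thread-local cache first */
    if (unlikely(!p))
        p = pool_alloc_nocache(pool);          /* otherwise allocate directly */

    /* release path, see __pool_free() */
    if (unlikely(pool_debugging & POOL_DBG_NO_CACHE)) {
        pool_free_nocache(pool, ptr);          /* cache disabled: release directly */
        return;
    }
    pool_put_to_cache(pool, ptr, caller);      /* otherwise keep it in the local cache */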

All cache-specific functions were instrumented with a BUG_ON() to make
sure we never call them with the cache disabled. Additionally, the cache[]
array is not initialized when the cache is disabled, so that we can later
drop it if not needed. It is particularly huge and should be turned into a
dynamic allocation, with a pointer to a per-thread area where all the
objects are located. This will solve the memory usage issue and improve
locality, and may even help deal better with NUMA machines once each
thread uses its own arena.
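
As an illustration of that instrumentation (again a paraphrase of the diff,
not additional code), every cache-only helper now starts with the same
guard, e.g. in pool_evict_from_local_cache():

    void pool_evict_from_local_cache(struct pool_head *pool, int full)
    {
        struct pool_cache_head *ph = &pool->cache[tid];

        /* catch any path reaching cache-only code with the cache disabled */
        BUG_ON(pool_debugging & POOL_DBG_NO_CACHE);
        ...
    }
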
Author: Willy Tarreau
Date:   2022-02-22 16:23:09 +01:00
Parent: dff3b0627d
Commit: e981631d27
4 changed files with 65 additions and 89 deletions


@@ -46,6 +46,7 @@
 #define POOL_DBG_COLD_FIRST 0x00000004 // pick cold objects first
 #define POOL_DBG_INTEGRITY 0x00000008 // perform integrity checks on cache
 #define POOL_DBG_NO_GLOBAL 0x00000010 // disable global pools
+#define POOL_DBG_NO_CACHE 0x00000020 // disable thread-local pool caches
 /* This is the head of a thread-local cache */
@@ -115,9 +116,7 @@ struct pool_head {
 	/* 32-bit hole here */
 	struct list list; /* list of all known pools */
 	char name[12]; /* name of the pool */
-#ifdef CONFIG_HAP_POOLS
 	struct pool_cache_head cache[MAX_THREADS]; /* pool caches */
-#endif
 } __attribute__((aligned(64)));
 #endif /* _HAPROXY_POOL_T_H */


@@ -133,8 +133,6 @@ void *__pool_alloc(struct pool_head *pool, unsigned int flags);
 void __pool_free(struct pool_head *pool, void *ptr);
-#ifdef CONFIG_HAP_POOLS
 /****************** Thread-local cache management ******************/
 extern THREAD_LOCAL size_t pool_cache_bytes; /* total cache size */
@@ -162,7 +160,7 @@ static inline uint pool_releasable(const struct pool_head *pool)
 {
 	uint alloc, used;
-	if (unlikely(pool_debugging & POOL_DBG_NO_GLOBAL))
+	if (unlikely(pool_debugging & (POOL_DBG_NO_CACHE|POOL_DBG_NO_GLOBAL)))
 		return 0;
 	alloc = HA_ATOMIC_LOAD(&pool->allocated);
@@ -186,13 +184,16 @@ static inline uint pool_releasable(const struct pool_head *pool)
 /* Tries to retrieve an object from the local pool cache corresponding to pool
  * <pool>. If none is available, tries to allocate from the shared cache if any
- * and returns NULL if nothing is available.
+ * and returns NULL if nothing is available. Must not be used when pools are
+ * disabled.
  */
 static inline void *pool_get_from_cache(struct pool_head *pool, const void *caller)
 {
 	struct pool_cache_item *item;
 	struct pool_cache_head *ph;
+	BUG_ON(pool_debugging & POOL_DBG_NO_CACHE);
 	ph = &pool->cache[tid];
 	if (unlikely(LIST_ISEMPTY(&ph->list))) {
 		if (!(pool_debugging & POOL_DBG_NO_GLOBAL))
@@ -231,22 +232,6 @@ static inline void *pool_get_from_cache(struct pool_head *pool, const void *caller)
 	return item;
 }
-#else /* CONFIG_HAP_POOLS */
-/* no cache pools implementation */
-static inline void *pool_get_from_cache(struct pool_head *pool, const void *caller)
-{
-	return NULL;
-}
-static inline void pool_put_to_cache(struct pool_head *pool, void *ptr, const void *caller)
-{
-	pool_free_nocache(pool, ptr);
-}
-#endif /* CONFIG_HAP_POOLS */
 /****************** Common high-level code ******************/


@@ -85,9 +85,7 @@ struct thread_ctx {
 	uint8_t tl_class_mask; /* bit mask of non-empty tasklets classes */
 	// 7 bytes hole here
-#ifdef CONFIG_HAP_POOLS
-	struct list pool_lru_head; /* oldest objects */
-#endif
+	struct list pool_lru_head; /* oldest objects in thread-local pool caches */
 	struct list buffer_wq; /* buffer waiters */
 	struct list streams; /* list of streams attached to this thread */


@@ -29,11 +29,9 @@
 #include <haproxy/tools.h>
-#ifdef CONFIG_HAP_POOLS
 /* These ones are initialized per-thread on startup by init_pools() */
 THREAD_LOCAL size_t pool_cache_bytes = 0; /* total cache size */
 THREAD_LOCAL size_t pool_cache_count = 0; /* #cache objects */
-#endif
 static struct list pools __read_mostly = LIST_HEAD_INIT(pools);
 int mem_poison_byte __read_mostly = -1;
@@ -52,6 +50,9 @@ uint pool_debugging __read_mostly = /* set of POOL_DBG_* flags */
 #endif
 #ifdef CONFIG_HAP_NO_GLOBAL_POOLS
 	POOL_DBG_NO_GLOBAL |
+#endif
+#ifndef CONFIG_HAP_POOLS
+	POOL_DBG_NO_CACHE |
 #endif
 	0;
@@ -206,15 +207,16 @@ struct pool_head *create_pool(char *name, unsigned int size, unsigned int flags)
 		size = ((size + POOL_EXTRA + align - 1) & -align) - POOL_EXTRA;
 	}
-#ifdef CONFIG_HAP_POOLS
+	if (!(pool_debugging & POOL_DBG_NO_CACHE)) {
 		/* we'll store two lists there, we need the room for this. This is
 		 * guaranteed by the test above, except if MEM_F_EXACT is set, or if
 		 * the only EXTRA part is in fact the one that's stored in the cache
 		 * in addition to the pci struct.
 		 */
 		if (size + POOL_EXTRA - POOL_EXTRA_CALLER < sizeof(struct pool_cache_item))
 			size = sizeof(struct pool_cache_item) + POOL_EXTRA_CALLER - POOL_EXTRA;
-#endif
+	}
 	/* TODO: thread: we do not lock pool list for now because all pools are
 	 * created during HAProxy startup (so before threads creation) */
 	start = &pools;
@@ -254,14 +256,14 @@ struct pool_head *create_pool(char *name, unsigned int size, unsigned int flags)
 		pool->flags = flags;
 		LIST_APPEND(start, &pool->list);
-#ifdef CONFIG_HAP_POOLS
+		if (!(pool_debugging & POOL_DBG_NO_CACHE)) {
 			/* update per-thread pool cache if necessary */
 			for (thr = 0; thr < MAX_THREADS; thr++) {
 				LIST_INIT(&pool->cache[thr].list);
 				pool->cache[thr].tid = thr;
 				pool->cache[thr].pool = pool;
 			}
-#endif
+		}
 	}
 	pool->users++;
 	return pool;
@@ -336,8 +338,6 @@ void pool_free_nocache(struct pool_head *pool, void *ptr)
 }
-#ifdef CONFIG_HAP_POOLS
 /* Updates <pch>'s fill_pattern and fills the free area after <item> with it,
  * up to <size> bytes. The item part is left untouched.
  */
@@ -386,7 +386,8 @@ void pool_check_pattern(struct pool_cache_head *pch, struct pool_cache_item *item
  * pool <pool>. The shared pool is refilled with these objects in the limit
  * of the number of acceptable objects, and the rest will be released to the
  * OS. It is not a problem is <count> is larger than the number of objects in
- * the local cache. The counters are automatically updated.
+ * the local cache. The counters are automatically updated. Must not be used
+ * with pools disabled.
  */
 static void pool_evict_last_items(struct pool_head *pool, struct pool_cache_head *ph, uint count)
 {
@@ -396,6 +397,8 @@ static void pool_evict_last_items(struct pool_head *pool, struct pool_cache_head *ph, uint count)
 	uint cluster = 0;
 	uint to_free_max;
+	BUG_ON(pool_debugging & POOL_DBG_NO_CACHE);
 	/* Note: this will be zero when global pools are disabled */
 	to_free_max = pool_releasable(pool);
@@ -440,11 +443,14 @@ static void pool_evict_last_items(struct pool_head *pool, struct pool_cache_head *ph, uint count)
  * or the total size of the local cache is no more than 75% of its maximum (i.e.
  * we don't want a single cache to use all the cache for itself). For this, the
  * list is scanned in reverse. If <full> is non-null, all objects are evicted.
+ * Must not be used when pools are disabled.
  */
 void pool_evict_from_local_cache(struct pool_head *pool, int full)
 {
 	struct pool_cache_head *ph = &pool->cache[tid];
+	BUG_ON(pool_debugging & POOL_DBG_NO_CACHE);
 	while ((ph->count && full) ||
 	       (ph->count >= CONFIG_HAP_POOL_CLUSTER_SIZE &&
 	        ph->count >= 16 + pool_cache_count / 8 &&
@@ -454,7 +460,7 @@ void pool_evict_from_local_cache(struct pool_head *pool, int full)
 }
 /* Evicts some of the oldest objects from the local cache, pushing them to the
- * global pool.
+ * global pool. Must not be used when pools are disabled.
  */
 void pool_evict_from_local_caches()
 {
@@ -462,6 +468,8 @@ void pool_evict_from_local_caches()
 	struct pool_cache_head *ph;
 	struct pool_head *pool;
+	BUG_ON(pool_debugging & POOL_DBG_NO_CACHE);
 	do {
 		item = LIST_PREV(&th_ctx->pool_lru_head, struct pool_cache_item *, by_lru);
 		BUG_ON(&item->by_lru == &th_ctx->pool_lru_head);
@@ -482,13 +490,16 @@ void pool_evict_from_local_caches()
  * shared cache, which itself may decide to release some of them to the OS.
  * While it is unspecified what the object becomes past this point, it is
  * guaranteed to be released from the users' perpective. A caller address may
- * be passed and stored into the area when DEBUG_POOL_TRACING is set.
+ * be passed and stored into the area when DEBUG_POOL_TRACING is set. Must not
+ * be used with pools disabled.
  */
 void pool_put_to_cache(struct pool_head *pool, void *ptr, const void *caller)
 {
 	struct pool_cache_item *item = (struct pool_cache_item *)ptr;
 	struct pool_cache_head *ph = &pool->cache[tid];
+	BUG_ON(pool_debugging & POOL_DBG_NO_CACHE);
 	LIST_INSERT(&ph->list, &item->by_pool);
 	LIST_INSERT(&th_ctx->pool_lru_head, &item->by_lru);
 	POOL_DEBUG_TRACE_CALLER(pool, item, caller);
@@ -510,7 +521,8 @@ void pool_put_to_cache(struct pool_head *pool, void *ptr, const void *caller)
 * This is only used when pools are in use and shared pools are enabled. No
 * malloc() is attempted, and poisonning is never performed. The purpose is to
 * get the fastest possible refilling so that the caller can easily check if
- * the cache has enough objects for its use.
+ * the cache has enough objects for its use. Must not be used when pools are
+ * disabled.
 */
 void pool_refill_local_from_shared(struct pool_head *pool, struct pool_cache_head *pch)
 {
@@ -518,6 +530,8 @@ void pool_refill_local_from_shared(struct pool_head *pool, struct pool_cache_head *pch)
 	struct pool_item *ret, *down;
 	uint count;
+	BUG_ON(pool_debugging & POOL_DBG_NO_CACHE);
 	/* we'll need to reference the first element to figure the next one. We
 	 * must temporarily lock it so that nobody allocates then releases it,
 	 * or the dereference could fail.
@@ -588,7 +602,7 @@ void pool_flush(struct pool_head *pool)
 {
 	struct pool_item *next, *temp, *down;
-	if (!pool)
+	if (!pool || (pool_debugging & (POOL_DBG_NO_CACHE|POOL_DBG_NO_GLOBAL)))
 		return;
 	/* The loop below atomically detaches the head of the free list and
@@ -650,21 +664,6 @@ void pool_gc(struct pool_head *pool_ctx)
 	thread_release();
 }
-#else /* CONFIG_HAP_POOLS */
-/* legacy stuff */
-void pool_flush(struct pool_head *pool)
-{
-}
-/* This function might ask the malloc library to trim its buffers. */
-void pool_gc(struct pool_head *pool_ctx)
-{
-	trim_all_pools();
-}
-#endif /* CONFIG_HAP_POOLS */
 /*
  * Returns a pointer to type <type> taken from the pool <pool_type> or
  * dynamically allocated. In the first case, <pool_type> is updated to point to
@@ -683,8 +682,9 @@ void *__pool_alloc(struct pool_head *pool, unsigned int flags)
 #if defined(DEBUG_POOL_TRACING)
 	caller = __builtin_return_address(0);
 #endif
-	if (!p)
+	if (likely(!(pool_debugging & POOL_DBG_NO_CACHE)) && !p)
 		p = pool_get_from_cache(pool, caller);
 	if (unlikely(!p))
 		p = pool_alloc_nocache(pool);
@@ -711,6 +711,12 @@ void __pool_free(struct pool_head *pool, void *ptr)
 	/* we'll get late corruption if we refill to the wrong pool or double-free */
 	POOL_DEBUG_CHECK_MARK(pool, ptr);
 	POOL_DEBUG_RESET_MARK(pool, ptr);
+	if (unlikely(pool_debugging & POOL_DBG_NO_CACHE)) {
+		pool_free_nocache(pool, ptr);
+		return;
+	}
 	pool_put_to_cache(pool, ptr, caller);
 }
@@ -775,9 +781,9 @@ void pool_free_area_uaf(void *area, size_t size)
 void *pool_destroy(struct pool_head *pool)
 {
 	if (pool) {
-#ifdef CONFIG_HAP_POOLS
+		if (!(pool_debugging & POOL_DBG_NO_CACHE))
 			pool_evict_from_local_cache(pool, 1);
-#endif
 		pool_flush(pool);
 		if (pool->used)
 			return pool;
@@ -806,30 +812,24 @@ void dump_pools_to_trash()
 	struct pool_head *entry;
 	unsigned long allocated, used;
 	int nbpools;
-#ifdef CONFIG_HAP_POOLS
 	unsigned long cached_bytes = 0;
 	uint cached = 0;
-#endif
 	allocated = used = nbpools = 0;
 	chunk_printf(&trash, "Dumping pools usage. Use SIGQUIT to flush them.\n");
 	list_for_each_entry(entry, &pools, list) {
-#ifdef CONFIG_HAP_POOLS
+		if (!(pool_debugging & POOL_DBG_NO_CACHE)) {
 			int i;
 			for (cached = i = 0; i < global.nbthread; i++)
 				cached += entry->cache[i].count;
 			cached_bytes += cached * entry->size;
-#endif
+		}
 		chunk_appendf(&trash, " - Pool %s (%u bytes) : %u allocated (%u bytes), %u used"
-#ifdef CONFIG_HAP_POOLS
 		              " (~%u by thread caches)"
-#endif
 		              ", needed_avg %u, %u failures, %u users, @%p%s\n",
 		              entry->name, entry->size, entry->allocated,
 		              entry->size * entry->allocated, entry->used,
-#ifdef CONFIG_HAP_POOLS
 		              cached,
-#endif
 		              swrate_avg(entry->needed_avg, POOL_AVG_SAMPLES), entry->failed,
 		              entry->users, entry,
 		              (entry->flags & MEM_F_SHARED) ? " [SHARED]" : "");
@@ -839,14 +839,9 @@ void dump_pools_to_trash()
 		nbpools++;
 	}
 	chunk_appendf(&trash, "Total: %d pools, %lu bytes allocated, %lu used"
-#ifdef CONFIG_HAP_POOLS
 	              " (~%lu by thread caches)"
-#endif
 	              ".\n",
-	              nbpools, allocated, used
-#ifdef CONFIG_HAP_POOLS
-	              , cached_bytes
-#endif
+	              nbpools, allocated, used, cached_bytes
 	              );
 }
@@ -923,13 +918,12 @@ void create_pool_callback(struct pool_head **ptr, char *name, unsigned int size)
 /* Initializes all per-thread arrays on startup */
 static void init_pools()
 {
-#ifdef CONFIG_HAP_POOLS
 	int thr;
 	for (thr = 0; thr < MAX_THREADS; thr++) {
 		LIST_INIT(&ha_thread_ctx[thr].pool_lru_head);
 	}
-#endif
 	detect_allocator();
 }