MEDIUM: dynbuf: implement emergency buffers

The buffer reserve set by tune.buffers.reserve has long been unused, and
in order to deal gracefully with failed memory allocations we'll need to
resort to a few emergency buffers that are pre-allocated per thread.

These buffers are only for emergency use, so every time their count is
below the configured number a b_free() will refill them. For this reason
their count can remain pretty low. We changed the default number from 2
to 4 per thread, and the minimum value is now zero (e.g. for low-memory
systems). The tune.buffers.limit setting has always been a problem when
trying to deal with the reserve but now we could simplify it by simply
pushing the limit (if set) to match the reserve. That was already done in
the past with a static value, but now with threads it was a bit trickier,
which is why the per-thread allocators increment the limit on the fly
before allocating their own buffers. This also means that the configured
limit is saner and now corresponds to the regular buffers that can be
allocated on top of emergency buffers.

At the moment these emergency buffers are not used upon allocation
failure. The only reason is to ease bisecting later if needed, since
this commit only has to deal with resource management.
This commit is contained in:
Willy Tarreau 2024-04-26 16:40:32 +02:00
parent 47665be083
commit 0ce51dc93b
5 changed files with 77 additions and 38 deletions

View File

@ -3104,26 +3104,24 @@ tune.applet.zero-copy-forwarding { on | off }
tune.buffers.limit <number>
Sets a hard limit on the number of buffers which may be allocated per process.
The default value is zero which means unlimited. The minimum non-zero value
will always be greater than "tune.buffers.reserve" and should ideally always
be about twice as large. Forcing this value can be particularly useful to
limit the amount of memory a process may take, while retaining a sane
behavior. When this limit is reached, streams which need a buffer wait for
another one to be released by another stream. Since buffers are dynamically
allocated and released, the waiting time is very short and not perceptible
provided that limits remain reasonable. In fact sometimes reducing the limit
may even increase performance by increasing the CPU cache's efficiency. Tests
have shown good results on average HTTP traffic with a limit to 1/10 of the
expected global maxconn setting, which also significantly reduces memory
usage. The memory savings come from the fact that a number of connections
will not allocate 2*tune.bufsize. It is best not to touch this value unless
advised to do so by an HAProxy core developer.
The default value is zero which means unlimited. The limit will automatically
be re-adjusted to satisfy the reserved buffers for emergency situations so
that the user doesn't have to perform complicated calculations. Forcing this
value can be particularly useful to limit the amount of memory a process may
take, while retaining a sane behavior. When this limit is reached, a task
that requests a buffer waits for another one to be released first. Most of
the time the waiting time is very short and not perceptible provided that
limits remain reasonable. However, some historical limitations have weakened
this mechanism over versions and it is known that in certain situations of
sustained shortage, some tasks may freeze until their timeout expires, so it
is safer to avoid using this when not strictly necessary.
tune.buffers.reserve <number>
Sets the number of buffers which are pre-allocated and reserved for use only
during memory shortage conditions resulting in failed memory allocations. The
minimum value is 2 and is also the default. There is no reason a user would
want to change this value, it's mostly aimed at HAProxy core developers.
Sets the number of per-thread buffers which are pre-allocated and reserved
for use only during memory shortage conditions resulting in failed memory
allocations. The minimum value is 0 and the default is 4. There is no reason
a user would want to change this value, unless a core developer suggests to
change it for a very specific reason.
tune.bufsize <number>
Sets the buffer size to this size (in bytes). Lower values allow more

View File

@ -71,18 +71,9 @@
#define BUFSIZE 16384
#endif
/* certain buffers may only be allocated for responses in order to avoid
* deadlocks caused by request queuing. 2 buffers is the absolute minimum
* acceptable to ensure that a request gaining access to a server can get
* a response buffer even if it doesn't completely flush the request buffer.
* The worst case is an applet making use of a request buffer that cannot
* completely be sent while the server starts to respond, and all unreserved
* buffers are allocated by request buffers from pending connections in the
* queue waiting for this one to flush. Both buffers reserved buffers may
* thus be used at the same time.
*/
// number of per-thread emergency buffers for low-memory conditions
#ifndef RESERVED_BUFS
#define RESERVED_BUFS 2
#define RESERVED_BUFS 4
#endif
// reserved buffer space for header rewriting

View File

@ -70,6 +70,20 @@ static inline int b_may_alloc_for_crit(uint crit)
return 1;
}
/* Allocates one of the emergency buffers or returns NULL if there are none left */
static inline char *__b_get_emergency_buf(void)
{
char *ret;
if (!th_ctx->emergency_bufs_left)
return NULL;
th_ctx->emergency_bufs_left--;
ret = th_ctx->emergency_bufs[th_ctx->emergency_bufs_left];
th_ctx->emergency_bufs[th_ctx->emergency_bufs_left] = NULL;
return ret;
}
/* Ensures that <buf> is allocated, or allocates it. If no memory is available,
* ((char *)1) is assigned instead with a zero size. The allocated buffer is
* returned, or NULL in case no memory is available. Since buffers only contain
@ -112,7 +126,10 @@ static inline int b_may_alloc_for_crit(uint crit)
*/ \
*(_buf) = BUF_NULL; \
__ha_barrier_store(); \
pool_free(pool_head_buffer, area); \
if (th_ctx->emergency_bufs_left < global.tune.reserved_bufs) \
th_ctx->emergency_bufs[th_ctx->emergency_bufs_left++] = area; \
else \
pool_free(pool_head_buffer, area); \
} while (0) \
/* Releases buffer <buf> if allocated, and marks it empty. */

View File

@ -149,7 +149,10 @@ struct thread_ctx {
struct list quic_conns_clo; /* list of closing quic-conns attached to this thread */
struct list queued_checks; /* checks waiting for a connection slot */
struct list tasklets[TL_CLASSES]; /* tasklets (and/or tasks) to run, by class */
// around 48 bytes here for thread-local variables
void **emergency_bufs; /* array of buffers allocated at boot. Next free one is [emergency_bufs_left-1] */
uint emergency_bufs_left; /* number of emergency buffers left in magic_bufs[] */
// around 36 bytes here for thread-local variables
// third cache line here on 64 bits: accessed mostly using atomic ops
ALWAYS_ALIGN(64);

View File

@ -163,8 +163,6 @@ static int cfg_parse_tune_buffers_limit(char **args, int section_type, struct pr
if (global.tune.buf_limit) {
if (global.tune.buf_limit < 3)
global.tune.buf_limit = 3;
if (global.tune.buf_limit <= global.tune.reserved_bufs)
global.tune.buf_limit = global.tune.reserved_bufs + 1;
}
return 0;
@ -187,14 +185,44 @@ static int cfg_parse_tune_buffers_reserve(char **args, int section_type, struct
}
global.tune.reserved_bufs = reserve;
if (global.tune.reserved_bufs < 2)
global.tune.reserved_bufs = 2;
if (global.tune.buf_limit && global.tune.buf_limit <= global.tune.reserved_bufs)
global.tune.buf_limit = global.tune.reserved_bufs + 1;
return 0;
}
/* allocate emergency buffers for the thread */
static int alloc_emergency_buffers_per_thread(void)
{
int idx;
th_ctx->emergency_bufs_left = global.tune.reserved_bufs;
th_ctx->emergency_bufs = calloc(global.tune.reserved_bufs, sizeof(*th_ctx->emergency_bufs));
if (!th_ctx->emergency_bufs)
return 0;
for (idx = 0; idx < global.tune.reserved_bufs; idx++) {
/* reserved bufs are not subject to the limit, so we must push it */
if (_HA_ATOMIC_LOAD(&pool_head_buffer->limit))
_HA_ATOMIC_INC(&pool_head_buffer->limit);
th_ctx->emergency_bufs[idx] = pool_alloc_flag(pool_head_buffer, POOL_F_NO_POISON | POOL_F_NO_FAIL);
if (!th_ctx->emergency_bufs[idx])
return 0;
}
return 1;
}
/* frees the thread's emergency buffers */
static void free_emergency_buffers_per_thread(void)
{
int idx;
if (th_ctx->emergency_bufs) {
for (idx = 0; idx < global.tune.reserved_bufs; idx++)
pool_free(pool_head_buffer, th_ctx->emergency_bufs[idx]);
}
ha_free(&th_ctx->emergency_bufs);
}
/* config keyword parsers */
static struct cfg_kw_list cfg_kws = {ILH, {
{ CFG_GLOBAL, "tune.buffers.limit", cfg_parse_tune_buffers_limit },
@ -203,6 +231,8 @@ static struct cfg_kw_list cfg_kws = {ILH, {
}};
INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
REGISTER_PER_THREAD_ALLOC(alloc_emergency_buffers_per_thread);
REGISTER_PER_THREAD_FREE(free_emergency_buffers_per_thread);
/*
* Local variables: