diff --git a/doc/configuration.txt b/doc/configuration.txt index bb3515bbc..6cc616afb 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -3104,26 +3104,24 @@ tune.applet.zero-copy-forwarding { on | off } tune.buffers.limit Sets a hard limit on the number of buffers which may be allocated per process. - The default value is zero which means unlimited. The minimum non-zero value - will always be greater than "tune.buffers.reserve" and should ideally always - be about twice as large. Forcing this value can be particularly useful to - limit the amount of memory a process may take, while retaining a sane - behavior. When this limit is reached, streams which need a buffer wait for - another one to be released by another stream. Since buffers are dynamically - allocated and released, the waiting time is very short and not perceptible - provided that limits remain reasonable. In fact sometimes reducing the limit - may even increase performance by increasing the CPU cache's efficiency. Tests - have shown good results on average HTTP traffic with a limit to 1/10 of the - expected global maxconn setting, which also significantly reduces memory - usage. The memory savings come from the fact that a number of connections - will not allocate 2*tune.bufsize. It is best not to touch this value unless - advised to do so by an HAProxy core developer. + The default value is zero which means unlimited. The limit will automatically + be re-adjusted to satisfy the reserved buffers for emergency situations so + that the user doesn't have to perform complicated calculations. Forcing this + value can be particularly useful to limit the amount of memory a process may + take, while retaining a sane behavior. When this limit is reached, a task + that requests a buffer waits for another one to be released first. Most of + the time the waiting time is very short and not perceptible provided that + limits remain reasonable. However, some historical limitations have weakened + this mechanism over versions and it is known that in certain situations of + sustained shortage, some tasks may freeze until their timeout expires, so it + is safer to avoid using this when not strictly necessary. tune.buffers.reserve - Sets the number of buffers which are pre-allocated and reserved for use only - during memory shortage conditions resulting in failed memory allocations. The - minimum value is 2 and is also the default. There is no reason a user would - want to change this value, it's mostly aimed at HAProxy core developers. + Sets the number of per-thread buffers which are pre-allocated and reserved + for use only during memory shortage conditions resulting in failed memory + allocations. The minimum value is 0 and the default is 4. There is no reason + a user would want to change this value, unless a core developer suggests to + change it for a very specific reason. tune.bufsize Sets the buffer size to this size (in bytes). Lower values allow more diff --git a/include/haproxy/defaults.h b/include/haproxy/defaults.h index 336e550d8..eda346aa2 100644 --- a/include/haproxy/defaults.h +++ b/include/haproxy/defaults.h @@ -71,18 +71,9 @@ #define BUFSIZE 16384 #endif -/* certain buffers may only be allocated for responses in order to avoid - * deadlocks caused by request queuing. 2 buffers is the absolute minimum - * acceptable to ensure that a request gaining access to a server can get - * a response buffer even if it doesn't completely flush the request buffer. - * The worst case is an applet making use of a request buffer that cannot - * completely be sent while the server starts to respond, and all unreserved - * buffers are allocated by request buffers from pending connections in the - * queue waiting for this one to flush. Both buffers reserved buffers may - * thus be used at the same time. - */ +// number of per-thread emergency buffers for low-memory conditions #ifndef RESERVED_BUFS -#define RESERVED_BUFS 2 +#define RESERVED_BUFS 4 #endif // reserved buffer space for header rewriting diff --git a/include/haproxy/dynbuf.h b/include/haproxy/dynbuf.h index c4c28538d..31d9dd105 100644 --- a/include/haproxy/dynbuf.h +++ b/include/haproxy/dynbuf.h @@ -70,6 +70,20 @@ static inline int b_may_alloc_for_crit(uint crit) return 1; } +/* Allocates one of the emergency buffers or returns NULL if there are none left */ +static inline char *__b_get_emergency_buf(void) +{ + char *ret; + + if (!th_ctx->emergency_bufs_left) + return NULL; + + th_ctx->emergency_bufs_left--; + ret = th_ctx->emergency_bufs[th_ctx->emergency_bufs_left]; + th_ctx->emergency_bufs[th_ctx->emergency_bufs_left] = NULL; + return ret; +} + /* Ensures that is allocated, or allocates it. If no memory is available, * ((char *)1) is assigned instead with a zero size. The allocated buffer is * returned, or NULL in case no memory is available. Since buffers only contain @@ -112,7 +126,10 @@ static inline int b_may_alloc_for_crit(uint crit) */ \ *(_buf) = BUF_NULL; \ __ha_barrier_store(); \ - pool_free(pool_head_buffer, area); \ + if (th_ctx->emergency_bufs_left < global.tune.reserved_bufs) \ + th_ctx->emergency_bufs[th_ctx->emergency_bufs_left++] = area; \ + else \ + pool_free(pool_head_buffer, area); \ } while (0) \ /* Releases buffer if allocated, and marks it empty. */ diff --git a/include/haproxy/tinfo-t.h b/include/haproxy/tinfo-t.h index 5564a70b5..636d5b2e5 100644 --- a/include/haproxy/tinfo-t.h +++ b/include/haproxy/tinfo-t.h @@ -149,7 +149,10 @@ struct thread_ctx { struct list quic_conns_clo; /* list of closing quic-conns attached to this thread */ struct list queued_checks; /* checks waiting for a connection slot */ struct list tasklets[TL_CLASSES]; /* tasklets (and/or tasks) to run, by class */ - // around 48 bytes here for thread-local variables + + void **emergency_bufs; /* array of buffers allocated at boot. Next free one is [emergency_bufs_left-1] */ + uint emergency_bufs_left; /* number of emergency buffers left in magic_bufs[] */ + // around 36 bytes here for thread-local variables // third cache line here on 64 bits: accessed mostly using atomic ops ALWAYS_ALIGN(64); diff --git a/src/dynbuf.c b/src/dynbuf.c index a849aeeb9..aec966701 100644 --- a/src/dynbuf.c +++ b/src/dynbuf.c @@ -163,8 +163,6 @@ static int cfg_parse_tune_buffers_limit(char **args, int section_type, struct pr if (global.tune.buf_limit) { if (global.tune.buf_limit < 3) global.tune.buf_limit = 3; - if (global.tune.buf_limit <= global.tune.reserved_bufs) - global.tune.buf_limit = global.tune.reserved_bufs + 1; } return 0; @@ -187,14 +185,44 @@ static int cfg_parse_tune_buffers_reserve(char **args, int section_type, struct } global.tune.reserved_bufs = reserve; - if (global.tune.reserved_bufs < 2) - global.tune.reserved_bufs = 2; - if (global.tune.buf_limit && global.tune.buf_limit <= global.tune.reserved_bufs) - global.tune.buf_limit = global.tune.reserved_bufs + 1; - return 0; } +/* allocate emergency buffers for the thread */ +static int alloc_emergency_buffers_per_thread(void) +{ + int idx; + + th_ctx->emergency_bufs_left = global.tune.reserved_bufs; + th_ctx->emergency_bufs = calloc(global.tune.reserved_bufs, sizeof(*th_ctx->emergency_bufs)); + if (!th_ctx->emergency_bufs) + return 0; + + for (idx = 0; idx < global.tune.reserved_bufs; idx++) { + /* reserved bufs are not subject to the limit, so we must push it */ + if (_HA_ATOMIC_LOAD(&pool_head_buffer->limit)) + _HA_ATOMIC_INC(&pool_head_buffer->limit); + th_ctx->emergency_bufs[idx] = pool_alloc_flag(pool_head_buffer, POOL_F_NO_POISON | POOL_F_NO_FAIL); + if (!th_ctx->emergency_bufs[idx]) + return 0; + } + + return 1; +} + +/* frees the thread's emergency buffers */ +static void free_emergency_buffers_per_thread(void) +{ + int idx; + + if (th_ctx->emergency_bufs) { + for (idx = 0; idx < global.tune.reserved_bufs; idx++) + pool_free(pool_head_buffer, th_ctx->emergency_bufs[idx]); + } + + ha_free(&th_ctx->emergency_bufs); +} + /* config keyword parsers */ static struct cfg_kw_list cfg_kws = {ILH, { { CFG_GLOBAL, "tune.buffers.limit", cfg_parse_tune_buffers_limit }, @@ -203,6 +231,8 @@ static struct cfg_kw_list cfg_kws = {ILH, { }}; INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); +REGISTER_PER_THREAD_ALLOC(alloc_emergency_buffers_per_thread); +REGISTER_PER_THREAD_FREE(free_emergency_buffers_per_thread); /* * Local variables: