MEDIUM: dynbuf: implement emergency buffers
The buffer reserve set by tune.buffers.reserve has long been unused, and in order to deal gracefully with failed memory allocations we'll need to resort to a few emergency buffers that are pre-allocated per thread.

These buffers are only for emergency use, so every time their count is below the configured number a b_free() will refill them. For this reason their count can remain pretty low. We changed the default number from 2 to 4 per thread, and the minimum value is now zero (e.g. for low-memory systems).

The tune.buffers.limit setting has always been a problem when trying to deal with the reserve, but now we can simplify it by simply pushing the limit (if set) to match the reserve. That was already done in the past with a static value, but with threads it is a bit trickier, which is why the per-thread allocators increment the limit on the fly before allocating their own buffers. This also means that the configured limit is saner and now corresponds to the regular buffers that can be allocated on top of emergency buffers.

At the moment these emergency buffers are not used upon allocation failure. The only reason is to ease bisecting later if needed, since this commit only has to deal with resource management.
parent 47665be083
commit 0ce51dc93b
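Although this patch deliberately keeps the emergency buffers unused upon allocation failure, the intended consumption path follows directly from the helpers added below. The sketch that follows is purely illustrative and not part of the commit: the helper name try_alloc_area() is invented for this example, and a later change would decide where such a fallback really belongs.

	/* Hypothetical sketch, not part of this commit: fall back to one of the
	 * per-thread emergency buffers once the regular buffer pool is exhausted.
	 */
	static inline char *try_alloc_area(void)
	{
		char *area;

		/* normal path: take an area from the shared buffer pool */
		area = pool_alloc_flag(pool_head_buffer, POOL_F_NO_POISON);
		if (area)
			return area;

		/* shortage: consume one of the thread's pre-allocated emergency
		 * buffers; b_free() will hand released areas back to the reserve
		 * until emergency_bufs_left reaches global.tune.reserved_bufs again.
		 */
		return __b_get_emergency_buf();
	}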
@@ -3104,26 +3104,24 @@ tune.applet.zero-copy-forwarding { on | off }

 tune.buffers.limit <number>
   Sets a hard limit on the number of buffers which may be allocated per process.
-  The default value is zero which means unlimited. The minimum non-zero value
-  will always be greater than "tune.buffers.reserve" and should ideally always
-  be about twice as large. Forcing this value can be particularly useful to
-  limit the amount of memory a process may take, while retaining a sane
-  behavior. When this limit is reached, streams which need a buffer wait for
-  another one to be released by another stream. Since buffers are dynamically
-  allocated and released, the waiting time is very short and not perceptible
-  provided that limits remain reasonable. In fact sometimes reducing the limit
-  may even increase performance by increasing the CPU cache's efficiency. Tests
-  have shown good results on average HTTP traffic with a limit to 1/10 of the
-  expected global maxconn setting, which also significantly reduces memory
-  usage. The memory savings come from the fact that a number of connections
-  will not allocate 2*tune.bufsize. It is best not to touch this value unless
-  advised to do so by an HAProxy core developer.
+  The default value is zero which means unlimited. The limit will automatically
+  be re-adjusted to satisfy the reserved buffers for emergency situations so
+  that the user doesn't have to perform complicated calculations. Forcing this
+  value can be particularly useful to limit the amount of memory a process may
+  take, while retaining a sane behavior. When this limit is reached, a task
+  that requests a buffer waits for another one to be released first. Most of
+  the time the waiting time is very short and not perceptible provided that
+  limits remain reasonable. However, some historical limitations have weakened
+  this mechanism over versions and it is known that in certain situations of
+  sustained shortage, some tasks may freeze until their timeout expires, so it
+  is safer to avoid using this when not strictly necessary.

 tune.buffers.reserve <number>
-  Sets the number of buffers which are pre-allocated and reserved for use only
-  during memory shortage conditions resulting in failed memory allocations. The
-  minimum value is 2 and is also the default. There is no reason a user would
-  want to change this value, it's mostly aimed at HAProxy core developers.
+  Sets the number of per-thread buffers which are pre-allocated and reserved
+  for use only during memory shortage conditions resulting in failed memory
+  allocations. The minimum value is 0 and the default is 4. There is no reason
+  a user would want to change this value, unless a core developer suggests to
+  change it for a very specific reason.

 tune.bufsize <number>
   Sets the buffer size to this size (in bytes). Lower values allow more
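For illustration only, the interaction between the two settings described above could look like this in the global section (the values are hypothetical examples, not recommendations):

	global
	    tune.buffers.reserve 4       # per-thread emergency buffers (the new default)
	    tune.buffers.limit   50000   # regular buffers allowed on top of the reserve

Since the limit now only accounts for regular buffers, each thread pushes the pool limit up by its own reserve while pre-allocating it; with for example 8 threads, the effective pool limit becomes 50000 + 8*4 = 50032, so the reserved buffers never eat into the configured budget.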
@@ -71,18 +71,9 @@
 #define BUFSIZE 16384
 #endif

-/* certain buffers may only be allocated for responses in order to avoid
- * deadlocks caused by request queuing. 2 buffers is the absolute minimum
- * acceptable to ensure that a request gaining access to a server can get
- * a response buffer even if it doesn't completely flush the request buffer.
- * The worst case is an applet making use of a request buffer that cannot
- * completely be sent while the server starts to respond, and all unreserved
- * buffers are allocated by request buffers from pending connections in the
- * queue waiting for this one to flush. Both buffers reserved buffers may
- * thus be used at the same time.
- */
+// number of per-thread emergency buffers for low-memory conditions
 #ifndef RESERVED_BUFS
-#define RESERVED_BUFS 2
+#define RESERVED_BUFS 4
 #endif

 // reserved buffer space for header rewriting
@@ -70,6 +70,20 @@ static inline int b_may_alloc_for_crit(uint crit)
 	return 1;
 }

+/* Allocates one of the emergency buffers or returns NULL if there are none left */
+static inline char *__b_get_emergency_buf(void)
+{
+	char *ret;
+
+	if (!th_ctx->emergency_bufs_left)
+		return NULL;
+
+	th_ctx->emergency_bufs_left--;
+	ret = th_ctx->emergency_bufs[th_ctx->emergency_bufs_left];
+	th_ctx->emergency_bufs[th_ctx->emergency_bufs_left] = NULL;
+	return ret;
+}
+
 /* Ensures that <buf> is allocated, or allocates it. If no memory is available,
  * ((char *)1) is assigned instead with a zero size. The allocated buffer is
  * returned, or NULL in case no memory is available. Since buffers only contain
@@ -112,7 +126,10 @@ static inline int b_may_alloc_for_crit(uint crit)
 	 */						\
 	*(_buf) = BUF_NULL;				\
 	__ha_barrier_store();				\
-	pool_free(pool_head_buffer, area);		\
+	if (th_ctx->emergency_bufs_left < global.tune.reserved_bufs)	\
+		th_ctx->emergency_bufs[th_ctx->emergency_bufs_left++] = area; \
+	else						\
+		pool_free(pool_head_buffer, area);	\
 } while (0) \

 /* Releases buffer <buf> if allocated, and marks it empty. */
@@ -149,7 +149,10 @@ struct thread_ctx {
 	struct list quic_conns_clo;         /* list of closing quic-conns attached to this thread */
 	struct list queued_checks;          /* checks waiting for a connection slot */
 	struct list tasklets[TL_CLASSES];   /* tasklets (and/or tasks) to run, by class */
-	// around 48 bytes here for thread-local variables
+
+	void **emergency_bufs;              /* array of buffers allocated at boot. Next free one is [emergency_bufs_left-1] */
+	uint emergency_bufs_left;           /* number of emergency buffers left in magic_bufs[] */
+	// around 36 bytes here for thread-local variables

 	// third cache line here on 64 bits: accessed mostly using atomic ops
 	ALWAYS_ALIGN(64);
44	src/dynbuf.c
@@ -163,8 +163,6 @@ static int cfg_parse_tune_buffers_limit(char **args, int section_type, struct pr
 	if (global.tune.buf_limit) {
 		if (global.tune.buf_limit < 3)
 			global.tune.buf_limit = 3;
-		if (global.tune.buf_limit <= global.tune.reserved_bufs)
-			global.tune.buf_limit = global.tune.reserved_bufs + 1;
 	}

 	return 0;
@@ -187,14 +185,44 @@ static int cfg_parse_tune_buffers_reserve(char **args, int section_type, struct
 	}

 	global.tune.reserved_bufs = reserve;
-	if (global.tune.reserved_bufs < 2)
-		global.tune.reserved_bufs = 2;
-	if (global.tune.buf_limit && global.tune.buf_limit <= global.tune.reserved_bufs)
-		global.tune.buf_limit = global.tune.reserved_bufs + 1;

 	return 0;
 }

+/* allocate emergency buffers for the thread */
+static int alloc_emergency_buffers_per_thread(void)
+{
+	int idx;
+
+	th_ctx->emergency_bufs_left = global.tune.reserved_bufs;
+	th_ctx->emergency_bufs = calloc(global.tune.reserved_bufs, sizeof(*th_ctx->emergency_bufs));
+	if (!th_ctx->emergency_bufs)
+		return 0;
+
+	for (idx = 0; idx < global.tune.reserved_bufs; idx++) {
+		/* reserved bufs are not subject to the limit, so we must push it */
+		if (_HA_ATOMIC_LOAD(&pool_head_buffer->limit))
+			_HA_ATOMIC_INC(&pool_head_buffer->limit);
+		th_ctx->emergency_bufs[idx] = pool_alloc_flag(pool_head_buffer, POOL_F_NO_POISON | POOL_F_NO_FAIL);
+		if (!th_ctx->emergency_bufs[idx])
+			return 0;
+	}
+
+	return 1;
+}
+
+/* frees the thread's emergency buffers */
+static void free_emergency_buffers_per_thread(void)
+{
+	int idx;
+
+	if (th_ctx->emergency_bufs) {
+		for (idx = 0; idx < global.tune.reserved_bufs; idx++)
+			pool_free(pool_head_buffer, th_ctx->emergency_bufs[idx]);
+	}
+
+	ha_free(&th_ctx->emergency_bufs);
+}
+
 /* config keyword parsers */
 static struct cfg_kw_list cfg_kws = {ILH, {
 	{ CFG_GLOBAL, "tune.buffers.limit", cfg_parse_tune_buffers_limit },
@@ -203,6 +231,8 @@ static struct cfg_kw_list cfg_kws = {ILH, {
 }};

 INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+REGISTER_PER_THREAD_ALLOC(alloc_emergency_buffers_per_thread);
+REGISTER_PER_THREAD_FREE(free_emergency_buffers_per_thread);

 /*
  * Local variables: