/*
* include/haproxy/ring-t.h
* This file provides definitions for ring buffers used for disposable data.
*
* Copyright (C) 2000-2019 Willy Tarreau - w@1wt.eu
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, version 2.1
* exclusively.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef _HAPROXY_RING_T_H
#define _HAPROXY_RING_T_H
#include <haproxy/api-t.h>
#include <haproxy/buf-t.h>
#include <haproxy/thread.h>
/* The code below handles circular buffers with a single producer and multiple
* readers (up to 254). The buffer storage area must always remain allocated.
* It's made of a series of payload blocks, each followed by a readers count
* (RC).
* There is always a readers count at the beginning of the buffer as well. Each
* payload block is composed of a varint-encoded size (VI) followed by the
* actual payload (PL).
*
* The readers count is encoded on a single byte. It indicates how many readers
* are still waiting at this position. The writer writes after the buffer's
* tail, which initially starts just past the first readers count. Then it
* knows by reading this count that it must wake up the readers to indicate
* data availability. When a reader reads the payload block, it increments the
* next readers count and decrements the current one. The area between the
* initial readers count and the next one is protected from overwriting for as
* long as the initial count is non-null. As such, these readers counts are
* effective barriers against data recycling.
*
* Only the writer is allowed to update the buffer's tail/head. This ensures
* that events can remain as long as possible so that late readers can get the
* maximum history available. It also helps deal with multi-thread accesses
* using a simple RW lock during the buffer head's manipulation. The writer
* will have to delete some old records starting at the head until the new
* message can fit or a non-null readers count is encountered. If a message
* cannot fit due to insufficient room, the message is lost and the drop
* counter must be incremented.
*
* Like any buffer, this buffer naturally wraps at the end and continues at the
* beginning. The creation process consists in immediately adding a null
* readers count byte into the buffer. The write process consists in always
* writing a payload block followed by a new readers count. The delete process
* consists in removing a null readers count and payload block. As such, there
* is always at least one readers count byte in the buffer available at the
* head for new readers to attach to, and one before the tail, both of which
* may be the same when the buffer doesn't contain any event. It is thus safe
* for any reader to simply keep the absolute offset of the last visited
* position and to restart from there. The writer will update the buffer's
* absolute offset when deleting entries. All this also has the benefit of
* allowing a buffer to be hot-resized without losing its contents.
*
* Thus we have this:
*  - init of empty buffer:
*   head-,    ,-tail
*      [ RC | xxxxxxxxxxxxxxxxxxxxxxxxxx ]
*
*  - reader attached:
*   head-,    ,-tail
*      [ RC | xxxxxxxxxxxxxxxxxxxxxxxxxx ]
*        ^- +1
*
*  - append of one event:
*                appended
*   head-,    <------------> ,-tail
*      [ RC | VI | PL | RC | xxxxxxxxxxx ]
*
*  - reader advancing:
*   head-,                   ,-tail
*      [ RC | VI | PL | RC | xxxxxxxxxxx ]
*        ^- -1          ^- +1
*
*  - writer removing older message:
*                  head-,    ,-tail
*      [ xxxxxxxxxxxx | RC | xxxxxxxxxxx ]
*        <---------->
*          removed
*/
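
/* Illustrative sketch only (not haproxy's API): how a reader could advance
 * over one payload block following the scheme described above. The name
 * ring_ex_reader_next() is hypothetical, the LEB128-style varint decoding is
 * merely an assumption for the example (the real varint helpers are defined
 * elsewhere in haproxy), and a real reader would do this under the ring's
 * locking/atomic primitives.
 */
static inline size_t ring_ex_reader_next(char *area, size_t size, size_t ofs)
{
        size_t next = (ofs + 1) % size;    /* skip the current RC byte */
        size_t len = 0;
        unsigned int shift = 0;
        unsigned char b;

        /* decode the varint-encoded payload length (VI) */
        do {
                b = (unsigned char)area[next];
                len |= (size_t)(b & 0x7f) << shift;
                shift += 7;
                next = (next + 1) % size;
        } while (b & 0x80);

        next = (next + len) % size;        /* <next> is now the following RC */

        area[next]++;                      /* attach to the next block first... */
        area[ofs]--;                       /* ...then release the current one */
        return next;
}
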
/* ring watch flags to be used when watching the ring */
#define RING_WF_WAIT_MODE 0x00000001 /* wait for new contents */
#define RING_WF_SEEK_NEW 0x00000002 /* seek to new contents */
#define RING_WF_END_ZERO 0x00000004 /* mark end of events with \0 instead of \n */
/* ring flags */
#define RING_FL_MAPPED 0x00000001 /* mmapped area, must not free() */
/* keep values below in decimal, they may be dumped in error messages */
#define RING_WRITING_SIZE 255 /* the next message's size is being written */
#define RING_MAX_READERS 254 /* highest supported value for RC */
/* mask used to lock the tail */
#define RING_TAIL_LOCK (1ULL << ((sizeof(size_t) * 8) - 1))
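
/* Illustrative sketch only (not haproxy's locking code, which lives in
 * ring.h/ring.c and uses the HA_ATOMIC_* wrappers): RING_TAIL_LOCK reserves
 * the topmost bit of the tail offset so a writer can lock the tail with a
 * single atomic OR and release it by storing a value with the bit cleared.
 * The ring_ex_* names are hypothetical; GCC __atomic builtins keep the
 * example self-contained.
 */
static inline size_t ring_ex_lock_tail(size_t *tail)
{
        size_t prev;

        while (1) {
                prev = __atomic_fetch_or(tail, (size_t)RING_TAIL_LOCK, __ATOMIC_ACQUIRE);
                if (!(prev & RING_TAIL_LOCK))
                        return prev;  /* got it: <prev> is the unlocked tail */
                /* another writer holds it; a real implementation would relax here */
        }
}

static inline void ring_ex_unlock_tail(size_t *tail, size_t new_tail)
{
        /* publishing the new tail with the lock bit cleared releases the lock */
        __atomic_store_n(tail, new_tail & ~(size_t)RING_TAIL_LOCK, __ATOMIC_RELEASE);
}
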
/* A cell describing a waiting thread.
* ->next is initialized to 0x1 before the pointer is set, so that any
* leader thread can see that the pointer is not set yet. This allows all
* waiting threads to be enqueued very quickly using XCHG() on the head
* without having to rely on a flaky CAS, while threads finish their setup
* in parallel. The pointer will turn to NULL again once the thread is
* released.
*/
struct ring_wait_cell {
        size_t to_send_self;          // size needed to serialize this msg
        size_t needed_tot;            // size needed to serialize pending msgs
        size_t maxlen;                // msg truncated to this size
        const struct ist *pfx;        // prefixes
        size_t npfx;                  // #prefixes
        const struct ist *msg;        // message parts
        size_t nmsg;                  // #message parts
        struct ring_wait_cell *next;  // next waiting thread
};
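
/* Illustrative sketch of the enqueue step described above: the thread marks
 * its ->next as "not set yet" (0x1), atomically swaps itself as the new queue
 * head, then publishes the link to the previous head. ring_ex_enqueue() is a
 * hypothetical name and GCC __atomic builtins stand in for haproxy's
 * HA_ATOMIC_* wrappers; the real code lives in ring.c.
 */
static inline void ring_ex_enqueue(struct ring_wait_cell **head,
                                   struct ring_wait_cell *cell)
{
        struct ring_wait_cell *prev;

        /* let a leader see that the link is not published yet */
        cell->next = (struct ring_wait_cell *)0x1;

        /* become the new head with a single XCHG, no CAS loop needed */
        prev = __atomic_exchange_n(head, cell, __ATOMIC_RELEASE);

        /* now publish the link to the previous head (possibly NULL) */
        __atomic_store_n(&cell->next, prev, __ATOMIC_RELEASE);
}
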
/* this is the mmapped part */
struct ring_storage {
        size_t size;     // storage size
        size_t rsvd;     // header length (used for file-backed maps)
        THREAD_ALIGN();
        size_t tail;     // storage tail
        THREAD_ALIGN();
        size_t head;     // storage head
        THREAD_ALIGN();
        char area[0];    // storage area begins immediately here
};
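
/* Illustrative sketch of the creation step described earlier: a freshly
 * mapped or allocated storage starts with a single null readers count byte,
 * with the head pointing to it and the tail just past it.
 * ring_ex_storage_init() is a hypothetical helper, not haproxy's actual ring
 * creation code in ring.c.
 */
static inline void ring_ex_storage_init(struct ring_storage *stg, size_t size)
{
        stg->size    = size;   /* total bytes available in ->area */
        stg->rsvd    = 0;      /* no file-backed header in this sketch */
        stg->head    = 0;      /* head points to the initial RC byte ... */
        stg->tail    = 1;      /* ... and the tail starts just past it */
        stg->area[0] = 0;      /* the initial null readers count */
}
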
/* this is the ring definition, config, waiters etc */
struct ring {
        struct ring_storage *storage; // the mapped part
        struct mt_list waiters;       // list of waiters, for now, CLI "show events"
        int readers_count;            // number of attached readers
        uint flags;                   // RING_FL_*
        uint pending;                 // new writes that have not yet been subject to a wakeup
        uint waking;                  // indicates a thread is currently waking up readers
        /* keep the queue in a separate cache line below */
        struct {
                THREAD_ALIGN();
                struct ring_wait_cell *ptr;
        } queue[RING_WAIT_QUEUES + 1]; // wait queue + 1 spacer
};
#endif /* _HAPROXY_RING_T_H */
/*
* Local variables:
* c-indent-level: 8
* c-basic-offset: 8
* End:
*/