CLEANUP: tinfo: better align fields in thread_ctx

The introduction of buffer_wq[] in thread_ctx pushed a few fields around,
leaving the cache-line alignment less satisfying. More importantly, even
before this change, all the lists in the thread-local part were only
8-byte aligned, with the first one split across two cache lines.

We can do better:
  - sched_profile_entry is not atomic at all; only the data it points to
    is accessed atomically, so the pointer doesn't need to live in the
    atomic-only region, and it can fill the 8-byte hole before the lists
  - the ALWAYS_ALIGN(2*sizeof(void*)) that was only before tasklets[]
    moves before all the lists (a no-op for now; see the sketch after
    this list)
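
For context, ALWAYS_ALIGN() is HAProxy's padding macro. A minimal sketch
of the underlying technique, assuming GCC/Clang (the FORCE_ALIGN name and
demo struct below are illustrative, not HAProxy's exact definition): a
zero-size, over-aligned anonymous member forces the next field onto the
requested boundary.

    #include <stddef.h>
    #include <stdio.h>

    /* Zero-size anonymous union carrying an alignment attribute: it
     * occupies no space itself, but may only be placed at a multiple
     * of x, so the next member starts on that boundary (GNU extension).
     */
    #define FORCE_ALIGN(x) union { } __attribute__((aligned(x)))

    struct demo {
            char flag;                      /* offset 0 */
            FORCE_ALIGN(2 * sizeof(void *));
            struct { void *n, *p; } list;   /* offset 16 on 64-bit */
    };

    int main(void)
    {
            /* prints 16 on a 64-bit target */
            printf("list at offset %zu\n", offsetof(struct demo, list));
            return 0;
    }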

This now makes the lists and buffer_wq[] start on a cache-line boundary,
leaves 48 bytes after the lists before the atomic-only cache line, and
leaves a full cache line at the end for the 128-byte alignment. This way
we still have plenty of room in both parts, with better-aligned fields.
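
Layout claims like these can also be pinned down at build time with
_Static_assert() on offsetof(). A hedged, self-contained sketch: the
struct below only mimics the shape discussed above, it is not the real
thread_ctx, and DYNBUF_NBQ is assumed to be 4 as the diff's comment says.

    #include <stddef.h>

    struct list { struct list *n, *p; };

    #define FORCE_ALIGN(x) union { } __attribute__((aligned(x)))

    struct layout_demo {
            /* thread-local part */
            unsigned char tl_class_mask;
            unsigned char bufq_map;
            /* 2 bytes hole here */
            unsigned int nb_rhttp_conns;
            void *sched_profile_entry;    /* fills the former 8-byte hole */

            FORCE_ALIGN(2 * sizeof(void *));
            struct list buffer_wq[4];     /* pretend DYNBUF_NBQ == 4 */
            struct list pool_lru_head;

            /* atomic-only part must start on its own cache line */
            FORCE_ALIGN(64);
            unsigned int rq_total;

            FORCE_ALIGN(128);             /* pad the whole struct to 128 */
    };

    /* the lists must be at least pointer-pair aligned */
    _Static_assert(offsetof(struct layout_demo, buffer_wq) % (2 * sizeof(void *)) == 0,
                   "buffer_wq[] is misaligned");
    /* the atomic-only region must begin on a 64-byte boundary */
    _Static_assert(offsetof(struct layout_demo, rq_total) % 64 == 0,
                   "atomic part does not start a cache line");
    /* and the whole struct must be a multiple of 128 bytes */
    _Static_assert(sizeof(struct layout_demo) % 128 == 0,
                   "struct not padded to 128 bytes");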
Author: Willy Tarreau
Date:   2024-04-30 15:22:46 +02:00
Parent: a5d6a79986
Commit: 53461e4d94


@@ -137,17 +137,19 @@ struct thread_ctx {
 uint8_t tl_class_mask; /* bit mask of non-empty tasklets classes */
 uint8_t bufq_map; /* one bit per non-empty buffer_wq */
 
-// 6 bytes hole here
+// 2 bytes hole here
+unsigned int nb_rhttp_conns; /* count of current conns used for active reverse HTTP */
+struct sched_activity *sched_profile_entry; /* profile entry in use by the current task/tasklet, only if sched_wake_date>0 */
+ALWAYS_ALIGN(2*sizeof(void*));
+struct list buffer_wq[DYNBUF_NBQ]; /* buffer waiters, 4 criticality-based queues */
 struct list pool_lru_head; /* oldest objects in thread-local pool caches */
 struct list streams; /* list of streams attached to this thread */
 struct list quic_conns; /* list of active quic-conns attached to this thread */
 struct list quic_conns_clo; /* list of closing quic-conns attached to this thread */
 struct list queued_checks; /* checks waiting for a connection slot */
-struct list buffer_wq[DYNBUF_NBQ]; /* buffer waiters, 4 criticality-based queues */
-unsigned int nb_rhttp_conns; /* count of current conns used for active reverse HTTP */
 
-ALWAYS_ALIGN(2*sizeof(void*));
 struct list tasklets[TL_CLASSES]; /* tasklets (and/or tasks) to run, by class */
 
+// around 48 bytes here for thread-local variables
 // third cache line here on 64 bits: accessed mostly using atomic ops
 ALWAYS_ALIGN(64);
@@ -161,7 +163,6 @@ struct thread_ctx {
 
 uint32_t sched_wake_date; /* current task/tasklet's wake date or 0 */
 uint32_t sched_call_date; /* current task/tasklet's call date (valid if sched_wake_date > 0) */
-struct sched_activity *sched_profile_entry; /* profile entry in use by the current task/tasklet, only if sched_wake_date>0 */
 
 uint64_t prev_cpu_time; /* previous per thread CPU time */
 uint64_t prev_mono_time; /* previous system wide monotonic time */
@@ -175,6 +176,7 @@ struct thread_ctx {
 unsigned long long out_bytes; /* total #of bytes emitted */
 unsigned long long spliced_out_bytes; /* total #of bytes emitted though a kernel pipe */
 struct buffer *thread_dump_buffer; /* NULL out of dump, valid during a dump, 0x01 once done */
 
+// around 64 bytes here for shared variables
 ALWAYS_ALIGN(128);
 };
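
As a usage note: to check the result on a real build with debug info,
pahole -C thread_ctx on the compiled binary prints each field's offset
and size, marks cache-line boundaries, and reports holes, which is a
handy way to confirm the hole and boundary comments above.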