DEBUG: thread: reduce the struct lock_stat to store only 30 buckets

Storing only 30 buckets means we only keep 256 bytes per label. This
further simplifies address calculation and reduces the memory used
without complicating the locking code. It means we won't measure wait
times larger than a second, but we're not supposed to face such waits,
as they would trigger the watchdog anyway. The limit may become a bit
tight when measuring with rdtsc() instead of now_mono_time(), though
(typically around 350 ms for a 3 GHz CPU).
Author: Willy Tarreau
Date:   2025-02-10 11:15:44 +01:00
Commit: eced1d6d8a
Parent: c2f2d6fd3c
2 files changed, 21 insertions(+), 8 deletions(-)
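
As a sanity check on the arithmetic above, here is a minimal standalone
sketch (not part of the patch). It copies the new struct from the diff
below and assumes ALIGNED() expands to the usual GCC aligned attribute:

/* Minimal sketch, not part of the patch: verify that 2 counters plus
 * 30 uint64_t buckets occupy exactly 16 + 240 = 256 bytes, so the
 * 256-byte alignment wastes nothing. ALIGNED() is assumed to be the
 * plain GCC attribute wrapper.
 */
#include <stdint.h>
#include <stdio.h>

#define ALIGNED(x) __attribute__((aligned(x)))

struct lock_stat {
	uint64_t nsec_wait;
	uint64_t num_unlocked;
	uint64_t buckets[30]; // operations per time buckets (1-2ns to 0.5-1s)
} ALIGNED(256);

int main(void)
{
	printf("sizeof(struct lock_stat) = %zu\n", sizeof(struct lock_stat)); /* 256 */
	printf("last bucket starts at %u ns\n", 1u << 29); /* ~0.54s, ends just above 1s */
	return 0;
}

With sizeof() landing exactly on 256, &lock_stats_xx[lbl] is just
base + (lbl << 8), which is the address-calculation simplification the
commit message refers to.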


@@ -110,13 +110,13 @@
 /* Debugging information that is only used when thread debugging is enabled */
-/* This is aligned as it's already 512B per lock label, so better simplify the
+/* This is aligned as it's already 256B per lock label, so better simplify the
  * address calculations in the fast path than save a few bytes in BSS.
  */
 struct lock_stat {
 	uint64_t nsec_wait;
 	uint64_t num_unlocked;
-	uint64_t buckets[32]; // operations per time buckets (1-2ns to 2.1-4.3s)
-} ALIGNED(512);
+	uint64_t buckets[30]; // operations per time buckets (1-2ns to 0.5-1s)
+} ALIGNED(256);
 
 struct ha_spinlock_state {


@@ -483,7 +483,7 @@ static uint64_t get_lock_stat_num_read(int lbl)
 	uint64_t ret = 0;
 	uint bucket;
 
-	for (bucket = 0; bucket < 32; bucket++)
+	for (bucket = 0; bucket < 30; bucket++)
 		ret += _HA_ATOMIC_LOAD(&lock_stats_rd[lbl].buckets[bucket]);
 	return ret;
 }
@@ -493,7 +493,7 @@ static uint64_t get_lock_stat_num_seek(int lbl)
 	uint64_t ret = 0;
 	uint bucket;
 
-	for (bucket = 0; bucket < 32; bucket++)
+	for (bucket = 0; bucket < 30; bucket++)
 		ret += _HA_ATOMIC_LOAD(&lock_stats_sk[lbl].buckets[bucket]);
 	return ret;
 }
@@ -503,7 +503,7 @@ static uint64_t get_lock_stat_num_write(int lbl)
 	uint64_t ret = 0;
 	uint bucket;
 
-	for (bucket = 0; bucket < 32; bucket++)
+	for (bucket = 0; bucket < 30; bucket++)
 		ret += _HA_ATOMIC_LOAD(&lock_stats_wr[lbl].buckets[bucket]);
 	return ret;
 }
@@ -542,7 +542,7 @@ void show_lock_stats()
 		        (double)lock_stats_wr[lbl].nsec_wait / 1000000.0,
 		        num_write_locked ? ((double)lock_stats_wr[lbl].nsec_wait / (double)num_write_locked) : 0);
 
-		for (bucket = 0; bucket < 32; bucket++)
+		for (bucket = 0; bucket < 30; bucket++)
 			if (lock_stats_wr[lbl].buckets[bucket])
 				fprintf(stderr, " %u:%llu", bucket, (ullong)lock_stats_wr[lbl].buckets[bucket]);
 		fprintf(stderr, "\n");
@@ -561,7 +561,7 @@ void show_lock_stats()
 		        (double)lock_stats_sk[lbl].nsec_wait / 1000000.0,
 		        num_seek_locked ? ((double)lock_stats_sk[lbl].nsec_wait / (double)num_seek_locked) : 0);
 
-		for (bucket = 0; bucket < 32; bucket++)
+		for (bucket = 0; bucket < 30; bucket++)
 			if (lock_stats_sk[lbl].buckets[bucket])
 				fprintf(stderr, " %u:%llu", bucket, (ullong)lock_stats_sk[lbl].buckets[bucket]);
 		fprintf(stderr, "\n");
@@ -580,7 +580,7 @@ void show_lock_stats()
 		        (double)lock_stats_rd[lbl].nsec_wait / 1000000.0,
 		        num_read_locked ? ((double)lock_stats_rd[lbl].nsec_wait / (double)num_read_locked) : 0);
 
-		for (bucket = 0; bucket < 32; bucket++)
+		for (bucket = 0; bucket < 30; bucket++)
 			if (lock_stats_rd[lbl].buckets[bucket])
 				fprintf(stderr, " %u:%llu", bucket, (ullong)lock_stats_rd[lbl].buckets[bucket]);
 		fprintf(stderr, "\n");
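
As a side note for reading the " %u:%llu" histogram entries printed
above: bucket i counts operations whose (masked) wait time falls in
[2^i, 2^(i+1)) ns. A small hypothetical decoder, not part of the patch:

/* Print the nanosecond range covered by each of the 30 buckets.
 * Bucket 0 also absorbs zero-length waits, which the locking code
 * below rounds up to 1 ns before taking the bit position.
 */
#include <stdio.h>

int main(void)
{
	for (unsigned b = 0; b < 30; b++)
		printf("bucket %2u: %10llu .. %10llu ns\n", b,
		       1ULL << b, (2ULL << b) - 1);
	return 0;
}

The last line it prints, bucket 29 covering 536870912..1073741823 ns,
matches the "0.5-1s" upper bound mentioned in the struct comment.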
@@ -619,6 +619,7 @@ void __ha_rwlock_wrlock(enum lock_label lbl, struct ha_rwlock *l,
 	start_time += now_mono_time();
 	HA_ATOMIC_ADD(&lock_stats_wr[lbl].nsec_wait, start_time);
+	start_time &= 0x3fffffff; // keep values below 1 billion only
 	bucket = flsnz((uint32_t)start_time + 1) - 1;
 	HA_ATOMIC_INC(&lock_stats_wr[lbl].buckets[bucket]);
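
The same pattern repeats in every lock path below: add the measured
wait to nsec_wait, mask it below 2^30 ns, then bump a power-of-two
bucket. A standalone sketch of that bucketing step; flsnz() is assumed
to return the 1-based position of the highest set bit (as HAProxy's
helper does) and is emulated here with GCC's __builtin_clz():

/* Standalone sketch of the bucketing step, not HAProxy code.
 * flsnz(x) - 1 == floor(log2(x)) for x > 0. Masking with 0x3fffffff
 * keeps the value below 2^30, so the bucket index always lands in
 * [0, 29] and fits the reduced buckets[30] array. Note the mask wraps
 * (rather than saturates) longer waits, which is the trade-off the
 * commit message accepts.
 */
#include <stdint.h>
#include <stdio.h>

static inline unsigned flsnz(uint32_t x)
{
	return 32 - (unsigned)__builtin_clz(x); /* x must not be 0 */
}

int main(void)
{
	const uint64_t waits_ns[] = { 1, 1000, 999999999, 5000000000ULL };

	for (unsigned i = 0; i < sizeof(waits_ns) / sizeof(*waits_ns); i++) {
		uint32_t t = waits_ns[i] & 0x3fffffff; /* keep below 2^30 ns */
		unsigned bucket = flsnz(t ? t : 1) - 1;
		printf("%10llu ns -> bucket %u\n",
		       (unsigned long long)waits_ns[i], bucket);
	}
	return 0;
}

1 ns lands in bucket 0, 1000 ns in bucket 9, and anything just under a
second in bucket 29; the 5-second entry wraps around, illustrating why
waits beyond ~1.07 s are no longer meaningfully measured.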
@@ -654,6 +655,7 @@ int __ha_rwlock_trywrlock(enum lock_label lbl, struct ha_rwlock *l,
 	}
 	HA_ATOMIC_ADD(&lock_stats_wr[lbl].nsec_wait, start_time);
+	start_time &= 0x3fffffff; // keep values below 1 billion only
 	bucket = flsnz((uint32_t)start_time ? (uint32_t)start_time : 1) - 1;
 	HA_ATOMIC_INC(&lock_stats_wr[lbl].buckets[bucket]);
@@ -705,6 +707,7 @@ void __ha_rwlock_rdlock(enum lock_label lbl,struct ha_rwlock *l)
 	start_time += now_mono_time();
 	HA_ATOMIC_ADD(&lock_stats_rd[lbl].nsec_wait, start_time);
+	start_time &= 0x3fffffff; // keep values below 1 billion only
 	bucket = flsnz((uint32_t)start_time ? (uint32_t)start_time : 1) - 1;
 	HA_ATOMIC_INC(&lock_stats_rd[lbl].buckets[bucket]);
@@ -734,6 +737,7 @@ int __ha_rwlock_tryrdlock(enum lock_label lbl,struct ha_rwlock *l)
 	HA_ATOMIC_ADD(&lock_stats_rd[lbl].nsec_wait, start_time);
+	start_time &= 0x3fffffff; // keep values below 1 billion only
 	bucket = flsnz((uint32_t)start_time ? (uint32_t)start_time : 1) - 1;
 	HA_ATOMIC_INC(&lock_stats_rd[lbl].buckets[bucket]);
@@ -780,6 +784,7 @@ void __ha_rwlock_wrtord(enum lock_label lbl, struct ha_rwlock *l,
 	start_time += now_mono_time();
 	HA_ATOMIC_ADD(&lock_stats_rd[lbl].nsec_wait, start_time);
+	start_time &= 0x3fffffff; // keep values below 1 billion only
 	bucket = flsnz((uint32_t)start_time ? (uint32_t)start_time : 1) - 1;
 	HA_ATOMIC_INC(&lock_stats_rd[lbl].buckets[bucket]);
@@ -813,6 +818,7 @@ void __ha_rwlock_wrtosk(enum lock_label lbl, struct ha_rwlock *l,
 	start_time += now_mono_time();
 	HA_ATOMIC_ADD(&lock_stats_sk[lbl].nsec_wait, start_time);
+	start_time &= 0x3fffffff; // keep values below 1 billion only
 	bucket = flsnz((uint32_t)start_time ? (uint32_t)start_time : 1) - 1;
 	HA_ATOMIC_INC(&lock_stats_sk[lbl].buckets[bucket]);
@@ -843,6 +849,7 @@ void __ha_rwlock_sklock(enum lock_label lbl, struct ha_rwlock *l,
 	start_time += now_mono_time();
 	HA_ATOMIC_ADD(&lock_stats_sk[lbl].nsec_wait, start_time);
+	start_time &= 0x3fffffff; // keep values below 1 billion only
 	bucket = flsnz((uint32_t)start_time ? (uint32_t)start_time : 1) - 1;
 	HA_ATOMIC_INC(&lock_stats_sk[lbl].buckets[bucket]);
@@ -875,6 +882,7 @@ void __ha_rwlock_sktowr(enum lock_label lbl, struct ha_rwlock *l,
 	start_time += now_mono_time();
 	HA_ATOMIC_ADD(&lock_stats_wr[lbl].nsec_wait, start_time);
+	start_time &= 0x3fffffff; // keep values below 1 billion only
 	bucket = flsnz((uint32_t)start_time ? (uint32_t)start_time : 1) - 1;
 	HA_ATOMIC_INC(&lock_stats_wr[lbl].buckets[bucket]);
@@ -908,6 +916,7 @@ void __ha_rwlock_sktord(enum lock_label lbl, struct ha_rwlock *l,
 	start_time += now_mono_time();
 	HA_ATOMIC_ADD(&lock_stats_rd[lbl].nsec_wait, start_time);
+	start_time &= 0x3fffffff; // keep values below 1 billion only
 	bucket = flsnz((uint32_t)start_time ? (uint32_t)start_time : 1) - 1;
 	HA_ATOMIC_INC(&lock_stats_rd[lbl].buckets[bucket]);
@@ -960,6 +969,7 @@ int __ha_rwlock_trysklock(enum lock_label lbl, struct ha_rwlock *l,
 		/* got the lock ! */
 		HA_ATOMIC_ADD(&lock_stats_sk[lbl].nsec_wait, start_time);
+		start_time &= 0x3fffffff; // keep values below 1 billion only
 		bucket = flsnz((uint32_t)start_time ? (uint32_t)start_time : 1) - 1;
 		HA_ATOMIC_INC(&lock_stats_sk[lbl].buckets[bucket]);
 		HA_ATOMIC_OR(&st->cur_seeker, tbit);
@@ -997,6 +1007,7 @@ int __ha_rwlock_tryrdtosk(enum lock_label lbl, struct ha_rwlock *l,
 		/* got the lock ! */
 		HA_ATOMIC_ADD(&lock_stats_sk[lbl].nsec_wait, start_time);
+		start_time &= 0x3fffffff; // keep values below 1 billion only
 		bucket = flsnz((uint32_t)start_time ? (uint32_t)start_time : 1) - 1;
 		HA_ATOMIC_INC(&lock_stats_sk[lbl].buckets[bucket]);
 		HA_ATOMIC_OR(&st->cur_seeker, tbit);
@@ -1042,6 +1053,7 @@ void __spin_lock(enum lock_label lbl, struct ha_spinlock *l,
 	start_time += now_mono_time();
 	HA_ATOMIC_ADD(&lock_stats_sk[lbl].nsec_wait, start_time);
+	start_time &= 0x3fffffff; // keep values below 1 billion only
 	bucket = flsnz((uint32_t)start_time ? (uint32_t)start_time : 1) - 1;
 	HA_ATOMIC_INC(&lock_stats_sk[lbl].buckets[bucket]);
@@ -1078,6 +1090,7 @@ int __spin_trylock(enum lock_label lbl, struct ha_spinlock *l,
 	HA_ATOMIC_ADD(&lock_stats_sk[lbl].nsec_wait, start_time);
+	start_time &= 0x3fffffff; // keep values below 1 billion only
 	bucket = flsnz((uint32_t)start_time ? (uint32_t)start_time : 1) - 1;
 	HA_ATOMIC_INC(&lock_stats_sk[lbl].buckets[bucket]);