mirror of
https://git.haproxy.org/git/haproxy.git/
synced 2025-09-21 22:01:31 +02:00
Now instead of getting aggregate stats per called function, we have them per function AND per call place. The "byaddr" sort considers the function pointer first, then the call count, so that dominant callers of a given callee are instantly spotted. This allows to get sorted outputs like this: Tasks activity: function calls cpu_tot cpu_avg lat_tot lat_avg h1_io_cb 17357952 40.91s 2.357us 4.849m 16.76us <- sock_conn_iocb@src/sock.c:869 tasklet_wakeup sc_conn_io_cb 10357182 6.297s 607.0ns 27.93m 161.8us <- sc_app_chk_rcv_conn@src/stconn.c:762 tasklet_wakeup process_stream 9891131 1.809m 10.97us 53.61m 325.2us <- sc_notify@src/stconn.c:1209 task_wakeup process_stream 9823934 1.887m 11.52us 48.31m 295.1us <- stream_new@src/stream.c:563 task_wakeup sc_conn_io_cb 9347863 16.59s 1.774us 6.143m 39.43us <- h1_wake_stream_for_recv@src/mux_h1.c:2600 tasklet_wakeup h1_io_cb 501344 1.848s 3.686us 6.544m 783.2us <- conn_subscribe@src/connection.c:732 tasklet_wakeup sc_conn_io_cb 239717 492.3ms 2.053us 3.213m 804.3us <- qcs_notify_send@src/mux_quic.c:529 tasklet_wakeup h2_io_cb 173019 4.204s 24.30us 40.95s 236.7us <- h2_snd_buf@src/mux_h2.c:6712 tasklet_wakeup h2_io_cb 149487 424.3ms 2.838us 14.63s 97.87us <- h2c_restart_reading@src/mux_h2.c:856 tasklet_wakeup other 101893 4.626s 45.40us 14.84s 145.7us quic_lstnr_dghdlr 94389 614.0ms 6.504us 30.54s 323.6us <- quic_lstnr_dgram_dispatch@src/quic_sock.c:255 tasklet_wakeup quic_conn_app_io_cb 92205 3.735s 40.51us 390.9ms 4.239us <- qc_lstnr_pkt_rcv@src/xprt_quic.c:6184 tasklet_wakeup_after qc_io_cb 50355 19.01s 377.5us 10.65s 211.4us <- qc_treat_acked_tx_frm@src/xprt_quic.c:1695 tasklet_wakeup h1_io_cb 44427 155.0ms 3.489us 21.50s 484.0us <- h1_takeover@src/mux_h1.c:4085 tasklet_wakeup qc_io_cb 9018 4.924s 546.0us 3.084s 342.0us <- qc_stream_desc_ack@src/quic_stream.c:128 tasklet_wakeup h1_timeout_task 3236 1.172ms 362.0ns 1.119s 345.9us <- h1_release@src/mux_h1.c:1087 task_wakeup h1_io_cb 2804 7.974ms 2.843us 1.980s 706.0us <- sock_conn_iocb@src/sock.c:849 tasklet_wakeup sc_conn_io_cb 2804 33.44ms 11.92us 2.597s 926.2us <- h1_wake_stream_for_send@src/mux_h1.c:2610 tasklet_wakeup qc_io_cb 2623 2.669s 1.017ms 1.347s 513.5us <- h3_snd_buf@src/h3.c:1084 tasklet_wakeup qc_process_timer 662 526.4us 795.0ns 1.081s 1.633ms <- wake_expired_tasks@src/task.c:344 task_wakeup quic_conn_app_io_cb 648 12.62ms 19.47us 225.7ms 348.2us <- qc_process_timer@src/xprt_quic.c:4635 tasklet_wakeup accept_queue_process 286 1.571ms 5.494us 72.55ms 253.7us <- listener_accept@src/listener.c:1099 tasklet_wakeup process_resolvers 176 157.8us 896.0ns 7.835ms 44.52us <- wake_expired_tasks@src/task.c:429 task_drop_running qc_io_cb 167 10.71ms 64.12us 32.47ms 194.4us <- qc_process_timer@src/xprt_quic.c:4602 tasklet_wakeup sc_conn_io_cb 123 80.05us 650.0ns 50.35ms 409.4us <- qcs_notify_recv@src/mux_quic.c:519 tasklet_wakeup h2_timeout_task 32 30.69us 958.0ns 9.038ms 282.4us <- h2_release@src/mux_h2.c:1191 task_wakeup task_run_applet 24 33.79ms 1.408ms 5.838ms 243.3us <- sc_applet_create@src/stconn.c:489 appctx_wakeup accept_queue_process 17 56.34us 3.314us 7.505ms 441.5us <- accept_queue_process@src/listener.c:165 tasklet_wakeup srv_cleanup_toremove_conns 16 1.133ms 70.81us 5.685ms 355.3us <- srv_cleanup_idle_conns@src/server.c:5948 task_wakeup srv_cleanup_idle_conns 16 74.57us 4.660us 2.797ms 174.8us <- wake_expired_tasks@src/task.c:429 task_drop_running quic_conn_app_io_cb 12 786.9us 65.58us 2.042ms 170.1us <- qc_process_timer@src/xprt_quic.c:4589 tasklet_wakeup sc_conn_io_cb 9 20.55us 2.283us 2.475ms 275.0us <- sock_conn_iocb@src/sock.c:869 tasklet_wakeup h2_io_cb 8 34.12us 4.265us 1.784ms 223.0us <- h2_do_shutw@src/mux_h2.c:4656 tasklet_wakeup task_run_applet 4 6.615ms 1.654ms 2.306us 576.0ns <- sc_app_chk_snd_applet@src/stconn.c:996 appctx_wakeup quic_conn_io_cb 4 4.278ms 1.069ms 6.469us 1.617us <- qc_lstnr_pkt_rcv@src/xprt_quic.c:6184 tasklet_wakeup_after qc_io_cb 2 20.81us 10.40us 4.943us 2.471us <- qc_init@src/mux_quic.c:2057 tasklet_wakeup quic_conn_app_io_cb 2 752.9us 376.4us 63.97us 31.99us <- qc_xprt_start@src/xprt_quic.c:7122 tasklet_wakeup quic_accept_run 2 13.84us 6.920us 172.8us 86.42us <- quic_accept_push_qc@src/quic_sock.c:458 tasklet_wakeup qc_idle_timer_task 2 295.0us 147.5us 8.761us 4.380us <- wake_expired_tasks@src/task.c:344 task_wakeup qc_io_cb 1 867.1us 867.1us 812.8us 812.8us <- qcs_consume@src/mux_quic.c:800 tasklet_wakeup ... and calls sorted by address like this: Tasks activity: function calls cpu_tot cpu_avg lat_tot lat_avg task_run_applet 23 32.73ms 1.423ms 5.837ms 253.8us <- sc_applet_create@src/stconn.c:489 appctx_wakeup task_run_applet 4 6.615ms 1.654ms 2.306us 576.0ns <- sc_app_chk_snd_applet@src/stconn.c:996 appctx_wakeup accept_queue_process 285 1.566ms 5.495us 72.49ms 254.3us <- listener_accept@src/listener.c:1099 tasklet_wakeup accept_queue_process 17 56.34us 3.314us 7.505ms 441.5us <- accept_queue_process@src/listener.c:165 tasklet_wakeup sc_conn_io_cb 10357182 6.297s 607.0ns 27.93m 161.8us <- sc_app_chk_rcv_conn@src/stconn.c:762 tasklet_wakeup sc_conn_io_cb 9347863 16.59s 1.774us 6.143m 39.43us <- h1_wake_stream_for_recv@src/mux_h1.c:2600 tasklet_wakeup sc_conn_io_cb 239717 492.3ms 2.053us 3.213m 804.3us <- qcs_notify_send@src/mux_quic.c:529 tasklet_wakeup sc_conn_io_cb 2804 33.44ms 11.92us 2.597s 926.2us <- h1_wake_stream_for_send@src/mux_h1.c:2610 tasklet_wakeup sc_conn_io_cb 123 80.05us 650.0ns 50.35ms 409.4us <- qcs_notify_recv@src/mux_quic.c:519 tasklet_wakeup sc_conn_io_cb 9 20.55us 2.283us 2.475ms 275.0us <- sock_conn_iocb@src/sock.c:869 tasklet_wakeup process_resolvers 159 145.9us 917.0ns 7.823ms 49.20us <- wake_expired_tasks@src/task.c:429 task_drop_running srv_cleanup_idle_conns 16 74.57us 4.660us 2.797ms 174.8us <- wake_expired_tasks@src/task.c:429 task_drop_running srv_cleanup_toremove_conns 16 1.133ms 70.81us 5.685ms 355.3us <- srv_cleanup_idle_conns@src/server.c:5948 task_wakeup process_stream 9891130 1.809m 10.97us 53.61m 325.2us <- sc_notify@src/stconn.c:1209 task_wakeup process_stream 9823933 1.887m 11.52us 48.31m 295.1us <- stream_new@src/stream.c:563 task_wakeup h1_io_cb 17357952 40.91s 2.357us 4.849m 16.76us <- sock_conn_iocb@src/sock.c:869 tasklet_wakeup h1_io_cb 501344 1.848s 3.686us 6.544m 783.2us <- conn_subscribe@src/connection.c:732 tasklet_wakeup h1_io_cb 44427 155.0ms 3.489us 21.50s 484.0us <- h1_takeover@src/mux_h1.c:4085 tasklet_wakeup h1_io_cb 2804 7.974ms 2.843us 1.980s 706.0us <- sock_conn_iocb@src/sock.c:849 tasklet_wakeup h1_timeout_task 3236 1.172ms 362.0ns 1.119s 345.9us <- h1_release@src/mux_h1.c:1087 task_wakeup h2_timeout_task 32 30.69us 958.0ns 9.038ms 282.4us <- h2_release@src/mux_h2.c:1191 task_wakeup h2_io_cb 173019 4.204s 24.30us 40.95s 236.7us <- h2_snd_buf@src/mux_h2.c:6712 tasklet_wakeup h2_io_cb 149487 424.3ms 2.838us 14.63s 97.87us <- h2c_restart_reading@src/mux_h2.c:856 tasklet_wakeup h2_io_cb 8 34.12us 4.265us 1.784ms 223.0us <- h2_do_shutw@src/mux_h2.c:4656 tasklet_wakeup qc_io_cb 50355 19.01s 377.5us 10.65s 211.4us <- qc_treat_acked_tx_frm@src/xprt_quic.c:1695 tasklet_wakeup qc_io_cb 9018 4.924s 546.0us 3.084s 342.0us <- qc_stream_desc_ack@src/quic_stream.c:128 tasklet_wakeup qc_io_cb 2623 2.669s 1.017ms 1.347s 513.5us <- h3_snd_buf@src/h3.c:1084 tasklet_wakeup qc_io_cb 167 10.71ms 64.12us 32.47ms 194.4us <- qc_process_timer@src/xprt_quic.c:4602 tasklet_wakeup qc_io_cb 2 20.81us 10.40us 4.943us 2.471us <- qc_init@src/mux_quic.c:2057 tasklet_wakeup qc_io_cb 1 867.1us 867.1us 812.8us 812.8us <- qcs_consume@src/mux_quic.c:800 tasklet_wakeup qc_idle_timer_task 2 295.0us 147.5us 8.761us 4.380us <- wake_expired_tasks@src/task.c:344 task_wakeup quic_conn_io_cb 4 4.278ms 1.069ms 6.469us 1.617us <- qc_lstnr_pkt_rcv@src/xprt_quic.c:6184 tasklet_wakeup_after quic_conn_app_io_cb 92205 3.735s 40.51us 390.9ms 4.239us <- qc_lstnr_pkt_rcv@src/xprt_quic.c:6184 tasklet_wakeup_after quic_conn_app_io_cb 648 12.62ms 19.47us 225.7ms 348.2us <- qc_process_timer@src/xprt_quic.c:4635 tasklet_wakeup quic_conn_app_io_cb 12 786.9us 65.58us 2.042ms 170.1us <- qc_process_timer@src/xprt_quic.c:4589 tasklet_wakeup quic_conn_app_io_cb 2 752.9us 376.4us 63.97us 31.99us <- qc_xprt_start@src/xprt_quic.c:7122 tasklet_wakeup quic_lstnr_dghdlr 94389 614.0ms 6.504us 30.54s 323.6us <- quic_lstnr_dgram_dispatch@src/quic_sock.c:255 tasklet_wakeup qc_process_timer 662 526.4us 795.0ns 1.081s 1.633ms <- wake_expired_tasks@src/task.c:344 task_wakeup quic_accept_run 2 13.84us 6.920us 172.8us 86.42us <- quic_accept_push_qc@src/quic_sock.c:458 tasklet_wakeup other 101892 4.626s 45.40us 14.84s 145.7us It already becomes visible that some tasks have different very costs depending where they're called (e.g. process_stream). The method used to wake them up is also shown. Applets are handled specially and shown as appctx_wakeup.
143 lines
5.6 KiB
C
143 lines
5.6 KiB
C
/*
|
|
* include/haproxy/activity-t.h
|
|
* This file contains structure declarations for activity measurements.
|
|
*
|
|
* Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation, version 2.1
|
|
* exclusively.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#ifndef _HAPROXY_ACTIVITY_T_H
|
|
#define _HAPROXY_ACTIVITY_T_H
|
|
|
|
#include <haproxy/api-t.h>
|
|
#include <haproxy/freq_ctr-t.h>
|
|
|
|
/* bit fields for the "profiling" global variable */
|
|
#define HA_PROF_TASKS_OFF 0x00000000 /* per-task CPU profiling forced disabled */
|
|
#define HA_PROF_TASKS_AOFF 0x00000001 /* per-task CPU profiling off (automatic) */
|
|
#define HA_PROF_TASKS_AON 0x00000002 /* per-task CPU profiling on (automatic) */
|
|
#define HA_PROF_TASKS_ON 0x00000003 /* per-task CPU profiling forced enabled */
|
|
#define HA_PROF_TASKS_MASK 0x00000003 /* per-task CPU profiling mask */
|
|
|
|
#define HA_PROF_MEMORY 0x00000004 /* memory profiling */
|
|
|
|
|
|
#ifdef USE_MEMORY_PROFILING
|
|
/* Elements used by memory profiling. This determines the number of buckets to
|
|
* store stats.
|
|
*/
|
|
#define MEMPROF_HASH_BITS 10
|
|
#define MEMPROF_HASH_BUCKETS (1U << MEMPROF_HASH_BITS)
|
|
|
|
enum memprof_method {
|
|
MEMPROF_METH_UNKNOWN = 0,
|
|
MEMPROF_METH_MALLOC,
|
|
MEMPROF_METH_CALLOC,
|
|
MEMPROF_METH_REALLOC,
|
|
MEMPROF_METH_FREE,
|
|
MEMPROF_METH_P_ALLOC, // pool_alloc()
|
|
MEMPROF_METH_P_FREE, // pool_free()
|
|
MEMPROF_METH_METHODS /* count, must be last */
|
|
};
|
|
|
|
/* stats:
|
|
* - malloc increases alloc
|
|
* - free increases free (if non null)
|
|
* - realloc increases either depending on the size change.
|
|
* when the real size is known (malloc_usable_size()), it's used in free_tot
|
|
* and alloc_tot, otherwise the requested size is reported in alloc_tot and
|
|
* zero in free_tot.
|
|
*/
|
|
struct memprof_stats {
|
|
const void *caller;
|
|
enum memprof_method method;
|
|
/* 4-7 bytes hole here */
|
|
unsigned long long alloc_calls;
|
|
unsigned long long free_calls;
|
|
unsigned long long alloc_tot;
|
|
unsigned long long free_tot;
|
|
void *info; // for pools, ptr to the pool
|
|
void *pad; // pad to 64
|
|
};
|
|
#endif
|
|
|
|
/* per-thread activity reports. It's important that it's aligned on cache lines
|
|
* because some elements will be updated very often. Most counters are OK on
|
|
* 32-bit since this will be used during debugging sessions for troubleshooting
|
|
* in iterative mode.
|
|
*/
|
|
struct activity {
|
|
unsigned int loops; // complete loops in run_poll_loop()
|
|
unsigned int wake_tasks; // active tasks prevented poll() from sleeping
|
|
unsigned int wake_signal; // pending signal prevented poll() from sleeping
|
|
unsigned int poll_io; // number of times poll() reported I/O events
|
|
unsigned int poll_exp; // number of times poll() sees an expired timeout (includes wake_*)
|
|
unsigned int poll_drop_fd; // poller dropped a dead FD from the update list
|
|
unsigned int poll_skip_fd; // poller skipped another thread's FD
|
|
unsigned int conn_dead; // conn_fd_handler woke up on an FD indicating a dead connection
|
|
unsigned int stream_calls; // calls to process_stream()
|
|
unsigned int ctxsw; // total number of context switches
|
|
unsigned int tasksw; // total number of task switches
|
|
unsigned int empty_rq; // calls to process_runnable_tasks() with nothing for the thread
|
|
unsigned int long_rq; // process_runnable_tasks() left with tasks in the run queue
|
|
unsigned int cpust_total; // sum of half-ms stolen per thread
|
|
unsigned int fd_takeover; // number of times this thread stole another one's FD
|
|
ALWAYS_ALIGN(64);
|
|
|
|
struct freq_ctr cpust_1s; // avg amount of half-ms stolen over last second
|
|
struct freq_ctr cpust_15s; // avg amount of half-ms stolen over last 15s
|
|
unsigned int avg_loop_us; // average run time per loop over last 1024 runs
|
|
unsigned int accepted; // accepted incoming connections
|
|
unsigned int accq_pushed; // accept queue connections pushed
|
|
unsigned int accq_full; // accept queue connection not pushed because full
|
|
unsigned int pool_fail; // failed a pool allocation
|
|
unsigned int buf_wait; // waited on a buffer allocation
|
|
#if defined(DEBUG_DEV)
|
|
/* keep these ones at the end */
|
|
unsigned int ctr0; // general purposee debug counter
|
|
unsigned int ctr1; // general purposee debug counter
|
|
unsigned int ctr2; // general purposee debug counter
|
|
#endif
|
|
char __pad[0]; // unused except to check remaining room
|
|
char __end[0] __attribute__((aligned(64))); // align size to 64.
|
|
};
|
|
|
|
/* 256 entries for callers * callees should be highly sufficient (~45 seen usually) */
|
|
#define SCHED_ACT_HASH_BITS 8
|
|
#define SCHED_ACT_HASH_BUCKETS (1U << SCHED_ACT_HASH_BITS)
|
|
|
|
/* global profiling stats from the scheduler: each entry corresponds to a
|
|
* task or tasklet ->process function pointer, with a number of calls and
|
|
* a total time. Each entry is unique, except entry 0 which is for colliding
|
|
* hashes (i.e. others). All of these must be accessed atomically.
|
|
*/
|
|
struct sched_activity {
|
|
const void *func;
|
|
const struct ha_caller *caller;
|
|
uint64_t calls;
|
|
uint64_t cpu_time;
|
|
uint64_t lat_time;
|
|
};
|
|
|
|
#endif /* _HAPROXY_ACTIVITY_T_H */
|
|
|
|
/*
|
|
* Local variables:
|
|
* c-indent-level: 8
|
|
* c-basic-offset: 8
|
|
* End:
|
|
*/
|