From 69bc0e7351e0c895d7fe0b8962672aea5fd31427 Mon Sep 17 00:00:00 2001 From: Pavel Punsky Date: Sun, 3 May 2026 22:03:08 -0700 Subject: [PATCH] Load generator mode in turnutils_uclient (#1894) ## Summary Adds load-generator modes to `turnutils_uclient` for repeatable TURN server performance testing: - Adds `-Y packet|alloc|invalid` load modes. - Supports packet flood, allocation flood, and invalid-packet flood workflows. - Adds unique local client ports for allocation flood mode. - Removes default packet pacing in load-generator modes unless explicitly set. - Adds helper scripts under `examples/loadtest/`. - Documents load-test usage in `README.turnutils`, `man/man1/turnutils.1`, `CLAUDE.md`, and `docs/PerformanceIterationLog.md`. The performance log captures DigitalOcean benchmark methodology, A/B lessons, hot-path findings, and future optimization candidates. --- CLAUDE.md | 203 +++++++++++++++++ README.turnutils | 18 ++ docs/PerformanceIterationLog.md | 268 ++++++++++++++++++++++ examples/loadtest/allocation_flood.sh | 45 ++++ examples/loadtest/invalid_flood.sh | 41 ++++ examples/loadtest/packet_flood.sh | 49 +++++ man/man1/turnutils.1 | 19 ++ src/apps/uclient/mainuclient.c | 74 ++++++- src/apps/uclient/startuclient.c | 135 +++++++++++- src/apps/uclient/startuclient.h | 9 + src/apps/uclient/uclient.c | 306 +++++++++++++++++++++++--- src/apps/uclient/uclient.h | 13 ++ 12 files changed, 1136 insertions(+), 44 deletions(-) create mode 100644 docs/PerformanceIterationLog.md create mode 100755 examples/loadtest/allocation_flood.sh create mode 100755 examples/loadtest/invalid_flood.sh create mode 100755 examples/loadtest/packet_flood.sh diff --git a/CLAUDE.md b/CLAUDE.md index 752566df..dd1ceb86 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -134,6 +134,209 @@ cd examples && ./scripts/basic/udp_c2c_client.sh cd examples && ./run_tests.sh ``` +## Load Test on DigitalOcean + +Use two same-region CPU-optimized droplets for repeatable load tests. 
The last +known setup used Ubuntu 24.04 `c-4` droplets in `nyc1`: + +- turnserver droplet private IP: `10.116.0.2` +- loadgen droplet private IP: `10.116.0.3` +- build: current branch archived with `git archive` +- important baseline: turnserver was **not** run with `--udp-recvmmsg` + +Never paste DigitalOcean tokens into logs or files. Use a local environment +variable such as `DIGITALOCEAN_TOKEN`, and revoke temporary tokens after the +run. + +Local source package and upload: + +```bash +git archive --format=tar HEAD -o /tmp/coturn.tar + +scp /tmp/coturn.tar root@TURN_PUBLIC_IP:/root/coturn.tar +scp /tmp/coturn.tar root@LOADGEN_PUBLIC_IP:/root/coturn.tar +``` + +Install dependencies and build on both droplets: + +```bash +export DEBIAN_FRONTEND=noninteractive +apt-get update +apt-get install -y build-essential cmake pkg-config libssl-dev libevent-dev \ + libsqlite3-dev libhiredis-dev git iproute2 sysstat + +rm -rf /root/coturn +mkdir /root/coturn +tar -xf /root/coturn.tar -C /root/coturn +cmake -S /root/coturn -B /root/coturn/build -DCMAKE_BUILD_TYPE=Release +cmake --build /root/coturn/build --target turnserver turnutils_uclient turnutils_peer -j$(nproc) +``` + +Start `turnserver` on the server droplet. This is the baseline command used for +the final run; add `--udp-recvmmsg` only when intentionally comparing that mode: + +```bash +pkill -x turnserver || true +sysctl -w net.core.rmem_max=134217728 net.core.wmem_max=134217728 \ + net.core.netdev_max_backlog=250000 || true +ulimit -n 1048576 + +nohup /root/coturn/build/bin/turnserver \ + --use-auth-secret \ + --static-auth-secret=secret \ + --realm=north.gov \ + --allow-loopback-peers \ + --listening-ip=10.116.0.2 \ + --relay-ip=10.116.0.2 \ + --min-port=49152 \ + --max-port=65535 \ + --no-cli \ + --no-tls \ + --no-dtls \ + --log-file=stdout \ + --simple-log \ + > /root/turnserver.log 2>&1 & +echo $! 
> /root/turnserver.pid +``` + +Start the UDP peer on the loadgen droplet: + +```bash +pkill -x turnutils_peer || true +sysctl -w net.core.rmem_max=134217728 net.core.wmem_max=134217728 \ + net.core.netdev_max_backlog=250000 || true +ulimit -n 1048576 + +nohup /root/coturn/build/bin/turnutils_peer -L 10.116.0.3 -p 3480 \ + > /root/peer.log 2>&1 & +echo $! > /root/peer.pid +``` + +Optional server-side monitor, run on the turnserver droplet before each test: + +```bash +cat > /root/start_monitor.sh <<'EOF' +#!/bin/bash +label=$1 +pid=$(cat /root/turnserver.pid) +rm -f /root/${label}_*.txt +nohup bash -c "pidstat -h -u -r -p $pid 1 14 > /root/${label}_pidstat.txt & \ + mpstat 1 14 > /root/${label}_mpstat.txt & \ + sar -n DEV 1 14 > /root/${label}_sar.txt & wait" \ + > /root/${label}_monitor.out 2>&1 & +echo $! > /root/${label}_monitor.pid +EOF +chmod +x /root/start_monitor.sh +``` + +Connectivity smoke from loadgen: + +```bash +/root/coturn/build/bin/turnutils_uclient \ + -Y packet -m 1 -n 1000 -l 120 \ + -e 10.116.0.3 -r 3480 -X -g \ + -u user -W secret \ + 10.116.0.2 +``` + +Packet relay sweep from loadgen: + +```bash +for m in 1 2 4 8 16 32; do + log=/root/packet_m${m}.log + timeout -s INT 12s /root/coturn/build/bin/turnutils_uclient \ + -Y packet -m "$m" -l 120 \ + -e 10.116.0.3 -r 3480 -X -g \ + -u user -W secret \ + 10.116.0.2 > "$log" 2>&1 || true + tail -20 "$log" +done +``` + +Monitored packet run: + +```bash +# on turnserver +/root/start_monitor.sh packet_m1_mon + +# on loadgen +timeout -s INT 12s /root/coturn/build/bin/turnutils_uclient \ + -Y packet -m 1 -l 120 \ + -e 10.116.0.3 -r 3480 -X -g \ + -u user -W secret \ + 10.116.0.2 > /root/packet_m1_mon.log 2>&1 || true +``` + +Packet-only CPU profile, useful when checking the relay bottleneck. Build with +`-DCMAKE_BUILD_TYPE=RelWithDebInfo` if you want readable user-space symbols. 
+Run once without `--udp-recvmmsg`, then restart `turnserver` with +`--udp-recvmmsg` and rerun the same commands with the `recvmmsg` label: + +```bash +# on turnserver +sysctl -w kernel.perf_event_paranoid=-1 kernel.kptr_restrict=0 || true +pid=$(cat /root/turnserver.pid) +label=no_recvmmsg + +(pidstat -h -u -r -p "$pid" 1 14 > /root/${label}_pidstat.txt & \ + mpstat 1 14 > /root/${label}_mpstat.txt & \ + sar -n DEV 1 14 > /root/${label}_sar.txt & wait) \ + > /root/${label}_monitor.out 2>&1 & + +perf record -F 99 -g -p "$pid" -o /root/${label}.perf.data -- sleep 14 +perf report --stdio -i /root/${label}.perf.data --no-children \ + --sort comm,dso,symbol > /root/${label}_perf.report +perf report --stdio -i /root/${label}.perf.data --children \ + --sort symbol,dso > /root/${label}_perf.children + +# on loadgen (set the same label= value in this shell first), started about one second after perf starts +timeout -s INT 12s /root/coturn/build/bin/turnutils_uclient \ + -Y packet -m 1 -l 120 \ + -e 10.116.0.3 -r 3480 -X -g \ + -u user -W secret \ + 10.116.0.2 > /root/${label}_packet_m1.log 2>&1 || true +``` + +Invalid-packet flood: + +```bash +# on turnserver +/root/start_monitor.sh invalid_m1_mon + +# on loadgen +timeout -s INT 12s /root/coturn/build/bin/turnutils_uclient \ + -Y invalid -m 1 -l 16 \ + 10.116.0.2 > /root/invalid_m1_mon.log 2>&1 || true +``` + +Restart `turnserver` after invalid-packet tests and before allocation tests. The +last run saw rapid RSS growth during the invalid flood, so avoid chaining tests on +the same server process. 
+ +Allocation flood: + +```bash +# on turnserver +/root/start_monitor.sh alloc_10000_mon + +# on loadgen +/root/coturn/build/bin/turnutils_uclient \ + -Y alloc -m 50 -n 200 \ + -L 10.116.0.3 \ + -u user -W secret \ + 10.116.0.2 > /root/alloc_10000.log 2>&1 +``` + +Useful summaries: + +```bash +grep -h 'send_pps=' /root/packet_m*.log /root/*_mon.log | tail -50 +grep -h 'total_allocations=' /root/alloc_*.log | tail -20 +ps -o pid,rss,vsz,pcpu,pmem,comm -p $(cat /root/turnserver.pid) +tail -20 /root/*_pidstat.txt +tail -20 /root/*_sar.txt +``` + ### Unit tests (Unity, opt-in via `BUILD_TESTING=ON`) Unity is fetched on demand via CMake `FetchContent`; nothing is vendored. diff --git a/README.turnutils b/README.turnutils index d318222a..46b46dc4 100644 --- a/README.turnutils +++ b/README.turnutils @@ -123,6 +123,12 @@ Flags: -J Use oAuth with default test key kid='north'. +-Y Load-generator mode: + packet floods data through a single TURN allocation as fast as possible, + alloc creates allocations as fast as possible, + and invalid sends small invalid packets to the TURN listener as fast as possible. + Load-generator modes imply -c and do not support -y. + Options with required values: -l Message length (Default: 100 Bytes). @@ -137,6 +143,8 @@ Options with required values: -p TURN Server port (Defaults: 3478 unsecure, 5349 secure). -n Number of messages to send (Default: 5). + In load-generator mode, -n is the number of operations per client. + If omitted there, the client runs until interrupted. -d Local interface device (optional, Linux only). @@ -149,6 +157,7 @@ Options with required values: -r Peer port (Default: 3480). -z Per-session packet interval in milliseconds (Default: 20). + In packet and invalid load-generator modes the default is 0 ms. -u STUN/TURN user name. @@ -168,6 +177,15 @@ Options with required values: -a Bandwidth for the bandwidth request in ALLOCATE. The default value is zero. 
+Notes for load-generator mode: + +- packet mode still performs the normal TURN allocation/setup and then starts sending immediately with no pacing. + +- alloc mode does not require -e; it repeatedly establishes new allocations and closes them again. +- alloc mode does not require -e; it repeatedly establishes new allocations, uses a unique client local port for each one, attaches each one to a unique synthetic peer ip:port, and closes them again. + +- invalid mode does not require -e; by default it uses 16-byte payloads unless -l is specified. + See the examples in the "examples/scripts" directory. ====================================== diff --git a/docs/PerformanceIterationLog.md b/docs/PerformanceIterationLog.md new file mode 100644 index 00000000..8316394c --- /dev/null +++ b/docs/PerformanceIterationLog.md @@ -0,0 +1,268 @@ +# Performance iteration log + +Running notes for the multi-iteration performance work on the UDP relay +data path. Pick this up to continue without re-deriving everything. + +The harness, baseline command, and droplet topology are documented in +[CLAUDE.md](../CLAUDE.md) under "Load Test on DigitalOcean" — this file +captures the *deltas*: what was measured, what landed, what didn't, and +where the next round should go. 
+ +## Cumulative result + +Five commits on `claude/beautiful-black-c3b741` between `727ec2ab` +("loadgen") and `321a2d18`: + +| # | Commit | Optimization | +|---|---|---| +| 1 | `ce7e7e53` | Hoist `turn_server_get_engine()` out of per-packet hot path | +| 2 | `8e28491a` | `ioa_socket_check_bandwidth` early fast-exit; drop dead `if (!(s->done \|\| s->fd==-1))` in `send_data_from_ioa_socket_nbh` | +| 3 | `344360f6` | Cache `get_relay_socket_ss()` and `ioa_network_buffer_get_size()` in `write_to_peerchannel`, `handle_turn_send`, `read_client_connection` | +| 4 | `a6f6767f` | Inline `get_ioa_addr_len()` via `ns_turn_ioaddr.h` | +| 5 | `321a2d18` | Inline `addr_cpy()` via `ns_turn_ioaddr.h` | + +Alternating A/B run on the same droplet pair, m=1 packet flood, 30 s +per run, with a 4 s warm-up between binary swaps: + +- Baseline (clean `master` binary): mean 146,984 round-trips / 30 s +- Cumulative (all 5 iters): mean 155,468 round-trips / 30 s +- **+5.8 % throughput** + +Per-iteration deltas were within run-to-run noise (~5–10 % variance). +The cumulative effect is what's visible. + +## Test setup that was used + +Two `c-4` Ubuntu 24.04 droplets in `nyc1`, same VPC `default-nyc1`: + +- `coturn-turnserver` — public `68.183.121.197`, private `10.116.0.2` +- `coturn-loadgen` — public `68.183.132.220`, private `10.116.0.3` + +Created via the DigitalOcean v2 API (`doctl` is *not* installed; use +`curl` + `$DIGITALOCEAN_TOKEN` from the user's `~/.zshrc`). SSH via +`~/.ssh/id_rsa` (matches DO ssh key id `23704483`, fingerprint +`37:3a:9b:e3:1e:1a:9b:42:a0:6f:58:f5:5a:3a:6a:2c`). + +State on the turnserver droplet (kept across iterations): + +- `/root/coturn_clean.tar` — `git archive HEAD` of master at start of run. + Re-extract this before applying any new patch. +- `/root/coturn_baseline/build/bin/turnserver` — clean baseline binary, + used as the "B" in every A/B round. **Don't overwrite.** +- `/root/coturn/build/bin/turnserver` — current iteration binary. 
+- `/root/start_turnserver.sh`, `/root/baseline_run.sh` — helper scripts. + +State on the loadgen droplet: + +- `/root/coturn/build/bin/turnutils_uclient`, `turnutils_peer`. +- `turnutils_peer` runs as a daemon on `10.116.0.3:3480` + (`pid` in `/root/peer.pid`). + +A small env file was written to `/tmp/coturn_perf_env.sh` on the local +machine with the IPs / droplet IDs — recreate it from the current +state of the DO account if it gets lost. + +The standard packet-flood command (matches CLAUDE.md baseline, runs +*without* `--udp-recvmmsg`): + +```bash +timeout -s INT 30s /root/coturn/build/bin/turnutils_uclient \ + -Y packet -m 1 -l 120 \ + -e 10.116.0.3 -r 3480 -X -g \ + -u user -W secret \ + 10.116.0.2 +``` + +Metric: the `tot_recv_msgs` field on the last `start_mclient:` log +line. (This is round-trips through the relay over the test window; +`send_pps` is loadgen-side only and can hit 262 K even when the relay +is dropping most of them, so it's not a useful proxy for relay +throughput.) + +## Hot-path map at the end of iter 5 + +`perf record -F 99 -g` on the turnserver during a 12 s `-Y packet -m 1` +run, sorted by user-space self-time: + +``` +0.80 % send_data_from_ioa_socket_nbh +0.76 % socket_input_worker +0.69 % read_client_connection.isra.0 +0.60 % turn_report_session_usage +0.53 % peer_input_handler +0.51 % udp_server_input_handler +0.35 % udp_recvfrom # was 0.76 % at iter 1 +0.34 % lm_map_get +0.27 % stun_is_channel_message_str +0.27 % get_relay_socket +0.26 % ioa_socket_check_bandwidth # was 0.33 % at iter 1 +0.26 % udp_send # was 0.60 % at iter 1 +0.18 % ioa_network_buffer_get_size +``` + +Total user-space coturn cycles: ~5–7 % of the relay thread. +The relay thread sits at ~100 % CPU pinned to one core; the 4 relay +threads aren't parallelised by the m=1 single-flow test (one 5-tuple +hashes to one SO_REUSEPORT worker). 
+ +Kernel side (children-aggregated) is the real cost: + +``` +36 % udp_sendmsg (sendto path) +14 % udp_recvmsg +17 % ip_finish_output / ip_output / __dev_queue_xmit +~23 % syscall enter / exit machinery (sysret, SYSRETQ, SYSCALL_64*) +``` + +That ~23 % syscall overhead is the next big lever. Halving it +(via batching) is worth ~10 % wall-clock CPU. + +## What didn't work + +### Default `--udp-recvmmsg=true` on Linux (tried in iter 1, reverted) + +The flag exists and is wired to `receive_udp_batch_recvmmsg` in +[dtls_listener.c](../src/apps/relay/dtls_listener.c), but **only on +the listener socket** — the unconnected `udp_listen_s` that handles +the *first* packet from a new client. Once `dtls_listener` calls +`create_new_connected_udp_socket` (line ~583), subsequent +client→relay traffic on that 5-tuple goes through a per-session +*connected* UDP socket whose libevent callback is +`socket_input_handler` → `socket_input_worker` → +`udp_recvfrom` (single `recvmsg`). Same on the peer→relay direction. + +In a steady-state packet flood with one client, almost zero packets +hit the listener path, so flipping the default does nothing for this +test. It would help a many-client / many-allocate workload, but +that's not what the m=1 harness measures. + +Throughput parity confirmed across multiple A/B rounds; reverted to +keep the baseline mental model in CLAUDE.md intact. + +### Caching `get_relay_socket_ss` (iter 3) — no measurable wall-clock win + +The function is `static inline` already and the underlying +`get_relay_socket()` is a four-line accessor. Caching the result +*does* save a cross-TU function call per packet (the compiler can't +prove `get_relay_socket` pure across the +`set_df_on_ioa_socket` / `ioa_network_buffer_*` calls in between), +which the perf profile picked up as a small redistribution, but +throughput stayed in the noise band. Kept anyway: the cleanup is +defensible and matches the iter 4/5 inlining direction. 
+ +## Methodology lessons + +- **Always alternate A/B per round** rather than running 5× baseline then 5× iteration. + The droplet pair has noticeable environmental drift over a few + minutes (other tenants on the hypervisor, NIC ring backpressure, + and so on); sequential blocks bias whichever binary ran on the worse + half of the run. +- **Discard the first run after a turnserver restart.** The loadgen's + first run after a server restart is consistently 30–80 % slower + than steady-state — it looks like channel/permission state on the + client side warming up, not the server. A 4 s "throwaway" run + before the measured 30 s run is enough. +- **Run-to-run variance is ~5–10 %** even with alternation. Plan on + 6–8 rounds (≈ 8 minutes wall-clock) before claiming a sub-10 % win. + A single 3-round A/B will lie to you. +- **Use the `tot_recv_msgs` field, not `send_pps`**. Loadgen send rate + saturates at ~262 K pps regardless of relay capacity — it's + whatever the loadgen kernel will accept into its UDP send buffer. + The receive count is what made it round-trip through the relay. +- **The relay is kernel-bound.** User-space coturn is ~5 % of cycles. + Halving it gives at most ~2.5 % wall-clock — usually undetectable + per-iteration, only visible cumulatively. Don't expect a 10 % jump + from a CSE (common-subexpression elimination). +- **Single-flow tests pin one core.** With `SO_REUSEPORT` the kernel + hashes 5-tuples to worker sockets; one client → one tuple → one + worker thread. The other 3 cores sit idle. To exercise all 4 relay + threads you'd need m≥4 *with distinct source ports* — ours don't + spread cleanly because the loadgen reuses ports. +- **Don't re-extract `/root/coturn` between iterations** if you want + to keep `git apply`-style patches working. The droplet copy is *not* + a git checkout (it's the `git archive` tar). Use `patch -p1`. In practice, each + iteration instead uploaded a *cumulative* diff (current branch vs `master`) + and re-extracted from `/root/coturn_clean.tar` first to get a clean + apply. 
+ +## Optimization backlog (bigger fish for next session) + +Ordered by expected impact for the m=1 packet-flood metric: + +1. **Extend `recvmmsg` into `socket_input_worker`** for plain UDP + non-DTLS sockets. The existing `try_again` loop in + [ns_ioalib_engine_impl.c:2683](../src/apps/relay/ns_ioalib_engine_impl.c#L2683) + already drains up to `MAX_TRIES = 16` packets per epoll wakeup via + 16 single `recvmsg` calls. Replacing the inner read with a + `recvmmsg` of up to 16 messages saves ~15 syscalls per drain + iteration. At ~14 % `udp_recvmsg` kernel + ~6 % syscall machinery + on the recv side, plausible 8–12 % throughput. Risk: the function + is heavily branched (TCP / TLS / DTLS / UDP all share the body) + and state can change mid-loop (`s->tobeclosed` etc.); the cleanest + shape is a separate UDP-only helper called from + `socket_input_handler` *before* falling through to the existing + `socket_input_worker`, gated on `s->ssl == NULL && s->bev == NULL + && !s->parent_s`. **This is the highest-value remaining item.** + +2. **`sendmmsg` batched send.** Each successful packet fires one + `sendto`. After (1) lands, when the receive loop hands a batch of + N packets to the dispatch layer in one go, the corresponding sends + could be coalesced into one `sendmmsg`. Requires a lightweight + per-thread send queue and a flush at the end of each event-loop + tick. Bigger refactor; expect another ~10 % if (1) lands. + +3. **GSO (`UDP_SEGMENT`)** on the send path. Linux can take one + "large" datagram and segment it in the kernel for back-to-back + packets to the same destination. Our channel-data flood IS + same-destination. Setting `UDP_SEGMENT` and submitting a single + `sendmsg` of N×packet_size cuts skb-alloc / `__dev_queue_xmit` + work substantially. Needs careful handling for short tails and + non-uniform sizes; complementary to (2). + +4. 
**Inline more cross-TU per-packet accessors.** Pattern from iter + 4/5 still applies: `addr_eq` (called per channel-data packet for + permission lookup), `ioa_network_buffer_get_size`, + `get_ioa_socket_type` / `_app_type`. Each is small enough; the + only reason to be cautious is they're declared in `ns_turn_ioalib.h` + which is part of the public-ish server library API — moving the + body inline doesn't break ABI but does require a recompile of all + consumers. Likely <1 % each but cheap to do. + +5. **Re-evaluate `--udp-recvmmsg` default after (1) lands.** Once + per-session sockets also batch, the listener path is no longer a + special case and turning it on by default becomes a free win for + multi-tenant servers without hurting m=1. + +## Things investigated and ruled out (don't redo) + +- `set_socket_ttl` / `set_socket_tos` already short-circuit on + no-change via `s->current_ttl != ttl` / `s->current_tos != tos`. + In a steady-state flood the per-packet call returns immediately + without `setsockopt`. Already optimized. +- `set_df_on_ioa_socket` similarly guarded + ([ns_ioalib_engine_impl.c:242](../src/apps/relay/ns_ioalib_engine_impl.c#L242)). +- `turn_report_session_usage` slow path runs once per 4096 packets + (see iter 1 commit); the per-call overhead is now ~3 reads + 1 + bitmask test + 1 conditional return. +- `MSG_CONFIRM` in `sendto` would skip ARP refresh, but + `neigh_resolve_output` + `neigh_hh_output` show ~17 % combined in + perf only because we're sending *that many* packets — per-packet + it's the normal cached neighbor path, not a refresh. +- Increasing `MAX_TRIES` from 16 to 64 in `socket_input_worker` + doesn't change syscall count; it only delays returning to libevent. + Useless without (1) above. + +## How to resume + +1. Verify the droplets are still up (the IPs above). If they were + destroyed, re-create with `c-4` / `nyc1` / `default-nyc1` VPC and + the `pavel` SSH key (id 23704483). +2. 
Re-upload `/tmp/coturn_clean.tar` from `git archive master` and + rebuild `/root/coturn_baseline/build/bin/turnserver` if the + baseline binary is gone. The A/B harness depends on having both + binaries side-by-side on the turnserver droplet. +3. Run a 6-round alternating A/B as a sanity check that the current + tip-of-branch still beats `master` by ~5 %. If it doesn't, the + environment drifted and the baseline needs re-anchoring. +4. Pick the next item from the backlog. Item (1) — `recvmmsg` into + `socket_input_worker` — is where the next material gain lives. diff --git a/examples/loadtest/allocation_flood.sh b/examples/loadtest/allocation_flood.sh new file mode 100755 index 00000000..e3702d42 --- /dev/null +++ b/examples/loadtest/allocation_flood.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +set -eu + +# Allocation flood does not start turnutils_peer. +# turnutils_uclient now generates a unique synthetic peer ip:port for +# each new allocation cycle, so only turnserver and uclient are needed. + +SCRIPT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)" +REPO_ROOT="$(CDPATH= cd -- "${SCRIPT_DIR}/../.." && pwd)" + +BINDIR="${REPO_ROOT}/build/bin" +if [ ! -x "${BINDIR}/turnserver" ]; then + BINDIR="${REPO_ROOT}/bin" +fi + +cleanup() { + kill "${uclient_pid:-}" "${turnserver_pid:-}" 2>/dev/null || true + wait "${uclient_pid:-}" "${turnserver_pid:-}" 2>/dev/null || true +} +trap cleanup EXIT INT TERM + +"${BINDIR}/turnserver" \ + --use-auth-secret \ + --static-auth-secret=secret \ + --realm=north.gov \ + --allow-loopback-peers \ + --listening-ip=127.0.0.1 \ + --relay-ip=127.0.0.1 \ + > /dev/null 2>&1 & +turnserver_pid=$! + +sleep 2 + +"${BINDIR}/turnutils_uclient" \ + -Y alloc \ + -m 50 \ + -L 127.0.0.1 \ + -u user \ + -W secret \ + "$@" \ + 127.0.0.1 & +uclient_pid=$! 
+ +wait "${uclient_pid}" diff --git a/examples/loadtest/invalid_flood.sh b/examples/loadtest/invalid_flood.sh new file mode 100755 index 00000000..e3bbce9f --- /dev/null +++ b/examples/loadtest/invalid_flood.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +set -eu + +SCRIPT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)" +REPO_ROOT="$(CDPATH= cd -- "${SCRIPT_DIR}/../.." && pwd)" + +BINDIR="${REPO_ROOT}/build/bin" +if [ ! -x "${BINDIR}/turnserver" ]; then + BINDIR="${REPO_ROOT}/bin" +fi + +cleanup() { + kill "${uclient_pid:-}" "${turnserver_pid:-}" 2>/dev/null || true + wait "${uclient_pid:-}" "${turnserver_pid:-}" 2>/dev/null || true +} +trap cleanup EXIT INT TERM + +# "${BINDIR}/turnserver" \ +# --use-auth-secret \ +# --static-auth-secret=secret \ +# --realm=north.gov \ +# --allow-loopback-peers \ +# --listening-ip=127.0.0.1 \ +# --relay-ip=127.0.0.1 \ +# > /dev/null 2>&1 & +# turnserver_pid=$! + +sleep 2 + +"${BINDIR}/turnutils_uclient" \ + -Y invalid \ + -m 50 \ + -l 16 \ + -u user \ + -W secret \ + "$@" \ + 127.0.0.1 & +uclient_pid=$! + +wait "${uclient_pid}" diff --git a/examples/loadtest/packet_flood.sh b/examples/loadtest/packet_flood.sh new file mode 100755 index 00000000..fc8c263a --- /dev/null +++ b/examples/loadtest/packet_flood.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +set -eu + +SCRIPT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)" +REPO_ROOT="$(CDPATH= cd -- "${SCRIPT_DIR}/../.." && pwd)" + +BINDIR="${REPO_ROOT}/build/bin" +if [ ! -x "${BINDIR}/turnserver" ]; then + BINDIR="${REPO_ROOT}/bin" +fi + +cleanup() { + kill "${uclient_pid:-}" "${peer_pid:-}" "${turnserver_pid:-}" 2>/dev/null || true + wait "${uclient_pid:-}" "${peer_pid:-}" "${turnserver_pid:-}" 2>/dev/null || true +} +trap cleanup EXIT INT TERM + +"${BINDIR}/turnserver" \ + --use-auth-secret \ + --static-auth-secret=secret \ + --realm=north.gov \ + --allow-loopback-peers \ + --listening-ip=127.0.0.1 \ + --relay-ip=127.0.0.1 \ + > /dev/null 2>&1 & +turnserver_pid=$! 
+ +"${BINDIR}/turnutils_peer" -L 127.0.0.1 -p 3480 > /dev/null 2>&1 & +peer_pid=$! + +sleep 2 + +"${BINDIR}/turnutils_uclient" \ + -Y packet \ + -m 1 \ + -z 0 \ + -l 120 \ + -e 127.0.0.1 \ + -r 3480 \ + -X \ + -g \ + -u user \ + -W secret \ + "$@" \ + 127.0.0.1 & +uclient_pid=$! + +wait "${uclient_pid}" diff --git a/man/man1/turnutils.1 b/man/man1/turnutils.1 index ec488992..d7fbabd5 100644 --- a/man/man1/turnutils.1 +++ b/man/man1/turnutils.1 @@ -191,6 +191,14 @@ Dual allocation (SSODA). Implies \fB\-c\fP option. .B \fB\-J\fP Use oAuth with default test key kid='north'. +.TP +.B +\fB\-Y\fP +Load\-generator mode: +\fIpacket\fP floods data through a single TURN allocation as fast as possible, +\fIalloc\fP creates allocations as fast as possible, +and \fIinvalid\fP sends small invalid packets to the TURN listener as fast as possible. +Load\-generator modes imply \fB\-c\fP and do not support \fB\-y\fP. .PP Options with required values: .TP @@ -218,6 +226,8 @@ if the server certificate to be verified. .B \fB\-n\fP Number of messages to send (Default: 5). +In load\-generator mode, \fB\-n\fP is the number of operations per client. +If omitted there, the client runs until interrupted. .TP .B \fB\-d\fP @@ -242,6 +252,7 @@ Peer port (Default: 3480). .B \fB\-z\fP Per\-session packet interval in milliseconds (Default: 20). +In \fIpacket\fP and \fIinvalid\fP load\-generator modes the default is 0 ms. .TP .B \fB\-u\fP @@ -275,6 +286,14 @@ the ORIGIN STUN attribute value. \fB\-a\fP Bandwidth for the bandwidth request in ALLOCATE. The default value is zero. .PP +Notes for load\-generator mode: +.IP +\fIpacket\fP mode still performs the normal TURN allocation/setup and then starts sending immediately with no pacing. +.IP +\fIalloc\fP mode does not require \fB\-e\fP; it repeatedly establishes new allocations, uses a unique client local port for each one, attaches each one to a unique synthetic peer ip:port, and closes them again. 
+.IP +\fIinvalid\fP mode does not require \fB\-e\fP; by default it uses 16\-byte payloads unless \fB\-l\fP is specified. +.PP See the examples in the "examples/scripts" directory. .SH ====================================== diff --git a/src/apps/uclient/mainuclient.c b/src/apps/uclient/mainuclient.c index 89e38573..4fb5bda8 100644 --- a/src/apps/uclient/mainuclient.c +++ b/src/apps/uclient/mainuclient.c @@ -97,6 +97,8 @@ char origin[STUN_MAX_ORIGIN_SIZE + 1] = "\0"; band_limit_t bps = 0; bool dual_allocation = false; +bool unique_client_ports = false; +uclient_load_mode load_mode = UCLIENT_LOAD_MODE_NONE; int oauth = 0; oauth_key okey_array[3]; @@ -108,6 +110,22 @@ static oauth_key_data_raw okdr_array[3] = { //////////////// local definitions ///////////////// +static uclient_load_mode parse_load_mode(const char *mode) { + if (!mode) { + return UCLIENT_LOAD_MODE_NONE; + } + if (!strcmp(mode, "packet")) { + return UCLIENT_LOAD_MODE_PACKET_FLOOD; + } + if (!strcmp(mode, "alloc")) { + return UCLIENT_LOAD_MODE_ALLOC_FLOOD; + } + if (!strcmp(mode, "invalid")) { + return UCLIENT_LOAD_MODE_INVALID_FLOOD; + } + return UCLIENT_LOAD_MODE_NONE; +} + static char Usage[] = "Usage: uclient [flags] [options] turn-server-ip-address\n" "Flags:\n" @@ -138,6 +156,7 @@ static char Usage[] = " -Z Dual allocation (implies -c).\n" " -J Use oAuth with default test keys kid='north', 'union' or 'oldempire'.\n" "Options:\n" + " -Y Enable load-generator mode.\n" " -l Message length (Default: 100 Bytes).\n" " -i Certificate file (for secure connections only, optional).\n" " -k Private key file (for secure connections only).\n" @@ -172,6 +191,9 @@ int main(int argc, char **argv) { char rest_api_separator = ':'; bool use_null_cipher = false; + bool message_length_set = false; + bool message_count_set = false; + bool packet_interval_set = false; #if defined(WINDOWS) @@ -200,7 +222,7 @@ int main(int argc, char **argv) { memset(local_addr, 0, sizeof(local_addr)); - while ((c = getopt(argc, argv, 
"a:d:p:l:n:L:m:e:r:u:w:i:k:z:W:C:E:F:o:bZvsyhcxXgtTSAPDNOUMRIGBJ")) != -1) { + while ((c = getopt(argc, argv, "a:d:p:l:n:L:m:e:r:u:w:i:k:z:W:C:E:F:o:Y:bZvsyhcxXgtTSAPDNOUMRIGBJ")) != -1) { switch (c) { case 'J': { @@ -232,6 +254,13 @@ int main(int argc, char **argv) { case 'a': bps = (band_limit_t)strtoul(optarg, NULL, 10); break; + case 'Y': + load_mode = parse_load_mode(optarg); + if (load_mode == UCLIENT_LOAD_MODE_NONE) { + fprintf(stderr, "Unknown load mode: %s\n", optarg); + exit(1); + } + break; case 'o': STRCPY(origin, optarg); break; @@ -274,6 +303,7 @@ int main(int argc, char **argv) { negative_protocol_test = true; break; case 'z': + packet_interval_set = true; RTP_PACKET_INTERVAL = atoi(optarg); break; case 'Z': @@ -298,12 +328,14 @@ int main(int argc, char **argv) { default_address_family = STUN_ATTRIBUTE_REQUESTED_ADDRESS_FAMILY_VALUE_IPV4; break; case 'l': + message_length_set = true; clmessage_length = atoi(optarg); break; case 's': do_not_use_channel = true; break; case 'n': + message_count_set = true; messagenumber = atoi(optarg); break; case 'p': @@ -388,6 +420,31 @@ int main(int argc, char **argv) { no_rtcp = true; } + if (is_load_generator_mode()) { + no_rtcp = true; + + if (!message_count_set) { + messagenumber = 0; + } + + if ((is_packet_flood_mode() || is_invalid_flood_mode()) && !packet_interval_set) { + RTP_PACKET_INTERVAL = 0; + } + + if (is_invalid_flood_mode() && !message_length_set) { + clmessage_length = 16; + } + + if (is_alloc_flood_mode()) { + unique_client_ports = true; + } + + if (c2c) { + fprintf(stderr, "Load-generator mode does not support -y client-to-client mode\n"); + exit(1); + } + } + if (g_use_auth_secret_with_timestamp) { { @@ -453,14 +510,19 @@ int main(int argc, char **argv) { } } - if (clmessage_length < (int)sizeof(message_info)) { + if (!is_invalid_flood_mode() && clmessage_length < (int)sizeof(message_info)) { clmessage_length = (int)sizeof(message_info); } + if (is_invalid_flood_mode() && clmessage_length < 1) { + 
clmessage_length = 1; + } + const int max_header = 100; - if (clmessage_length > (int)(STUN_BUFFER_SIZE - max_header)) { - fprintf(stderr, "Message length was corrected to %d\n", (STUN_BUFFER_SIZE - max_header)); - clmessage_length = (int)(STUN_BUFFER_SIZE - max_header); + const int max_message_length = is_invalid_flood_mode() ? (int)STUN_BUFFER_SIZE : (int)(STUN_BUFFER_SIZE - max_header); + if (clmessage_length > max_message_length) { + fprintf(stderr, "Message length was corrected to %d\n", max_message_length); + clmessage_length = max_message_length; } if (optind >= argc) { @@ -468,7 +530,7 @@ int main(int argc, char **argv) { exit(-1); } - if (!c2c) { + if (!c2c && !is_alloc_flood_mode() && !is_invalid_flood_mode()) { if (!peer_address[0]) { fprintf(stderr, "Either -e peer_address or -y must be specified\n"); return -1; diff --git a/src/apps/uclient/startuclient.c b/src/apps/uclient/startuclient.c index ec4a6fd2..945417e7 100644 --- a/src/apps/uclient/startuclient.c +++ b/src/apps/uclient/startuclient.c @@ -62,9 +62,19 @@ static const int never_allocate_rtcp = 0; static const unsigned char kALPNProtos[] = "\x08http/1.1\x09stun.turn\x12stun.nat-discovery"; static const size_t kALPNProtosLen = sizeof(kALPNProtos) - 1; +static uint16_t next_unique_local_port = 49152; ///////////////////////////////////////// +static uint16_t allocate_unique_local_port(void) { + const uint16_t port = next_unique_local_port; + ++next_unique_local_port; + if (next_unique_local_port < 49152) { + next_unique_local_port = 49152; + } + return port; +} + int rare_event(void) { if (dos) { return (((unsigned long)turn_random_number()) % 1000 == 777); @@ -160,7 +170,7 @@ static SSL *tls_connect(ioa_socket_raw fd, ioa_addr *remote_addr, bool *try_agai switch (SSL_get_error(ssl, rc)) { case SSL_ERROR_WANT_READ: case SSL_ERROR_WANT_WRITE: - if (!dos) { + if (!dos && !is_load_generator_mode()) { usleep(1000); } continue; @@ -216,6 +226,7 @@ static int clnet_connect(uint16_t clnet_remote_port, 
const char *remote_address, ioa_addr local_addr; int connect_cycle = 0; + int bind_cycle = 0; ioa_addr remote_addr; @@ -261,16 +272,36 @@ start_socket: } } - addr_bind(clnet_fd, &local_addr, 0, 1, get_socket_type()); - - } else if (strlen(local_address) > 0) { - - if (make_ioa_addr((const uint8_t *)local_address, 0, &local_addr) < 0) { + if (addr_bind(clnet_fd, &local_addr, 0, 1, get_socket_type()) < 0) { socket_closesocket(clnet_fd); return -1; } - addr_bind(clnet_fd, &local_addr, 0, 1, get_socket_type()); + } else if (strlen(local_address) > 0 || unique_client_ports) { + + const char *bind_address = local_address; + if (!bind_address[0]) { + bind_address = (remote_addr.ss.sa_family == AF_INET6) ? "::" : "0.0.0.0"; + } + + if (make_ioa_addr((const uint8_t *)bind_address, 0, &local_addr) < 0) { + socket_closesocket(clnet_fd); + return -1; + } + + if (unique_client_ports) { + addr_set_port(&local_addr, allocate_unique_local_port()); + } + + const int bind_debug = unique_client_ports ? 0 : 1; + if (addr_bind(clnet_fd, &local_addr, 0, bind_debug, get_socket_type()) < 0) { + const int bind_err = socket_errno(); + socket_closesocket(clnet_fd); + if (unique_client_ports && bind_err == EADDRINUSE && bind_cycle++ < MAX_CONNECT_EFFORTS) { + goto start_socket; + } + return -1; + } } int connect_err = 0; @@ -307,7 +338,7 @@ start_socket: addr_debug_print(verbose, &remote_addr, "Connected to"); } - if (!dos) { + if (!dos && !is_load_generator_mode()) { usleep(500); } @@ -943,6 +974,62 @@ beg_cp: return 0; } +int turn_refresh_allocation(bool verbose, app_ur_conn_info *clnet_info, uint32_t lifetime) { + + stun_buffer request_message, response_message; + +beg_refresh: + + stun_init_request(STUN_METHOD_REFRESH, &request_message); + uint32_t lt = htonl(lifetime); + stun_attr_add(&request_message, STUN_ATTRIBUTE_LIFETIME, (const char *)<, 4); + + add_origin(&request_message); + + if (add_integrity(clnet_info, &request_message) < 0) { + return -1; + } + + 
stun_attr_add_fingerprint_str(request_message.buf, &(request_message.len)); + + if (send_buffer(clnet_info, &request_message, 0, 0) <= 0) { + return -1; + } + + while (true) { + const int len = recv_buffer(clnet_info, &response_message, 1, 0, NULL, &request_message); + if (len <= 0) { + return -1; + } + + response_message.len = len; + + int err_code = 0; + uint8_t err_msg[129]; + + if (stun_is_success_response(&response_message)) { + if (clnet_info->nonce[0]) { + if (check_integrity(clnet_info, &response_message) < 0) { + return -1; + } + } + if (verbose) { + TURN_LOG_FUNC(TURN_LOG_LEVEL_INFO, "refresh success: lifetime=%u\n", lifetime); + } + return 0; + } else if (stun_is_challenge_response_str(response_message.buf, response_message.len, &err_code, err_msg, + sizeof(err_msg), clnet_info->realm, clnet_info->nonce, + clnet_info->server_name, &(clnet_info->oauth))) { + goto beg_refresh; + } else if (stun_is_error_response(&response_message, &err_code, err_msg, sizeof(err_msg))) { + if (verbose) { + TURN_LOG_FUNC(TURN_LOG_LEVEL_INFO, "refresh error %d (%s)\n", err_code, (char *)err_msg); + } + return -1; + } + } +} + int start_connection(uint16_t clnet_remote_port0, const char *remote_address0, const unsigned char *ifname, const char *local_address, bool verbose, app_ur_conn_info *clnet_info_probe, app_ur_conn_info *clnet_info, uint16_t *chn, app_ur_conn_info *clnet_info_rtcp, @@ -1170,6 +1257,38 @@ int start_connection(uint16_t clnet_remote_port0, const char *remote_address0, c return 0; } +int start_allocate_only_connection(uint16_t clnet_remote_port0, const char *remote_address0, + const unsigned char *ifname, const char *local_address, bool verbose, + app_ur_conn_info *clnet_info_probe, app_ur_conn_info *clnet_info, + ioa_addr *peer_addr) { + + UNUSED_ARG(clnet_info_probe); + + ioa_addr relay_addr; + + if (clnet_connect(clnet_remote_port0, remote_address0, ifname, local_address, verbose, clnet_info) < 0) { + exit(-1); + } + + if (clnet_allocate(verbose, 
clnet_info, &relay_addr, default_address_family, NULL, NULL) < 0) { + return -1; + } + + if (peer_addr) { + addr_cpy(&(clnet_info->peer_addr), peer_addr); + if (turn_create_permission(verbose, clnet_info, peer_addr, 1) < 0) { + return -1; + } + } + + return 0; +} + +int start_raw_connection(uint16_t clnet_remote_port, const char *remote_address, const unsigned char *ifname, + const char *local_address, bool verbose, app_ur_conn_info *clnet_info) { + return clnet_connect(clnet_remote_port, remote_address, ifname, local_address, verbose, clnet_info); +} + int start_c2c_connection(uint16_t clnet_remote_port0, const char *remote_address0, const unsigned char *ifname, const char *local_address, bool verbose, app_ur_conn_info *clnet_info_probe, app_ur_conn_info *clnet_info1, uint16_t *chn1, app_ur_conn_info *clnet_info1_rtcp, diff --git a/src/apps/uclient/startuclient.h b/src/apps/uclient/startuclient.h index d351ab84..f17289b7 100644 --- a/src/apps/uclient/startuclient.h +++ b/src/apps/uclient/startuclient.h @@ -61,6 +61,15 @@ int start_connection(uint16_t clnet_remote_port, const char *remote_address, con app_ur_conn_info *clnet_info, uint16_t *chn, app_ur_conn_info *clnet_info_rtcp, uint16_t *chn_rtcp); +int start_allocate_only_connection(uint16_t clnet_remote_port, const char *remote_address, const unsigned char *ifname, + const char *local_address, bool verbose, app_ur_conn_info *clnet_info_probe, + app_ur_conn_info *clnet_info, ioa_addr *peer_addr); + +int start_raw_connection(uint16_t clnet_remote_port, const char *remote_address, const unsigned char *ifname, + const char *local_address, bool verbose, app_ur_conn_info *clnet_info); + +int turn_refresh_allocation(bool verbose, app_ur_conn_info *clnet_info, uint32_t lifetime); + int turn_tcp_connect(bool verbose, app_ur_conn_info *clnet_info, ioa_addr *peer_addr); void tcp_data_connect(app_ur_session *elem, uint32_t cid); diff --git a/src/apps/uclient/uclient.c b/src/apps/uclient/uclient.c index 03328360..3dd247d8 
100644 --- a/src/apps/uclient/uclient.c +++ b/src/apps/uclient/uclient.c @@ -62,6 +62,11 @@ static uint64_t tot_send_bytes = 0; static uint32_t tot_recv_messages = 0; static uint64_t tot_recv_bytes = 0; static uint64_t tot_send_dropped = 0; +static uint64_t tot_allocations = 0; +static uint64_t load_sent_packets = 0; +static uint64_t load_last_sent_packets = 0; +static uint64_t load_last_report_time = 0; +static uint64_t synthetic_peer_counter = 0; struct event_base *client_event_base = NULL; @@ -97,6 +102,74 @@ static uint64_t max_jitter = 0; static bool show_statistics = false; +/* True in every mode except invalid-flood. */ static bool uses_turn_allocation(void) { return !is_invalid_flood_mode(); } + +/* True when elem is non-NULL, a load-generator mode is active and elem->tot_msgnum <= 0. */ static bool uses_unlimited_message_count(const app_ur_session *elem) { + return elem && is_load_generator_mode() && (elem->tot_msgnum <= 0); +} + +/* Burst limit: 4096 in packet-flood and invalid-flood modes, otherwise 50 (|| binds tighter than ?:). */ static int get_send_burst_limit(void) { return is_packet_flood_mode() || is_invalid_flood_mode() ? 4096 : 50; } + +/* Returns clmessage_length clamped to the range 1..STUN_BUFFER_SIZE, as size_t. */ static size_t get_invalid_packet_length(void) { + if (clmessage_length < 1) { + return 1; + } + if (clmessage_length > (int)STUN_BUFFER_SIZE) { + return STUN_BUFFER_SIZE; + } + return (size_t)clmessage_length; +} + +/* Zeroes both load-generator packet counters and sets the last-report time to current_time. */ static void reset_load_generator_rate_stats(void) { + load_sent_packets = 0; + load_last_sent_packets = 0; + load_last_report_time = current_time; +} + +/* In load-generator modes, logs the send rate since the previous report (packets sent divided by elapsed current_time units - presumably seconds, confirm against __turn_getMSTime) together with the running total, then starts a new reporting window. Does nothing when current_time has not advanced past the last report. */ static void print_load_generator_rate(const char *context) { + if (!is_load_generator_mode()) { + return; + } + + if (current_time <= load_last_report_time) { /* also keeps elapsed below from being 0 */ + return; + } + + const uint64_t elapsed = current_time - load_last_report_time; + const uint64_t delta_packets = load_sent_packets - load_last_sent_packets; + const double pps = (double)delta_packets / (double)elapsed; + + TURN_LOG_FUNC(TURN_LOG_LEVEL_INFO, "%s: send_pps=%.2f, total_packets=%llu\n", context, pps, + (unsigned long long)load_sent_packets); + + load_last_report_time = current_time; + load_last_sent_packets = load_sent_packets; +} + +/* Fills *peer_addr with a synthetic peer derived from synthetic_peer_counter (post-incremented on every call): the port cycles through 1024..65535 and the host part advances each time the port range has been used up. For the IPv6 address family the text form is 2001:db8:<hi16>:<lo16>::1, otherwise 198.18.x.y or 198.19.x.y. Falls back to addr_set_any() when make_ioa_addr() rejects the text address. No-op when peer_addr is NULL. */ static void generate_unique_allocation_peer(ioa_addr *peer_addr) { + if (!peer_addr) { + 
return; + } + + const uint64_t peer_index = synthetic_peer_counter++; + const uint16_t port = (uint16_t)(1024 + (peer_index % (uint64_t)(0x10000 - 1024))); /* 0x10000 - 1024 = 64512 distinct ports */ + char peer_saddr[129]; + + if (default_address_family == STUN_ATTRIBUTE_REQUESTED_ADDRESS_FAMILY_VALUE_IPV6) { + const uint64_t host_index = peer_index / (uint64_t)(0x10000 - 1024); + snprintf(peer_saddr, sizeof(peer_saddr), "2001:db8:%x:%x::1", (unsigned int)((host_index >> 16) & 0xffffU), + (unsigned int)(host_index & 0xffffU)); + } else { + const uint64_t host_index = 1 + (peer_index / (uint64_t)(0x10000 - 1024)); /* starts at 1, so the first address is 198.18.0.1 */ + snprintf(peer_saddr, sizeof(peer_saddr), "198.%u.%u.%u", 18 + (unsigned int)((host_index >> 16) & 0x1U), + (unsigned int)((host_index >> 8) & 0xffU), (unsigned int)(host_index & 0xffU)); + } + + if (make_ioa_addr((const uint8_t *)peer_saddr, port, peer_addr) < 0) { + addr_set_any(peer_addr); + } +} + /////////////////////////////////////////////////////////////////////////////// static void __turn_getMSTime(void) { @@ -307,6 +380,10 @@ int send_buffer(app_ur_conn_info *clnet_info, stun_buffer *message, bool data_co ret = (int)message->len; } + if ((ret > 0) && is_load_generator_mode()) { + ++load_sent_packets; + } + return ret; } @@ -878,12 +955,27 @@ static int client_write(app_ur_session *elem) { elem->ctime = current_time; - message_info *mi = (message_info *)buffer_to_send; - mi->msgnum = elem->wmsgnum; - mi->mstime = current_mstime; app_tcp_conn_info *atc = NULL; + size_t payload_len = (size_t)clmessage_length; - if (is_TCP_relay()) { + if (is_invalid_flood_mode()) { + payload_len = get_invalid_packet_length(); + memset(elem->out_buffer.buf, 0xA5, payload_len); + if (payload_len >= 8) { + elem->out_buffer.buf[0] = 0x00; + elem->out_buffer.buf[1] = 0x01; + elem->out_buffer.buf[2] = 0x7f; + elem->out_buffer.buf[3] = 0x7f; + memcpy(elem->out_buffer.buf + 4, &(elem->wmsgnum), sizeof(elem->wmsgnum)); + } + elem->out_buffer.len = payload_len; + } else { + message_info
*)buffer_to_send; + mi->msgnum = elem->wmsgnum; + mi->mstime = current_mstime; + } + + if (!is_invalid_flood_mode() && is_TCP_relay()) { memcpy(elem->out_buffer.buf, buffer_to_send, clmessage_length); elem->out_buffer.len = clmessage_length; @@ -893,7 +985,7 @@ static int client_write(app_ur_session *elem) { ++elem->wmsgnum; elem->to_send_timems += RTP_PACKET_INTERVAL; tot_send_messages++; - tot_send_bytes += clmessage_length; + tot_send_bytes += payload_len; } return 0; } @@ -907,11 +999,11 @@ static int client_write(app_ur_session *elem) { printf("%s: Uninitialized atc: i=%d, atc=%p\n", __FUNCTION__, i, atc); return -1; } - } else if (!do_not_use_channel) { + } else if (!is_invalid_flood_mode() && !do_not_use_channel) { /* Let's always do padding: */ stun_init_channel_message(elem->chnum, &(elem->out_buffer), clmessage_length, mandatory_channel_padding || use_tcp); memcpy(elem->out_buffer.buf + 4, buffer_to_send, clmessage_length); - } else { + } else if (!is_invalid_flood_mode()) { stun_init_indication(STUN_METHOD_SEND, &(elem->out_buffer)); stun_attr_add(&(elem->out_buffer), STUN_ATTRIBUTE_DATA, buffer_to_send, clmessage_length); stun_attr_add_addr(&(elem->out_buffer), STUN_ATTRIBUTE_XOR_PEER_ADDRESS, &(elem->pinfo.peer_addr)); @@ -940,7 +1032,7 @@ static int client_write(app_ur_session *elem) { TURN_LOG_FUNC(TURN_LOG_LEVEL_INFO, "wrote %d bytes\n", (int)rc); } tot_send_messages++; - tot_send_bytes += clmessage_length; + tot_send_bytes += payload_len; } else { return -1; } @@ -990,6 +1082,33 @@ void client_input_handler(evutil_socket_t fd, short what, void *arg) { } } +/* Read-event callback that reads and throws away whatever is pending on the session's connection: SSL_read() when pinfo.ssl is set, otherwise recv() on pinfo.fd. Returns immediately when EV_READ is not flagged, when arg is NULL, or when the session is not in UR_STATE_READY. */ static void client_discard_input_handler(evutil_socket_t fd, short what, void *arg) { + if (!(what & EV_READ) || !arg) { + return; + } + + UNUSED_ARG(fd); + + app_ur_session *elem = (app_ur_session *)arg; + if (!elem || (elem->state != UR_STATE_READY)) { + return; + } + + uint8_t buffer[STUN_BUFFER_SIZE]; + + if (elem->pinfo.ssl) { + int rc = 0; + do { /* keep reading while data arrives or socket_eintr() reports an interrupted call (presumably EINTR); stop on 0 or on any other error */ + rc = SSL_read(elem->pinfo.ssl, buffer, 
(int)sizeof(buffer)); + } while ((rc > 0) || (rc < 0 && socket_eintr())); + } else if (elem->pinfo.fd >= 0) { + ssize_t rc = 0; + do { /* NOTE(review): rc == 0 only ends this loop; nothing here unregisters the event or closes the socket - confirm that is intended for stream transports */ + rc = recv(elem->pinfo.fd, buffer, sizeof(buffer), 0); + } while ((rc > 0) || (rc < 0 && socket_eintr())); + } +} + static void run_events(int short_burst) { struct timeval timeout; @@ -1008,6 +1127,33 @@ static void run_events(int short_burst) { ////////////////////// main method ///////////////// +/* Sets up invalid-flood client i: creates a session, opens its connection with start_raw_connection() (the process exits on failure), makes the socket non-blocking, registers client_discard_input_handler as a persistent read callback and stores the session, marked UR_STATE_READY, in elems[i]. Always returns 0. */ static int start_invalid_client(const char *remote_address, uint16_t port, const unsigned char *ifname, + const char *local_address, int messagenumber, int i) { + + app_ur_session *ss = create_new_ss(); + app_ur_conn_info *clnet_info = &(ss->pinfo); + + if (start_raw_connection(port, remote_address, ifname, local_address, clnet_verbose, clnet_info) < 0) { + exit(-1); + } + + socket_set_nonblocking(clnet_info->fd); + + struct event *ev = + event_new(client_event_base, clnet_info->fd, EV_READ | EV_PERSIST, client_discard_input_handler, ss); + event_add(ev, NULL); + + ss->state = UR_STATE_READY; + ss->input_ev = ev; + ss->tot_msgnum = messagenumber; + ss->recvmsgnum = -1; + ss->chnum = 0; + + elems[i] = ss; + + return 0; +} + static int start_client(const char *remote_address, uint16_t port, const unsigned char *ifname, const char *local_address, int messagenumber, int i) { @@ -1092,6 +1238,72 @@ static int start_client(const char *remote_address, uint16_t port, const unsigne return 0; } +/* Allocation-flood driver. Each iteration generates a fresh synthetic peer, connects and allocates with start_allocate_only_connection() (the process exits if that fails), sends a REFRESH with lifetime 0 whose result is ignored, and releases the connection data. Performs allocation_count * mclient iterations in total, or runs without end when allocation_count <= 0. Logs progress whenever show_statistics is set and a summary at the end. */ static void start_allocation_flood(const char *remote_address, uint16_t port, const unsigned char *ifname, + const char *local_address, int allocation_count, int mclient) { + + const bool unlimited = allocation_count <= 0; + const uint64_t per_client_target = unlimited ? 0 : (uint64_t)allocation_count; + const uint64_t total_target = unlimited ? 
0 : (per_client_target * (uint64_t)mclient); + + __turn_getMSTime(); + const uint64_t start_time = current_time; + tot_allocations = 0; + synthetic_peer_counter = 0; + reset_load_generator_rate_stats(); + + while (unlimited || (tot_allocations < total_target)) { + for (int i = 0; i < mclient; ++i) { + app_ur_conn_info clnet_info_probe; + app_ur_conn_info clnet_info; + ioa_addr synthetic_peer_addr; + memset(&clnet_info_probe, 0, sizeof(clnet_info_probe)); + memset(&clnet_info, 0, sizeof(clnet_info)); + memset(&synthetic_peer_addr, 0, sizeof(synthetic_peer_addr)); + clnet_info_probe.fd = -1; + clnet_info.fd = -1; + + generate_unique_allocation_peer(&synthetic_peer_addr); + + if (start_allocate_only_connection(port, remote_address, ifname, local_address, clnet_verbose, &clnet_info_probe, + &clnet_info, &synthetic_peer_addr) < 0) { + exit(-1); + } + + turn_refresh_allocation(clnet_verbose, &clnet_info, 0); /* lifetime 0; return value deliberately not checked */ + + app_ur_session ss_probe; + app_ur_session ss_alloc; + memset(&ss_probe, 0, sizeof(ss_probe)); + memset(&ss_alloc, 0, sizeof(ss_alloc)); + ss_probe.pinfo = clnet_info_probe; + ss_alloc.pinfo = clnet_info; + if (ss_probe.pinfo.fd >= 0 || ss_probe.pinfo.ssl) { /* NOTE(review): start_allocate_only_connection() marks clnet_info_probe unused, so fd is still -1 and ssl still NULL here */ + uc_delete_session_elem_data(&ss_probe); + } + uc_delete_session_elem_data(&ss_alloc); + + ++tot_allocations; + + __turn_getMSTime(); + if (show_statistics) { + print_load_generator_rate(__FUNCTION__); + TURN_LOG_FUNC(TURN_LOG_LEVEL_INFO, "%s: total_allocations=%llu\n", __FUNCTION__, + (unsigned long long)tot_allocations); + show_statistics = false; + } + + if (!unlimited && (tot_allocations >= total_target)) { /* leave the per-client loop as soon as the overall target is met */ + break; + } + } + } + + __turn_getMSTime(); + print_load_generator_rate(__FUNCTION__); + TURN_LOG_FUNC(TURN_LOG_LEVEL_INFO, "%s: total_allocations=%llu\n", __FUNCTION__, (unsigned long long)tot_allocations); + TURN_LOG_FUNC(TURN_LOG_LEVEL_INFO, "Total allocation flood time is %u\n", (unsigned int)(current_time - start_time)); +} + static int start_c2c(const char *remote_address, uint16_t port, const
unsigned char *ifname, const char *local_address, int messagenumber, int i) { @@ -1303,7 +1515,7 @@ static int refresh_channel(app_ur_session *elem, uint16_t method, uint32_t lt) { static inline int client_timer_handler(app_ur_session *elem, int *done) { if (elem) { - if (!turn_time_before(current_mstime, elem->refresh_time)) { + if (uses_turn_allocation() && !turn_time_before(current_mstime, elem->refresh_time)) { refresh_channel(elem, 0, 600); } @@ -1311,15 +1523,17 @@ static inline int client_timer_handler(app_ur_session *elem, int *done) { return 0; } - int max_num = 50; + const bool unlimited = uses_unlimited_message_count(elem); + int max_num = get_send_burst_limit(); int cur_num = 0; while (!turn_time_before(current_mstime, elem->to_send_timems)) { if (cur_num++ >= max_num) { break; } - if (elem->wmsgnum >= elem->tot_msgnum) { - if (!turn_time_before(current_mstime, elem->finished_time) || (tot_recv_messages >= tot_messages)) { + if (!unlimited && (elem->wmsgnum >= elem->tot_msgnum)) { + if (!turn_time_before(current_mstime, elem->finished_time) || + (!is_invalid_flood_mode() && (tot_recv_messages >= tot_messages))) { /* TURN_LOG_FUNC(TURN_LOG_LEVEL_INFO,"%s: elem=0x%x: 111.111: c=%d, t=%d, r=%d, w=%d\n",__FUNCTION__,(int)elem,elem->wait_cycles,elem->tot_msgnum,elem->rmsgnum,elem->wmsgnum); @@ -1347,7 +1561,10 @@ static inline int client_timer_handler(app_ur_session *elem, int *done) { } } else { *done += 1; - client_write(elem); + if (client_write(elem) < 0) { + client_shutdown(elem); + return 1; + } elem->finished_time = current_mstime + STOPPING_TIME * 1000; } } @@ -1393,6 +1610,11 @@ void start_mclient(const char *remote_address, uint16_t port, const unsigned cha total_clients = mclient; + if (is_alloc_flood_mode()) { + start_allocation_flood(remote_address, port, ifname, local_address, messagenumber, mclient); + return; + } + if (c2c) { // mclient must be a multiple of 4: if (!no_rtcp) { @@ -1416,6 +1638,7 @@ void start_mclient(const char 
*remote_address, uint16_t port, const unsigned cha __turn_getMSTime(); uint32_t stime = current_time; + reset_load_generator_rate_stats(); memset(buffer_to_send, 7, clmessage_length); @@ -1426,7 +1649,7 @@ void start_mclient(const char *remote_address, uint16_t port, const unsigned cha if (c2c) { if (!no_rtcp) { for (int i = 0; i < (mclient >> 2); i++) { - if (!dos) { + if (!dos && !is_load_generator_mode()) { usleep(SLEEP_INTERVAL); } if (start_c2c(remote_address, port, ifname, local_address, messagenumber, i << 2) < 0) { @@ -1436,7 +1659,7 @@ void start_mclient(const char *remote_address, uint16_t port, const unsigned cha } } else { for (int i = 0; i < (mclient >> 1); i++) { - if (!dos) { + if (!dos && !is_load_generator_mode()) { usleep(SLEEP_INTERVAL); } if (start_c2c(remote_address, port, ifname, local_address, messagenumber, i << 1) < 0) { @@ -1448,7 +1671,7 @@ void start_mclient(const char *remote_address, uint16_t port, const unsigned cha } else { if (!no_rtcp) { for (int i = 0; i < (mclient >> 1); i++) { - if (!dos) { + if (!dos && !is_load_generator_mode()) { usleep(SLEEP_INTERVAL); } if (start_client(remote_address, port, ifname, local_address, messagenumber, i << 1) < 0) { @@ -1458,10 +1681,13 @@ void start_mclient(const char *remote_address, uint16_t port, const unsigned cha } } else { for (int i = 0; i < mclient; i++) { - if (!dos) { + if (!dos && !is_load_generator_mode()) { usleep(SLEEP_INTERVAL); } - if (start_client(remote_address, port, ifname, local_address, messagenumber, i) < 0) { + const int rc = is_invalid_flood_mode() + ? 
start_invalid_client(remote_address, port, ifname, local_address, messagenumber, i) + : start_client(remote_address, port, ifname, local_address, messagenumber, i); + if (rc < 0) { exit(-1); } tot_clients++; @@ -1481,7 +1707,7 @@ void start_mclient(const char *remote_address, uint16_t port, const unsigned cha struct timeval tv; tv.tv_sec = 0; - tv.tv_usec = 1000; + tv.tv_usec = (is_packet_flood_mode() || is_invalid_flood_mode()) ? 100 : 1000; evtimer_add(ev, &tv); @@ -1550,7 +1776,11 @@ void start_mclient(const char *remote_address, uint16_t port, const unsigned cha stime = current_time; for (int i = 0; i < total_clients; i++) { - elems[i]->to_send_timems = current_mstime + 1000 + ((uint32_t)turn_random_number()) % 5000; + if (is_packet_flood_mode() || is_invalid_flood_mode()) { + elems[i]->to_send_timems = current_mstime; + } else { + elems[i]->to_send_timems = current_mstime + 1000 + ((uint32_t)turn_random_number()) % 5000; + } } tot_messages = elems[0]->tot_msgnum * total_clients; @@ -1567,6 +1797,7 @@ void start_mclient(const char *remote_address, uint16_t port, const unsigned cha } if (show_statistics) { + print_load_generator_rate(__FUNCTION__); TURN_LOG_FUNC(TURN_LOG_LEVEL_INFO, "%s: msz=%d, tot_send_msgs=%lu, tot_recv_msgs=%lu, tot_send_bytes ~ %llu, tot_recv_bytes ~ %llu\n", __FUNCTION__, msz, (unsigned long)tot_send_messages, (unsigned long)tot_recv_messages, @@ -1575,6 +1806,9 @@ void start_mclient(const char *remote_address, uint16_t port, const unsigned cha } } + __turn_getMSTime(); + print_load_generator_rate(__FUNCTION__); + TURN_LOG_FUNC(TURN_LOG_LEVEL_INFO, "%s: tot_send_msgs=%lu, tot_recv_msgs=%lu\n", __FUNCTION__, (unsigned long)tot_send_messages, (unsigned long)tot_recv_messages); @@ -1592,16 +1826,28 @@ void start_mclient(const char *remote_address, uint16_t port, const unsigned cha total_loss = tot_send_messages - tot_recv_messages; TURN_LOG_FUNC(TURN_LOG_LEVEL_INFO, "Total transmit time is %u\n", ((unsigned int)(current_time - stime))); - 
TURN_LOG_FUNC(TURN_LOG_LEVEL_INFO, "Total lost packets %llu (%f%c), total send dropped %llu (%f%c)\n", - (unsigned long long)total_loss, (((double)total_loss / (double)tot_send_messages) * 100.00), '%', - (unsigned long long)tot_send_dropped, - (((double)tot_send_dropped / (double)(tot_send_messages + tot_send_dropped)) * 100.00), '%'); - TURN_LOG_FUNC(TURN_LOG_LEVEL_INFO, "Average round trip delay %f ms; min = %lu ms, max = %lu ms\n", - ((double)total_latency / (double)((tot_recv_messages < 1) ? 1 : tot_recv_messages)), - (unsigned long)min_latency, (unsigned long)max_latency); - TURN_LOG_FUNC(TURN_LOG_LEVEL_INFO, "Average jitter %f ms; min = %lu ms, max = %lu ms\n", - ((double)total_jitter / (double)tot_recv_messages), (unsigned long)min_jitter, - (unsigned long)max_jitter); + if (is_invalid_flood_mode()) { + TURN_LOG_FUNC(TURN_LOG_LEVEL_INFO, "Total send dropped %llu (%f%c)\n", (unsigned long long)tot_send_dropped, + (((double)tot_send_dropped / + (double)((tot_send_messages + tot_send_dropped) ? (tot_send_messages + tot_send_dropped) : 1)) * + 100.00), + '%'); + } else { + TURN_LOG_FUNC(TURN_LOG_LEVEL_INFO, "Total lost packets %llu (%f%c), total send dropped %llu (%f%c)\n", + (unsigned long long)total_loss, + (((double)total_loss / (double)(tot_send_messages ? tot_send_messages : 1)) * 100.00), '%', + (unsigned long long)tot_send_dropped, + (((double)tot_send_dropped / + (double)((tot_send_messages + tot_send_dropped) ? (tot_send_messages + tot_send_dropped) : 1)) * + 100.00), + '%'); + TURN_LOG_FUNC(TURN_LOG_LEVEL_INFO, "Average round trip delay %f ms; min = %lu ms, max = %lu ms\n", + ((double)total_latency / (double)((tot_recv_messages < 1) ? 1 : tot_recv_messages)), + (unsigned long)min_latency, (unsigned long)max_latency); + TURN_LOG_FUNC(TURN_LOG_LEVEL_INFO, "Average jitter %f ms; min = %lu ms, max = %lu ms\n", + ((double)total_jitter / (double)((tot_recv_messages < 1) ? 
1 : tot_recv_messages)), + (unsigned long)min_jitter, (unsigned long)max_jitter); + } free(elems); } diff --git a/src/apps/uclient/uclient.h b/src/apps/uclient/uclient.h index 511f26ac..eb791922 100644 --- a/src/apps/uclient/uclient.h +++ b/src/apps/uclient/uclient.h @@ -46,6 +46,13 @@ extern "C" { ////////////////////////////////////////////// +/* Load-generator mode stored in the global load_mode; UCLIENT_LOAD_MODE_NONE (0) means no load-generator mode is active. */ typedef enum { + UCLIENT_LOAD_MODE_NONE = 0, + UCLIENT_LOAD_MODE_PACKET_FLOOD, + UCLIENT_LOAD_MODE_ALLOC_FLOOD, + UCLIENT_LOAD_MODE_INVALID_FLOOD +} uclient_load_mode; + #define STOPPING_TIME (10) #define STARTING_TCP_RELAY_TIME (30) @@ -86,6 +93,8 @@ extern bool no_permissions; extern bool extra_requests; extern band_limit_t bps; extern bool dual_allocation; +extern bool unique_client_ports; +extern uclient_load_mode load_mode; extern char origin[STUN_MAX_ORIGIN_SIZE + 1]; @@ -96,6 +105,10 @@ extern oauth_key okey_array[3]; #define OAUTH_SESSION_LIFETIME (555) #define is_TCP_relay() (relay_transport == STUN_ATTRIBUTE_TRANSPORT_TCP_VALUE) +#define is_packet_flood_mode() (load_mode == UCLIENT_LOAD_MODE_PACKET_FLOOD) /* this and the next three macros are predicates over the global load_mode */ +#define is_alloc_flood_mode() (load_mode == UCLIENT_LOAD_MODE_ALLOC_FLOOD) +#define is_invalid_flood_mode() (load_mode == UCLIENT_LOAD_MODE_INVALID_FLOOD) +#define is_load_generator_mode() (load_mode != UCLIENT_LOAD_MODE_NONE) /* true for any mode other than NONE */ void start_mclient(const char *remote_address, uint16_t port, const unsigned char *ifname, const char *local_address, int messagenumber, int mclient);