mirror of
https://git.haproxy.org/git/haproxy.git/
synced 2025-08-07 07:37:02 +02:00
As reported by @Tristan971 in GH #2116, the congestion control window could drop to zero due to an inversion in the code applying the reduction factor. On a new loss event, the factor must be applied to the slow start threshold, and the window must never fall below ->min_cwnd (2*max_udp_payload_sz). The same issue affected both the newReno and cubic algorithms. Furthermore, in newReno only the threshold was decremented. Must be backported to 2.6 and 2.7.
330 lines
9.0 KiB
C
330 lines
9.0 KiB
C
#include <haproxy/quic_cc.h>
|
|
#include <haproxy/ticks.h>
|
|
#include <haproxy/trace.h>
|
|
|
|
/* This source file is highly inspired from Linux kernel source file
|
|
* implementation for TCP Cubic. In fact, we have no choice if we do
|
|
* not want to use any floating point operations to be fast!
|
|
* (See net/ipv4/tcp_cubic.c)
|
|
*/
|
|
#define TRACE_SOURCE &trace_quic

/* Fixed-point scale used for the CUBIC beta and C constants
 * (no floating point allowed, see the header comment above).
 */
#define CUBIC_BETA_SCALE 1024
#define CUBIC_BETA_SCALE_SHIFT 10
/* beta = 0.7 ; C = 0.4 */
#define CUBIC_BETA 717 /* CUBIC_BETA / CUBIC_BETA_SCALE = 0.7 */
#define CUBIC_C 410 /* CUBIC_C / CUBIC_BETA_SCALE = 0.4 */

/* Shift to undo a product of three CUBIC_BETA_SCALE-scaled factors. */
#define CUBIC_BETA_SCALE_FACTOR_SHIFT (3 * CUBIC_BETA_SCALE_SHIFT)
#define TIME_SCALE_FACTOR_SHIFT 10

/* The maximum value which may be cubed and multiplied by CUBIC_BETA
 * without overflowing a 64-bit computation (presumably ~cbrt(2^64 / CUBIC_C)
 * -- TODO confirm the derivation).
 */
#define CUBIC_DIFF_TIME_LIMIT 355535ULL /* ms */
|
|
|
/* K cube factor: (1 - beta) / c */
/* Per-connection CUBIC private state, stored in the quic_cc private area. */
struct cubic {
	/* Current state, index into quic_cc_cubic_state_cbs[] (QUIC_CC_ST_*). */
	uint32_t state;
	/* Slow start threshold in bytes, compared against path->cwnd. */
	uint32_t ssthresh;
	/* Remainder carried between window increments in congestion
	 * avoidance (modulo path->cwnd), to avoid losing sub-cwnd amounts.
	 */
	uint32_t remaining_inc;
	/* Same carry, for the TCP-friendliness (Reno-estimate) window. */
	uint32_t remaining_tcp_inc;
	/* Start (now_ms) of the current congestion avoidance epoch;
	 * 0 means "not started", reinitialized on recovery entry.
	 */
	uint32_t epoch_start;
	/* Window value (bytes) the cubic curve converges back to (W_max). */
	uint32_t origin_point;
	/* Time offset at which the curve reaches origin_point, scaled by
	 * TIME_SCALE_FACTOR_SHIFT and compared against a ms-based clock.
	 */
	uint32_t K;
	/* Window value (bytes) right before the last loss event. */
	uint32_t last_w_max;
	/* Estimated window of a standard TCP flow (TCP friendliness). */
	uint32_t tcp_wnd;
	/* now_ms timestamp of the last recovery entry, TICK_ETERNITY once left. */
	uint32_t recovery_start_time;
};
|
|
static void quic_cc_cubic_reset(struct quic_cc *cc)
|
|
{
|
|
struct cubic *c = quic_cc_priv(cc);
|
|
|
|
TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
|
|
c->state = QUIC_CC_ST_SS;
|
|
c->ssthresh = QUIC_CC_INFINITE_SSTHESH;
|
|
c->remaining_inc = 0;
|
|
c->remaining_tcp_inc = 0;
|
|
c->epoch_start = 0;
|
|
c->origin_point = 0;
|
|
c->K = 0;
|
|
c->last_w_max = 0;
|
|
c->tcp_wnd = 0;
|
|
c->recovery_start_time = 0;
|
|
TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
|
|
}
|
|
|
|
/* Initialization callback for the CUBIC algorithm: simply resets the
 * private state. Always returns 1 (success).
 */
static int quic_cc_cubic_init(struct quic_cc *cc)
{
	quic_cc_cubic_reset(cc);
	return 1;
}
|
|
|
/* Cubic root.
 * Highly inspired from Linux kernel sources.
 * See net/ipv4/tcp_cubic.c
 */
static uint32_t cubic_root(uint64_t val)
{
	uint32_t x, b, shift;

	/* Lookup table of pre-computed cube roots for small values,
	 * scaled so that the estimate below can be derived by shifts only.
	 */
	static const uint8_t v[] = {
		  0,  54,  54,  54, 118, 118, 118, 118,
		123, 129, 134, 138, 143, 147, 151, 156,
		157, 161, 164, 168, 170, 173, 176, 179,
		181, 185, 187, 190, 192, 194, 197, 199,
		200, 202, 204, 206, 209, 211, 213, 215,
		217, 219, 221, 222, 224, 225, 227, 229,
		231, 232, 234, 236, 237, 239, 240, 242,
		244, 245, 246, 248, 250, 251, 252, 254,
	};

	if (!val || (b = my_flsl(val)) < 7) {
		/* val in [0..63]: answer straight from the table. */
		return ((uint32_t)v[(uint32_t)val] + 35) >> 6;
	}

	/* b ~= fls(val) / 3, i.e. the bit length of the cube root
	 * (84/256 approximates 1/3 without a division).
	 */
	b = ((b * 84) >> 8) - 1;
	shift = (val >> (b * 3));

	/* Initial estimate: table value for the top bits, scaled back up. */
	x = ((uint32_t)(((uint32_t)v[shift] + 10) << b)) >> 6;

	/* One Newton-Raphson-style refinement step: x <- (2x + val/x^2) / 3,
	 * with 341/1024 approximating the final division by 3.
	 */
	x = 2 * x + (uint32_t)(val / ((uint64_t)x * (uint64_t)(x - 1)));
	x = ((x * 341) >> 10);

	return x;
}
|
|
|
/* Congestion avoidance update, called on each ACK while in CA state.
 * Computes the cubic target window W(t) = C*(t - K)^3 + origin_point
 * in fixed point, grows path->cwnd towards it, and applies the TCP
 * friendliness rule (never be slower than an estimated Reno flow).
 * <acked> is the number of newly acknowledged bytes.
 */
static inline void quic_cubic_update(struct quic_cc *cc, uint32_t acked)
{
	struct cubic *c = quic_cc_priv(cc);
	struct quic_path *path = container_of(cc, struct quic_path, cc);
	/* Current cwnd as number of packets */
	uint32_t t, target, inc, inc_diff;
	uint64_t delta, diff;

	TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
	/* First ACK of a new epoch (after init or recovery): set up the
	 * cubic curve parameters K and origin_point.
	 */
	if (!c->epoch_start) {
		c->epoch_start = now_ms;
		if (c->last_w_max <= path->cwnd) {
			/* Already above the pre-loss window: start at the
			 * curve origin, no time offset needed.
			 */
			c->K = 0;
			c->origin_point = path->cwnd;
		}
		else {
			/* K = cubic_root((1 - beta) * W_max / C) */
			c->K = cubic_root((c->last_w_max - path->cwnd) *
			                  (CUBIC_BETA_SCALE - CUBIC_BETA) / CUBIC_C / path->mtu) << TIME_SCALE_FACTOR_SHIFT;
			c->origin_point = c->last_w_max;
		}

		c->tcp_wnd = path->cwnd;
		c->remaining_inc = 0;
		c->remaining_tcp_inc = 0;
	}

	/* Elapsed time since epoch start, advanced by rtt_min so the target
	 * anticipates one RTT of feedback delay; |diff| = |t - K|.
	 */
	t = now_ms + path->loss.rtt_min - c->epoch_start;
	if (t < c->K) {
		diff = c->K - t;
	}
	else {
		diff = t - c->K;
	}

	if (diff > CUBIC_DIFF_TIME_LIMIT) {
		/* TODO : should not happen if we handle the case
		 * of very late acks receipt. This must be handled as a congestion
		 * control event: a very late ack should trigger a congestion
		 * control algorithm reset.
		 */
		quic_cc_cubic_reset(cc);
		goto leave;
	}

	/* delta = C * |t - K|^3, converted to bytes (mtu multiple) and
	 * unscaled from the fixed-point time/constant factors.
	 */
	delta = path->mtu * ((CUBIC_C * diff * diff * diff) >> (10 + 3 * TIME_SCALE_FACTOR_SHIFT));
	if (t < c->K)
		target = c->origin_point - delta;
	else
		target = c->origin_point + delta;

	if (target > path->cwnd) {
		/* Grow towards the target: about (target - cwnd)/cwnd mtu-sized
		 * steps per window, keeping the sub-cwnd remainder for later.
		 */
		inc_diff = c->remaining_inc + path->mtu * (target - path->cwnd);
		c->remaining_inc = inc_diff % path->cwnd;
		inc = inc_diff / path->cwnd;
	}
	else {
		/* small increment */
		inc_diff = c->remaining_inc + path->mtu;
		c->remaining_inc = inc_diff % (100 * path->cwnd);
		inc = inc_diff / (100 * path->cwnd);
	}

	/* Estimated standard TCP (Reno) window: one mtu per cwnd of acked data. */
	inc_diff = c->remaining_tcp_inc + path->mtu * acked;
	c->tcp_wnd += inc_diff / path->cwnd;
	c->remaining_tcp_inc = inc_diff % path->cwnd;
	/* TCP friendliness */
	if (c->tcp_wnd > path->cwnd) {
		/* Never grow slower than the Reno estimate would. */
		uint32_t tcp_inc = path->mtu * (c->tcp_wnd - path->cwnd) / path->cwnd;
		if (tcp_inc > inc)
			inc = tcp_inc;
	}

	path->cwnd += inc;
	path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
 leave:
	TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
}
|
|
|
/* Slow start callback: restart the algorithm from a pristine state. */
static void quic_cc_cubic_slow_start(struct quic_cc *cc)
{
	TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
	quic_cc_cubic_reset(cc);
	TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
}
|
|
|
/* Enter the recovery period on a new loss event: remember the current
 * window as W_max (with fast convergence), apply the beta reduction to
 * the slow start threshold, and reduce the window without ever going
 * below path->min_cwnd (see GH #2116).
 */
static void quic_enter_recovery(struct quic_cc *cc)
{
	struct quic_path *path = container_of(cc, struct quic_path, cc);
	struct cubic *c = quic_cc_priv(cc);
	/* Current cwnd as number of packets */

	TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
	/* Force a new cubic epoch to be set up on the next CA update. */
	c->epoch_start = 0;
	c->recovery_start_time = now_ms;
	/* Fast convergence */
	if (path->cwnd < c->last_w_max) {
		/* (1 + beta) * path->cwnd / 2 */
		c->last_w_max = (path->cwnd * (CUBIC_BETA_SCALE + CUBIC_BETA) / 2) >> CUBIC_BETA_SCALE_SHIFT;
	}
	else {
		c->last_w_max = path->cwnd;
	}
	/* Apply the reduction factor to the threshold, then clamp the
	 * window: it must never drop below min_cwnd.
	 */
	c->ssthresh = (CUBIC_BETA * path->cwnd) >> CUBIC_BETA_SCALE_SHIFT;
	path->cwnd = QUIC_MAX(c->ssthresh, (uint32_t)path->min_cwnd);
	c->state = QUIC_CC_ST_RP;
	TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc, NULL, cc);
}
|
|
|
/* Congestion slow-start callback. */
static void quic_cc_cubic_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
{
	struct quic_path *path = container_of(cc, struct quic_path, cc);
	struct cubic *c = quic_cc_priv(cc);

	TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
	TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev);
	switch (ev->type) {
	case QUIC_CC_EVT_ACK:
		/* Grow by the acked bytes, guarding against wrapping past
		 * the "infinite" sentinel value.
		 */
		if (path->cwnd < QUIC_CC_INFINITE_SSTHESH - ev->ack.acked)
			path->cwnd += ev->ack.acked;
		/* Exit to congestion avoidance if slow start threshold is reached. */
		if (path->cwnd >= c->ssthresh)
			c->state = QUIC_CC_ST_CA;
		path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
		break;

	case QUIC_CC_EVT_LOSS:
		quic_enter_recovery(cc);
		break;

	case QUIC_CC_EVT_ECN_CE:
		/* TODO */
		break;
	}

	/* NOTE(review): label currently unreferenced (no goto targets it). */
 out:
	TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, NULL, cc);
	TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
}
|
|
|
/* Congestion avoidance callback. */
|
|
static void quic_cc_cubic_ca_cb(struct quic_cc *cc, struct quic_cc_event *ev)
|
|
{
|
|
TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
|
|
TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev);
|
|
switch (ev->type) {
|
|
case QUIC_CC_EVT_ACK:
|
|
quic_cubic_update(cc, ev->ack.acked);
|
|
break;
|
|
case QUIC_CC_EVT_LOSS:
|
|
quic_enter_recovery(cc);
|
|
break;
|
|
case QUIC_CC_EVT_ECN_CE:
|
|
/* TODO */
|
|
break;
|
|
}
|
|
|
|
out:
|
|
TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, NULL, cc);
|
|
TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
|
|
}
|
|
|
|
/* Recovery period callback */
static void quic_cc_cubic_rp_cb(struct quic_cc *cc, struct quic_cc_event *ev)
{
	struct cubic *c = quic_cc_priv(cc);

	TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc, ev);
	TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev, cc);

	switch (ev->type) {
	case QUIC_CC_EVT_ACK:
		/* RFC 9002 7.3.2. Recovery
		 * A recovery period ends and the sender enters congestion avoidance when a
		 * packet sent during the recovery period is acknowledged.
		 */
		if (tick_is_le(ev->ack.time_sent, c->recovery_start_time)) {
			/* Acked packet was sent before recovery started: ignore. */
			TRACE_PROTO("CC cubic (still in recov. period)", QUIC_EV_CONN_CC, cc->qc);
			goto leave;
		}

		c->state = QUIC_CC_ST_CA;
		c->recovery_start_time = TICK_ETERNITY;
		break;
	case QUIC_CC_EVT_LOSS:
		/* Further losses during recovery do not shrink the window again. */
		break;
	case QUIC_CC_EVT_ECN_CE:
		/* TODO */
		break;
	}

 leave:
	TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, NULL, cc);
	TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc, NULL, cc);
}
|
|
|
/* Per-state event handlers, indexed by the QUIC_CC_ST_* state value. */
static void (*quic_cc_cubic_state_cbs[])(struct quic_cc *cc,
                                         struct quic_cc_event *ev) = {
	[QUIC_CC_ST_SS] = quic_cc_cubic_ss_cb,
	[QUIC_CC_ST_CA] = quic_cc_cubic_ca_cb,
	[QUIC_CC_ST_RP] = quic_cc_cubic_rp_cb,
};
|
|
|
static void quic_cc_cubic_event(struct quic_cc *cc, struct quic_cc_event *ev)
|
|
{
|
|
struct cubic *c = quic_cc_priv(cc);
|
|
|
|
return quic_cc_cubic_state_cbs[c->state](cc, ev);
|
|
}
|
|
|
|
/* Dump the CUBIC state of <cc> into <buf> for traces: state name,
 * current/max window, threshold, and remaining recovery time.
 */
static void quic_cc_cubic_state_trace(struct buffer *buf, const struct quic_cc *cc)
{
	struct quic_path *path;
	struct cubic *c = quic_cc_priv(cc);

	path = container_of(cc, struct quic_path, cc);
	/* ssthresh is cast to signed so the INFINITE sentinel shows as -1;
	 * rpst is -1 when no recovery start tick is set.
	 */
	chunk_appendf(buf, " state=%s cwnd=%llu mcwnd=%llu ssthresh=%d rpst=%dms",
	              quic_cc_state_str(c->state),
	              (unsigned long long)path->cwnd,
	              (unsigned long long)path->mcwnd,
	              (int)c->ssthresh,
	              !tick_isset(c->recovery_start_time) ? -1 :
	              TICKS_TO_MS(tick_remain(c->recovery_start_time, now_ms)));
}
|
|
|
/* Public descriptor for the CUBIC congestion control algorithm. */
struct quic_cc_algo quic_cc_algo_cubic = {
	.type        = QUIC_CC_ALGO_TP_CUBIC,
	.init        = quic_cc_cubic_init,
	.event       = quic_cc_cubic_event,
	.slow_start  = quic_cc_cubic_slow_start,
	.state_trace = quic_cc_cubic_state_trace,
};
|