From bbaa7aef7ba25480b842cda274e57d820680bb57 Mon Sep 17 00:00:00 2001
From: Amaury Denoyelle
Date: Thu, 9 Jan 2025 17:26:37 +0100
Subject: [PATCH] BUG/MINOR: quic: do not increase congestion window if app
 limited

Previously, the congestion window was increased each time a new
acknowledgement was received, without taking the window filling level
into account. Under network conditions with negligible loss, this
causes the window to be incremented up to its maximum value (by
default 480k), even though the application does not have enough data
to fill it.

In most cases, this issue is not noticeable. However, it may lead to
excessive memory consumption when a QUIC connection is suddenly
interrupted, as in this case haproxy fills the window with
retransmissions. It has even caused OOM crashes when thousands of
clients were interrupted at once during a local network benchmark.

Fix this by checking the window filling level prior to every
increment, via a new helper function quic_cwnd_may_increase(). It was
arbitrarily decided that the window must be at least 50% full when the
ACK is handled for the increment to be performed. This value is a good
compromise to keep the window in check while still allowing fast
growth when needed.

Note that this patch only concerns the cubic and newreno algorithms.
BBR already has its own notion of application limited data, which
ensures the window is only incremented when necessary.

This should be backported up to 2.6.
---
 include/haproxy/quic_cc.h |  2 ++
 src/quic_cc.c             | 19 +++++++++++++++++++
 src/quic_cc_cubic.c       | 26 +++++++++++++++++---------
 src/quic_cc_newreno.c     | 16 ++++++++++------
 src/quic_rx.c             |  2 +-
 5 files changed, 49 insertions(+), 16 deletions(-)

diff --git a/include/haproxy/quic_cc.h b/include/haproxy/quic_cc.h
index edc22ee98..b16996d0c 100644
--- a/include/haproxy/quic_cc.h
+++ b/include/haproxy/quic_cc.h
@@ -117,5 +117,7 @@ static inline size_t quic_cc_path_prep_data(struct quic_cc_path *path)
 	return path->cwnd - path->prep_in_flight;
 }
 
+int quic_cwnd_may_increase(const struct quic_cc_path *path);
+
 #endif /* USE_QUIC */
 #endif /* _PROTO_QUIC_CC_H */
diff --git a/src/quic_cc.c b/src/quic_cc.c
index 811a766dd..603914cd4 100644
--- a/src/quic_cc.c
+++ b/src/quic_cc.c
@@ -65,3 +65,22 @@ uint quic_cc_default_pacing_burst(const struct quic_cc *cc)
 	struct quic_cc_path *path = container_of(cc, struct quic_cc_path, cc);
 	return path->pacing_burst;
 }
+
+/* Returns true if congestion window on path ought to be increased. */
+int quic_cwnd_may_increase(const struct quic_cc_path *path)
+{
+	/* RFC 9002 7.8. Underutilizing the Congestion Window
+	 *
+	 * When bytes in flight is smaller than the congestion window and
+	 * sending is not pacing limited, the congestion window is
+	 * underutilized. This can happen due to insufficient application data
+	 * or flow control limits. When this occurs, the congestion window
+	 * SHOULD NOT be increased in either slow start or congestion avoidance.
+	 */
+
+	/* Consider that the congestion window may be increased if it is at
+	 * least half full or smaller than 16k. These conditions should not
+	 * be too restrictive, to avoid hampering window growth.
+	 */
+	return 2 * path->in_flight >= path->cwnd || path->cwnd < 16384;
+}
diff --git a/src/quic_cc_cubic.c b/src/quic_cc_cubic.c
index a1768a151..9783d8f85 100644
--- a/src/quic_cc_cubic.c
+++ b/src/quic_cc_cubic.c
@@ -382,9 +382,11 @@ static inline void quic_cubic_update(struct quic_cc *cc, uint32_t acked)
 		inc = W_est_inc;
 	}
 
-	path->cwnd += inc;
-	path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
-	path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+	if (quic_cwnd_may_increase(path)) {
+		path->cwnd += inc;
+		path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
+		path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+	}
  leave:
 	TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
 }
@@ -453,8 +455,10 @@ static void quic_cc_cubic_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
 			if (path->cwnd >= QUIC_CC_INFINITE_SSTHESH - acked)
 				goto out;
 
-			path->cwnd += acked;
-			path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+			if (quic_cwnd_may_increase(path)) {
+				path->cwnd += acked;
+				path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+			}
 			quic_cc_hystart_track_min_rtt(cc, h, path->loss.latest_rtt);
 			if (ev->ack.pn >= h->wnd_end)
 				h->wnd_end = UINT64_MAX;
@@ -465,8 +469,10 @@ static void quic_cc_cubic_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
 			}
 		}
 		else if (path->cwnd < QUIC_CC_INFINITE_SSTHESH - ev->ack.acked) {
-			path->cwnd += ev->ack.acked;
-			path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
+			if (quic_cwnd_may_increase(path)) {
+				path->cwnd += ev->ack.acked;
+				path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
+			}
 		}
 		/* Exit to congestion avoidance if slow start threshold is reached. */
 		if (path->cwnd >= c->ssthresh)
@@ -544,8 +550,10 @@ static void quic_cc_cubic_cs_cb(struct quic_cc *cc, struct quic_cc_event *ev)
 			if (path->cwnd >= QUIC_CC_INFINITE_SSTHESH - acked)
 				goto out;
 
-			path->cwnd += acked;
-			path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+			if (quic_cwnd_may_increase(path)) {
+				path->cwnd += acked;
+				path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+			}
 			quic_cc_hystart_track_min_rtt(cc, h, path->loss.latest_rtt);
 			if (quic_cc_hystart_may_reenter_ss(h)) {
 				/* Exit to slow start */
diff --git a/src/quic_cc_newreno.c b/src/quic_cc_newreno.c
index ec63bc3e8..989a7c105 100644
--- a/src/quic_cc_newreno.c
+++ b/src/quic_cc_newreno.c
@@ -86,9 +86,11 @@ static void quic_cc_nr_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
 	path = container_of(cc, struct quic_cc_path, cc);
 	switch (ev->type) {
 	case QUIC_CC_EVT_ACK:
-		path->cwnd += ev->ack.acked;
-		path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
-		path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+		if (quic_cwnd_may_increase(path)) {
+			path->cwnd += ev->ack.acked;
+			path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
+			path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+		}
 		/* Exit to congestion avoidance if slow start threshold is reached. */
 		if (path->cwnd > nr->ssthresh)
 			nr->state = QUIC_CC_ST_CA;
@@ -124,9 +126,11 @@ static void quic_cc_nr_ca_cb(struct quic_cc *cc, struct quic_cc_event *ev)
 		 */
 		acked = ev->ack.acked * path->mtu + nr->remain_acked;
 		nr->remain_acked = acked % path->cwnd;
-		path->cwnd += acked / path->cwnd;
-		path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
-		path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+		if (quic_cwnd_may_increase(path)) {
+			path->cwnd += acked / path->cwnd;
+			path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
+			path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+		}
 		break;
 	}
 
diff --git a/src/quic_rx.c b/src/quic_rx.c
index c3510975c..27eebec0d 100644
--- a/src/quic_rx.c
+++ b/src/quic_rx.c
@@ -439,7 +439,6 @@ static void qc_notify_cc_of_newly_acked_pkts(struct quic_conn *qc,
 	list_for_each_entry_safe(pkt, tmp, newly_acked_pkts, list) {
 		pkt->pktns->tx.in_flight -= pkt->in_flight_len;
 		p->prep_in_flight -= pkt->in_flight_len;
-		p->in_flight -= pkt->in_flight_len;
 		if (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING)
 			p->ifae_pkts--;
 		/* If this packet contained an ACK frame, proceed to the
@@ -455,6 +454,7 @@ static void qc_notify_cc_of_newly_acked_pkts(struct quic_conn *qc,
 			ev.ack.pn = pkt->pn_node.key;
 			/* Note that this event is not emitted for BBR. */
 			quic_cc_event(&p->cc, &ev);
+			p->in_flight -= pkt->in_flight_len;
 			if (drs && (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING))
 				quic_cc_drs_update_rate_sample(drs, pkt, time_ns);
 			LIST_DEL_INIT(&pkt->list);
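
To make the 50% rule concrete, here is a minimal standalone sketch
(not haproxy code: fake_path is a hypothetical stand-in for
quic_cc_path, keeping only the two fields the helper reads). It
replays the new predicate on a few window states:

#include <stdio.h>
#include <stdint.h>

/* Hypothetical stand-in for haproxy's quic_cc_path, reduced to the
 * two fields read by the predicate.
 */
struct fake_path {
	uint64_t cwnd;      /* congestion window, in bytes */
	uint64_t in_flight; /* bytes currently in flight */
};

/* Same condition as the quic_cwnd_may_increase() helper added above:
 * grow only if the window is at least half full, or still below 16k.
 */
static int cwnd_may_increase(const struct fake_path *p)
{
	return 2 * p->in_flight >= p->cwnd || p->cwnd < 16384;
}

int main(void)
{
	const struct fake_path cases[] = {
		{ 65536, 40960 }, /* 62.5% full -> may grow */
		{ 65536, 20480 }, /* 31.2% full -> app limited, hold */
		{  8192,     0 }, /* below 16k  -> always allowed to grow */
	};
	size_t i;

	for (i = 0; i < sizeof(cases) / sizeof(*cases); i++)
		printf("cwnd=%5llu in_flight=%5llu -> %s\n",
		       (unsigned long long)cases[i].cwnd,
		       (unsigned long long)cases[i].in_flight,
		       cwnd_may_increase(&cases[i]) ? "increase" : "hold");
	return 0;
}

The second case is the app-limited scenario from the commit message:
without the check, an otherwise idle connection keeps inflating cwnd
on every ACK; with it, growth resumes only once the sender actually
uses at least half of the window.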