BUG/MINOR: quic: do not increase congestion window if app limited

Previously, the congestion window was increased each time a new
acknowledgement was received. However, it did not take into account the
window filling level. In a network condition with negligible loss, this
will cause the window to be incremented until the maximum value (by
default 480k), even though the application does not have enough data to
fill it.

In most cases, this issue is not noticeable. However, it may lead to
excessive memory consumption when a QUIC connection is suddenly
interrupted, as in this case haproxy will fill the window with
retransmissions. It has even caused OOM crashes when thousands of clients
were interrupted at once on a local network benchmark.

Fix this by first checking the window filling level prior to every
incrementation via a new helper function quic_cwnd_may_increase(). It was
arbitrarily decided that the window must be at least 50% full when the
ACK is handled before incrementing it. This value is a good compromise to
keep the window in check while still allowing fast increment when needed.

Note that this patch only concerns the cubic and newreno algorithms. BBR
already has its own notion of application-limited sending, which ensures
the window is only incremented when necessary.

This should be backported up to 2.6.
This commit is contained in:
Amaury Denoyelle 2025-01-09 17:26:37 +01:00
parent 7c0820892f
commit bbaa7aef7b
5 changed files with 49 additions and 16 deletions

View File

@ -117,5 +117,7 @@ static inline size_t quic_cc_path_prep_data(struct quic_cc_path *path)
return path->cwnd - path->prep_in_flight;
}
int quic_cwnd_may_increase(const struct quic_cc_path *path);
#endif /* USE_QUIC */
#endif /* _PROTO_QUIC_CC_H */

View File

@ -65,3 +65,22 @@ uint quic_cc_default_pacing_burst(const struct quic_cc *cc)
struct quic_cc_path *path = container_of(cc, struct quic_cc_path, cc);
return path->pacing_burst;
}
/* Report whether the congestion window of <path> ought to be increased
 * on ACK handling. Returns a non-zero value if so, 0 otherwise.
 */
int quic_cwnd_may_increase(const struct quic_cc_path *path)
{
	/* RFC 9002 7.8. Underutilizing the Congestion Window
	 *
	 * When bytes in flight is smaller than the congestion window and
	 * sending is not pacing limited, the congestion window is
	 * underutilized. This can happen due to insufficient application data
	 * or flow control limits. When this occurs, the congestion window
	 * SHOULD NOT be increased in either slow start or congestion avoidance.
	 */

	/* Always allow growth while the window is still small (below 16k):
	 * restricting it here would make the window grow too slowly.
	 */
	if (path->cwnd < 16384)
		return 1;

	/* Otherwise only grow when the window is at least half utilized. */
	return 2 * path->in_flight >= path->cwnd;
}

View File

@ -382,9 +382,11 @@ static inline void quic_cubic_update(struct quic_cc *cc, uint32_t acked)
inc = W_est_inc;
}
if (quic_cwnd_may_increase(path)) {
path->cwnd += inc;
path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
}
leave:
TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
}
@ -453,8 +455,10 @@ static void quic_cc_cubic_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
if (path->cwnd >= QUIC_CC_INFINITE_SSTHESH - acked)
goto out;
if (quic_cwnd_may_increase(path)) {
path->cwnd += acked;
path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
}
quic_cc_hystart_track_min_rtt(cc, h, path->loss.latest_rtt);
if (ev->ack.pn >= h->wnd_end)
h->wnd_end = UINT64_MAX;
@ -465,9 +469,11 @@ static void quic_cc_cubic_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
}
}
else if (path->cwnd < QUIC_CC_INFINITE_SSTHESH - ev->ack.acked) {
if (quic_cwnd_may_increase(path)) {
path->cwnd += ev->ack.acked;
path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
}
}
/* Exit to congestion avoidance if slow start threshold is reached. */
if (path->cwnd >= c->ssthresh)
c->state = QUIC_CC_ST_CA;
@ -544,8 +550,10 @@ static void quic_cc_cubic_cs_cb(struct quic_cc *cc, struct quic_cc_event *ev)
if (path->cwnd >= QUIC_CC_INFINITE_SSTHESH - acked)
goto out;
if (quic_cwnd_may_increase(path)) {
path->cwnd += acked;
path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
}
quic_cc_hystart_track_min_rtt(cc, h, path->loss.latest_rtt);
if (quic_cc_hystart_may_reenter_ss(h)) {
/* Exit to slow start */

View File

@ -86,9 +86,11 @@ static void quic_cc_nr_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
path = container_of(cc, struct quic_cc_path, cc);
switch (ev->type) {
case QUIC_CC_EVT_ACK:
if (quic_cwnd_may_increase(path)) {
path->cwnd += ev->ack.acked;
path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
}
/* Exit to congestion avoidance if slow start threshold is reached. */
if (path->cwnd > nr->ssthresh)
nr->state = QUIC_CC_ST_CA;
@ -124,9 +126,11 @@ static void quic_cc_nr_ca_cb(struct quic_cc *cc, struct quic_cc_event *ev)
*/
acked = ev->ack.acked * path->mtu + nr->remain_acked;
nr->remain_acked = acked % path->cwnd;
if (quic_cwnd_may_increase(path)) {
path->cwnd += acked / path->cwnd;
path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
}
break;
}

View File

@ -439,7 +439,6 @@ static void qc_notify_cc_of_newly_acked_pkts(struct quic_conn *qc,
list_for_each_entry_safe(pkt, tmp, newly_acked_pkts, list) {
pkt->pktns->tx.in_flight -= pkt->in_flight_len;
p->prep_in_flight -= pkt->in_flight_len;
p->in_flight -= pkt->in_flight_len;
if (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING)
p->ifae_pkts--;
/* If this packet contained an ACK frame, proceed to the
@ -455,6 +454,7 @@ static void qc_notify_cc_of_newly_acked_pkts(struct quic_conn *qc,
ev.ack.pn = pkt->pn_node.key;
/* Note that this event is not emitted for BBR. */
quic_cc_event(&p->cc, &ev);
p->in_flight -= pkt->in_flight_len;
if (drs && (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING))
quic_cc_drs_update_rate_sample(drs, pkt, time_ns);
LIST_DEL_INIT(&pkt->list);