mirror of
https://git.haproxy.org/git/haproxy.git/
synced 2025-10-30 08:00:59 +01:00
The QUIC streamdesc layer is responsible for handling the reception of ACKs for streams. It removes stream data from the underlying buffers on ACK reception. The streamdesc layer treats ACKs in order at the stream level. Out-of-order ACKs are buffered in a tree until they can be handled once older data has been acknowledged. Previously, the qf_stream instance which comes from the quic_tx_packet was used as the tree node to buffer such ranges. Introduce a new type dedicated to representing an out-of-order stream ACK data range. This type is named qc_stream_ack. It contains only the minimal information related to the acknowledged stream data range. This reduces the size of the frequently used quic_frame thanks to the removal of the tree node from qf_stream. Another side effect of this change is that quic_frame instances are now always released immediately on ACK reception, both in-order and out-of-order. This also allows the quic_tx_packet instance to be released, which should reduce memory consumption. The drawback of this change is that a qc_stream_ack instance must be allocated on out-of-order ACK reception. As such, qc_stream_desc_ack() may fail if an error happens on allocation. For the moment, such an error is silently recovered up to qc_treat_rx_pkts() by dropping the received packet containing the ACK frame. In the future, it may be useful to close the connection, as this error may only happen in low-memory conditions.
273 lines
8.1 KiB
C
273 lines
8.1 KiB
C
#include <import/eb64tree.h>
|
|
|
|
#include <haproxy/quic_conn.h>
|
|
#include <haproxy/quic_frame.h>
|
|
#include <haproxy/quic_retransmit.h>
|
|
#include <haproxy/quic_stream-t.h>
|
|
#include <haproxy/quic_trace.h>
|
|
#include <haproxy/quic_tx.h>
|
|
#include <haproxy/trace.h>
|
|
|
|
#define TRACE_SOURCE &trace_quic
|
|
|
|
/* Check if STREAM frame <f> content has already been acknowledged before
|
|
* retransmitting it. If only a subset of <f> content is acknowledged, frame is
|
|
* updated to only cover the unacked data.
|
|
*
|
|
* Returns true if frame content is fully acknowledged, false if partially or
|
|
* not at all.
|
|
*/
|
|
int qc_stream_frm_is_acked(struct quic_conn *qc, struct quic_frame *f)
|
|
{
|
|
const struct qf_stream *frm = &f->stream;
|
|
const struct qc_stream_desc *s = frm->stream;
|
|
const int frm_fin = f->type & QUIC_STREAM_FRAME_TYPE_FIN_BIT;
|
|
|
|
if (!eb64_lookup(&qc->streams_by_id, frm->id)) {
|
|
TRACE_DEVEL("STREAM frame already acked : stream released", QUIC_EV_CONN_PRSAFRM, qc, f);
|
|
return 1;
|
|
}
|
|
|
|
/* Frame cannot advertise FIN for a smaller data range. */
|
|
BUG_ON(frm_fin && frm->offset + frm->len < s->ack_offset);
|
|
|
|
if (frm->offset + frm->len < s->ack_offset ||
|
|
(frm->offset + frm->len == s->ack_offset &&
|
|
(!frm_fin || !(s->flags & QC_SD_FL_WAIT_FOR_FIN)))) {
|
|
TRACE_DEVEL("STREAM frame already acked : fully acked range", QUIC_EV_CONN_PRSAFRM, qc, f);
|
|
return 1;
|
|
}
|
|
|
|
if (frm->offset < s->ack_offset &&
|
|
frm->offset + frm->len > s->ack_offset) {
|
|
/* Data range partially acked, remove it from STREAM frame. */
|
|
const uint64_t diff = s->ack_offset - frm->offset;
|
|
TRACE_DEVEL("updated partially acked frame", QUIC_EV_CONN_PRSAFRM, qc, f);
|
|
qc_stream_frm_mv_fwd(f, diff);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Duplicate all frames from <pkt_frm_list> list into <out_frm_list> list
|
|
* for <qc> QUIC connection.
|
|
* This is a best effort function which never fails even if no memory could be
|
|
* allocated to duplicate these frames.
|
|
*/
|
|
static void qc_dup_pkt_frms(struct quic_conn *qc,
|
|
struct list *pkt_frm_list, struct list *out_frm_list)
|
|
{
|
|
struct quic_frame *frm, *frmbak;
|
|
struct list tmp = LIST_HEAD_INIT(tmp);
|
|
|
|
TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
|
|
|
|
list_for_each_entry_safe(frm, frmbak, pkt_frm_list, list) {
|
|
struct quic_frame *dup_frm, *origin;
|
|
|
|
if (frm->flags & QUIC_FL_TX_FRAME_ACKED) {
|
|
TRACE_DEVEL("already acknowledged frame", QUIC_EV_CONN_PRSAFRM, qc, frm);
|
|
continue;
|
|
}
|
|
|
|
switch (frm->type) {
|
|
case QUIC_FT_STREAM_8 ... QUIC_FT_STREAM_F:
|
|
{
|
|
/* Do not resend this frame if in the "already acked range" */
|
|
if (qc_stream_frm_is_acked(qc, frm))
|
|
continue;
|
|
|
|
frm->stream.dup = 1;
|
|
break;
|
|
}
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
/* If <frm> is already a copy of another frame, we must take
|
|
* its original frame as source for the copy.
|
|
*/
|
|
origin = frm->origin ? frm->origin : frm;
|
|
dup_frm = qc_frm_dup(origin);
|
|
if (!dup_frm) {
|
|
TRACE_ERROR("could not duplicate frame", QUIC_EV_CONN_PRSAFRM, qc, frm);
|
|
break;
|
|
}
|
|
|
|
TRACE_DEVEL("built probing frame", QUIC_EV_CONN_PRSAFRM, qc, origin);
|
|
if (origin->pkt) {
|
|
TRACE_DEVEL("duplicated from packet", QUIC_EV_CONN_PRSAFRM,
|
|
qc, dup_frm, &origin->pkt->pn_node.key);
|
|
}
|
|
else {
|
|
/* <origin> is a frame which was sent from a packet detected as lost. */
|
|
TRACE_DEVEL("duplicated from lost packet", QUIC_EV_CONN_PRSAFRM, qc);
|
|
}
|
|
|
|
LIST_APPEND(&tmp, &dup_frm->list);
|
|
}
|
|
|
|
LIST_SPLICE(out_frm_list, &tmp);
|
|
|
|
TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
|
|
}
|
|
|
|
/* Boolean function which return 1 if <pkt> TX packet is only made of
|
|
* already acknowledged frame.
|
|
*/
|
|
static inline int qc_pkt_with_only_acked_frms(struct quic_tx_packet *pkt)
|
|
{
|
|
struct quic_frame *frm;
|
|
|
|
list_for_each_entry(frm, &pkt->frms, list)
|
|
if (!(frm->flags & QUIC_FL_TX_FRAME_ACKED))
|
|
return 0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
/* Prepare a fast retransmission from <qel> encryption level */
|
|
void qc_prep_fast_retrans(struct quic_conn *qc,
|
|
struct quic_pktns *pktns,
|
|
struct list *frms1, struct list *frms2)
|
|
{
|
|
struct eb_root *pkts = &pktns->tx.pkts;
|
|
struct list *frms = frms1;
|
|
struct eb64_node *node;
|
|
struct quic_tx_packet *pkt;
|
|
|
|
TRACE_ENTER(QUIC_EV_CONN_SPPKTS, qc);
|
|
|
|
BUG_ON(frms1 == frms2);
|
|
|
|
pkt = NULL;
|
|
node = eb64_first(pkts);
|
|
start:
|
|
while (node) {
|
|
struct quic_tx_packet *p;
|
|
|
|
p = eb64_entry(node, struct quic_tx_packet, pn_node);
|
|
node = eb64_next(node);
|
|
/* Skip the empty and coalesced packets */
|
|
TRACE_PRINTF(TRACE_LEVEL_PROTO, QUIC_EV_CONN_SPPKTS, qc, 0, 0, 0,
|
|
"--> pn=%llu (%d %d %d)", (ull)p->pn_node.key,
|
|
LIST_ISEMPTY(&p->frms), !!(p->flags & QUIC_FL_TX_PACKET_COALESCED),
|
|
qc_pkt_with_only_acked_frms(p));
|
|
if (!LIST_ISEMPTY(&p->frms) && !qc_pkt_with_only_acked_frms(p)) {
|
|
pkt = p;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!pkt)
|
|
goto leave;
|
|
|
|
/* When building a packet from another one, the field which may increase the
|
|
* packet size is the packet number. And the maximum increase is 4 bytes.
|
|
*/
|
|
if (!quic_peer_validated_addr(qc) && qc_is_listener(qc) &&
|
|
pkt->len + 4 > quic_may_send_bytes(qc)) {
|
|
qc->flags |= QUIC_FL_CONN_ANTI_AMPLIFICATION_REACHED;
|
|
TRACE_PROTO("anti-amplification limit would be reached", QUIC_EV_CONN_SPPKTS, qc, pkt);
|
|
goto leave;
|
|
}
|
|
|
|
TRACE_PROTO("duplicating packet", QUIC_EV_CONN_SPPKTS, qc, pkt);
|
|
qc_dup_pkt_frms(qc, &pkt->frms, frms);
|
|
if (frms == frms1 && frms2) {
|
|
frms = frms2;
|
|
goto start;
|
|
}
|
|
leave:
|
|
TRACE_LEAVE(QUIC_EV_CONN_SPPKTS, qc);
|
|
}
|
|
|
|
/* Prepare a fast retransmission during a handshake after a client
|
|
* has resent Initial packets. According to the RFC a server may retransmit
|
|
* Initial packets send them coalescing with others (Handshake here).
|
|
* (Listener only function).
|
|
*/
|
|
void qc_prep_hdshk_fast_retrans(struct quic_conn *qc,
|
|
struct list *ifrms, struct list *hfrms)
|
|
{
|
|
struct list itmp = LIST_HEAD_INIT(itmp);
|
|
struct list htmp = LIST_HEAD_INIT(htmp);
|
|
|
|
struct quic_enc_level *iqel = qc->iel;
|
|
struct quic_enc_level *hqel = qc->hel;
|
|
struct quic_enc_level *qel = iqel;
|
|
struct eb_root *pkts;
|
|
struct eb64_node *node;
|
|
struct quic_tx_packet *pkt;
|
|
struct list *tmp = &itmp;
|
|
|
|
TRACE_ENTER(QUIC_EV_CONN_SPPKTS, qc);
|
|
start:
|
|
pkt = NULL;
|
|
pkts = &qel->pktns->tx.pkts;
|
|
node = eb64_first(pkts);
|
|
/* Skip the empty packet (they have already been retransmitted) */
|
|
while (node) {
|
|
struct quic_tx_packet *p;
|
|
|
|
p = eb64_entry(node, struct quic_tx_packet, pn_node);
|
|
TRACE_PRINTF(TRACE_LEVEL_PROTO, QUIC_EV_CONN_SPPKTS, qc, 0, 0, 0,
|
|
"--> pn=%llu (%d %d)", (ull)p->pn_node.key,
|
|
LIST_ISEMPTY(&p->frms), !!(p->flags & QUIC_FL_TX_PACKET_COALESCED));
|
|
if (!LIST_ISEMPTY(&p->frms) && !(p->flags & QUIC_FL_TX_PACKET_COALESCED) &&
|
|
!qc_pkt_with_only_acked_frms(p)) {
|
|
pkt = p;
|
|
break;
|
|
}
|
|
|
|
node = eb64_next(node);
|
|
}
|
|
|
|
if (!pkt)
|
|
goto end;
|
|
|
|
/* When building a packet from another one, the field which may increase the
|
|
* packet size is the packet number. And the maximum increase is 4 bytes.
|
|
*/
|
|
if (!quic_peer_validated_addr(qc) && qc_is_listener(qc)) {
|
|
size_t dglen = pkt->len + 4;
|
|
size_t may_send;
|
|
|
|
may_send = quic_may_send_bytes(qc);
|
|
dglen += pkt->next ? pkt->next->len + 4 : 0;
|
|
if (dglen > may_send) {
|
|
qc->flags |= QUIC_FL_CONN_ANTI_AMPLIFICATION_REACHED;
|
|
TRACE_PROTO("anti-amplification limit would be reached", QUIC_EV_CONN_SPPKTS, qc, pkt);
|
|
if (pkt->next)
|
|
TRACE_PROTO("anti-amplification limit would be reached", QUIC_EV_CONN_SPPKTS, qc, pkt->next);
|
|
if (qel == iqel && may_send >= QUIC_INITIAL_PACKET_MINLEN)
|
|
TRACE_PROTO("will probe Initial packet number space", QUIC_EV_CONN_SPPKTS, qc);
|
|
else
|
|
goto end;
|
|
}
|
|
}
|
|
|
|
qel->pktns->tx.pto_probe += 1;
|
|
|
|
/* No risk to loop here, #packet per datagram is bounded */
|
|
requeue:
|
|
TRACE_PROTO("duplicating packet", QUIC_EV_CONN_PRSAFRM, qc, NULL, &pkt->pn_node.key);
|
|
qc_dup_pkt_frms(qc, &pkt->frms, tmp);
|
|
if (qel == iqel) {
|
|
if (pkt->next && pkt->next->type == QUIC_PACKET_TYPE_HANDSHAKE) {
|
|
pkt = pkt->next;
|
|
tmp = &htmp;
|
|
hqel->pktns->tx.pto_probe += 1;
|
|
TRACE_DEVEL("looping for next packet", QUIC_EV_CONN_SPPKTS, qc);
|
|
goto requeue;
|
|
}
|
|
}
|
|
|
|
end:
|
|
LIST_SPLICE(ifrms, &itmp);
|
|
LIST_SPLICE(hfrms, &htmp);
|
|
|
|
TRACE_LEAVE(QUIC_EV_CONN_SPPKTS, qc);
|
|
}
|