mirror of
https://git.haproxy.org/git/haproxy.git/
synced 2025-09-22 06:11:32 +02:00
MEDIUM: mux-h2: try to coalesce outgoing WINDOW_UPDATE frames
Glenn Strauss from Lighttpd reported a corner case affecting curl+lighttpd that causes some uploads to degenerate to extremely suboptimal conditions under certain circumstances, and noted that many other implementations were possibly not safe against this degradation. Glenn's detailed analysis is available here: https://github.com/nghttp2/nghttp2/issues/1722 In short, curl uses a 65536 bytes buffer and the default stream window is 65535, with 16384 bytes per frame. Curl will then send 3 frames of 16384 bytes followed by one of 16383, will wait for a window update to send the last byte before recycling the buffer to read the next 64kB. On each round like this, one extra single-byte frame will be sent, and if ACKs for these single-byte frames are not aggregated, this will only allow the client to send one extra byte at a time. At some point it is possible (at least Glenn observed it) to have mostly 1-byte frames in the transfer, resulting in huge CPU usage and a long transfer. It was not possible to reproduce this with haproxy, even when playing with frame sizes, buffer sizes nor window sizes. One reason seems to be that we're using the same buffer size for the connection and the stream and that the frame headers prevent the filling of the window from happening on the same boundaries as on the sender. However it does occasionally happen to see up to two 1-byte data frames in a row, indicating that there's definitely room for improvement. The WINDOW_UPDATE frames for the connection are sent at the end of the demuxing, but the ones for the streams are currently sent immediately after a DATA frame is processed, mostly for convenience. But we don't need to proceed like this, we already have the counter of unacked bytes in rcvd_s, so we can simply use that to decide when to send an ACK. It must just be done before processing a new frame. The benefit is that contiguous frames for the same stream will now only produce a single WU, like for the connection. On complicated tests involving a client that was limited to 100 Mbps transfers and a dummy Lua-based payload consumer, it was possible to see the number of stream WU frames being halved for a 100 MB transfer, which is already a nice saving anyway. Glenn proposed a better workaround consisting in increasing the default window size to 65536. This will be done in a separate patch so that both can be studied independently in field and backported as needed. This patch is not much complicated and shold be backportable. It just needs to be tested in development first.
This commit is contained in:
parent
1cd43aa194
commit
617592c9eb
24
src/mux_h2.c
24
src/mux_h2.c
@ -106,7 +106,7 @@ struct h2c {
|
|||||||
uint32_t streams_limit; /* maximum number of concurrent streams the peer supports */
|
uint32_t streams_limit; /* maximum number of concurrent streams the peer supports */
|
||||||
int32_t max_id; /* highest ID known on this connection, <0 before preface */
|
int32_t max_id; /* highest ID known on this connection, <0 before preface */
|
||||||
uint32_t rcvd_c; /* newly received data to ACK for the connection */
|
uint32_t rcvd_c; /* newly received data to ACK for the connection */
|
||||||
uint32_t rcvd_s; /* newly received data to ACK for the current stream (dsi) */
|
uint32_t rcvd_s; /* newly received data to ACK for the current stream (dsi) or zero */
|
||||||
|
|
||||||
/* states for the demux direction */
|
/* states for the demux direction */
|
||||||
struct hpack_dht *ddht; /* demux dynamic header table */
|
struct hpack_dht *ddht; /* demux dynamic header table */
|
||||||
@ -3379,8 +3379,6 @@ static void h2_process_demux(struct h2c *h2c)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (h2c->st0 == H2_CS_FRAME_H) {
|
if (h2c->st0 == H2_CS_FRAME_H) {
|
||||||
h2c->rcvd_s = 0;
|
|
||||||
|
|
||||||
TRACE_STATE("expecting H2 frame header", H2_EV_RX_FRAME|H2_EV_RX_FHDR, h2c->conn);
|
TRACE_STATE("expecting H2 frame header", H2_EV_RX_FRAME|H2_EV_RX_FHDR, h2c->conn);
|
||||||
if (!h2_peek_frame_hdr(&h2c->dbuf, 0, &hdr)) {
|
if (!h2_peek_frame_hdr(&h2c->dbuf, 0, &hdr)) {
|
||||||
h2c->flags |= H2_CF_DEM_SHORT_READ;
|
h2c->flags |= H2_CF_DEM_SHORT_READ;
|
||||||
@ -3398,6 +3396,16 @@ static void h2_process_demux(struct h2c *h2c)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (h2c->rcvd_s && h2c->dsi != hdr.sid) {
|
||||||
|
/* changed stream with a pending WU, need to
|
||||||
|
* send it now.
|
||||||
|
*/
|
||||||
|
TRACE_PROTO("sending stream WINDOW_UPDATE frame on stream switch", H2_EV_TX_FRAME|H2_EV_TX_WU, h2c->conn);
|
||||||
|
ret = h2c_send_strm_wu(h2c);
|
||||||
|
if (ret <= 0)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
padlen = 0;
|
padlen = 0;
|
||||||
if (h2_ft_bit(hdr.ft) & H2_FT_PADDED_MASK && hdr.ff & H2_F_PADDED) {
|
if (h2_ft_bit(hdr.ft) & H2_FT_PADDED_MASK && hdr.ff & H2_F_PADDED) {
|
||||||
/* If the frame is padded (HEADERS, PUSH_PROMISE or DATA),
|
/* If the frame is padded (HEADERS, PUSH_PROMISE or DATA),
|
||||||
@ -3568,8 +3576,8 @@ static void h2_process_demux(struct h2c *h2c)
|
|||||||
HA_ATOMIC_INC(&h2c->px_counters->data_rcvd);
|
HA_ATOMIC_INC(&h2c->px_counters->data_rcvd);
|
||||||
|
|
||||||
if (h2c->st0 == H2_CS_FRAME_A) {
|
if (h2c->st0 == H2_CS_FRAME_A) {
|
||||||
TRACE_PROTO("sending stream WINDOW_UPDATE frame", H2_EV_TX_FRAME|H2_EV_TX_WU, h2c->conn, h2s);
|
/* rcvd_s will suffice to trigger the sending of a WU */
|
||||||
ret = h2c_send_strm_wu(h2c);
|
h2c->st0 = H2_CS_FRAME_H;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -3639,6 +3647,12 @@ static void h2_process_demux(struct h2c *h2c)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (h2c->rcvd_s > 0 &&
|
||||||
|
!(h2c->flags & (H2_CF_MUX_MFULL | H2_CF_DEM_MBUSY | H2_CF_DEM_MROOM))) {
|
||||||
|
TRACE_PROTO("sending stream WINDOW_UPDATE frame", H2_EV_TX_FRAME|H2_EV_TX_WU, h2c->conn, h2s);
|
||||||
|
h2c_send_strm_wu(h2c);
|
||||||
|
}
|
||||||
|
|
||||||
if (h2c->rcvd_c > 0 &&
|
if (h2c->rcvd_c > 0 &&
|
||||||
!(h2c->flags & (H2_CF_MUX_MFULL | H2_CF_DEM_MBUSY | H2_CF_DEM_MROOM))) {
|
!(h2c->flags & (H2_CF_MUX_MFULL | H2_CF_DEM_MBUSY | H2_CF_DEM_MROOM))) {
|
||||||
TRACE_PROTO("sending H2 WINDOW_UPDATE frame", H2_EV_TX_FRAME|H2_EV_TX_WU, h2c->conn);
|
TRACE_PROTO("sending H2 WINDOW_UPDATE frame", H2_EV_TX_FRAME|H2_EV_TX_WU, h2c->conn);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user