diff --git a/src/mux_h2.c b/src/mux_h2.c
index 3c1d7051d..ddc49eb98 100644
--- a/src/mux_h2.c
+++ b/src/mux_h2.c
@@ -2804,16 +2804,81 @@ static int h2c_update_strm_rx_win(struct h2c *h2c)
 {
 	struct h2s *h2s;
 	int win = 0;
+	int rxbsz;
 
 	h2s = h2c_st_by_id(h2c, h2c->dsi);
 	if (h2s && h2s->h2c) {
-		/* real stream */
+		/* This is real stream. Principle: We have a total number of
+		 * allocatable rxbufs per connection (bl_size). We always grant
+		 * one to any existing stream (as long as there are some left),
+		 * so we deduce from those all non receiving streams. We
+		 * further deduce 1/8 to assign to any new stream. The
+		 * remaining ones can be evenly shared between receiving
+		 * streams. Finally, for front streams, this is rounded
+		 * up to the buffer size while for back streams we round
+		 * it down. The rationale here is that front to back is
+		 * very fast to flush and slow to refill while it's the
+		 * opposite in the other way so we try to minimize HoL.
+		 */
 		if (h2s->st < H2_SS_ERROR) {
+			int allocatable;
+			int non_rx;
+			int reserved;
+			int to_share;
+			int opt_size;
+
 			/* let's use the configured static window size */
 			win = (h2c->flags & H2_CF_IS_BACK) ?
 				h2_be_settings_initial_window_size:
 				h2_fe_settings_initial_window_size;
 			win = win ? win : h2_settings_initial_window_size;
+
+			/* try to optimally align incoming frames to align copies
+			 * to HTX, but stick to 16384 if lower since that's what most
+			 * implems use and some might refrain from sending until a
+			 * full frame is permitted. We won't be causing HoL for 56
+			 * extra bytes anyway.
+			 */
+			opt_size = global.tune.bufsize - sizeof(struct htx) - sizeof(struct htx_blk);
+			if (opt_size < 16384)
+				opt_size = 16384;
+
+			/* default to one buffer when not receiving data */
+			rxbsz = opt_size;
+
+			/* only advertise a larger window if we really expect more data than
+			 * the default window (or we don't know how much we expect).
+			 */
+			if ((h2s->flags & H2_SF_EXPECT_RXDATA) &&
+			    (!(h2s->flags & H2_SF_DATA_CLEN) || h2s->body_len > (ullong)win)) {
+				non_rx = MAX((int)(h2c->nb_sc - h2c->receiving_streams), 0);
+				allocatable = MAX((int)(bl_size(h2c->shared_rx_bufs) - non_rx), 0);
+				reserved = (h2c->streams_limit - h2c->nb_sc + 7) / 8;
+				to_share = MAX(allocatable - reserved, 0);
+
+				rxbsz = opt_size * to_share;
+				rxbsz = (rxbsz + h2c->receiving_streams - 1) / h2c->receiving_streams;
+
+				/* Only provide integral multiples of buffer size.
+				 * On the front, we know that largest values are
+				 * important for bandwidth, and that the data are
+				 * quickly consumed by the servers. Also, any
+				 * overestimate only impacts that client. On the
+				 * backend, large values have little impact on
+				 * performance (low latency), but they can cause
+				 * HoL between multiple clients. Thus we round up
+				 * on the front and down on the back.
+				 */
+				if (rxbsz) {
+					if (!(h2c->flags & H2_CF_IS_BACK))
+						rxbsz += opt_size - 1;
+
+					rxbsz = rxbsz / opt_size * opt_size;
+				}
+			}
+
+			if (rxbsz > win)
+				win = rxbsz;
 		}
 
 		/* Principle below: the calculate the next max window offset