REORG: http: move the HTTP/1 chunk parser to h1.{c,h}

Functions http_parse_chunk_size() and http_skip_chunk_crlf() were moved to h1.h, and http_forward_trailers() was moved to h1.c, so that they can be called from outside proto_http.c. The parts that were inline remained inline as this is critical for performance (+41% perf difference reported in an earlier test). For now the "http_" prefix remains in their names since they still depend on the http_msg type.
2026-01-09 19:01:01 +01:00 · 2017-09-21 08:40:02 +02:00 · 2017-09-21 08:40:02 +02:00 · db4893d6a4
commit db4893d6a4
parent 0da5b3bddc
4 changed files with 219 additions and 213 deletions
--- a/include/proto/h1.h
+++ b/include/proto/h1.h
@ -22,11 +22,15 @@
 #ifndef _PROTO_H1_H
 #define _PROTO_H1_H

+#include <common/buffer.h>
 #include <common/compiler.h>
 #include <common/config.h>
+#include <common/standard.h>
 #include <types/h1.h>
+#include <types/proto_http.h>

 extern const uint8_t h1_char_classes[256];
+int http_forward_trailers(struct http_msg *msg);

 #define H1_FLG_CTL  0x01
 #define H1_FLG_SEP  0x02
@ -121,5 +125,145 @@ static inline const char *h1_msg_state_str(enum h1_state msg_state)
 	}
 }

+/* This function may be called only in HTTP_MSG_CHUNK_CRLF. It reads the CRLF or
+ * a possible LF alone at the end of a chunk. The caller should adjust msg->next
+ * in order to include this part into the next forwarding phase.  Note that the
+ * caller must ensure that ->p points to the first byte to parse.  It returns
+ * the number of bytes parsed on success (1 for LF alone, 2 for CRLF), so the
+ * caller can set msg_state to HTTP_MSG_CHUNK_SIZE. If not enough data are
+ * available, it changes nothing and returns zero. On a parse error, it sets
+ * msg->err_pos and returns < 0. Note: this function is designed to parse
+ * wrapped CRLF at the end of the buffer.
+ */
+static inline int http_skip_chunk_crlf(struct http_msg *msg)
+{
+	const struct buffer *buf = msg->chn->buf;
+	const char *ptr;
+	int bytes;
+
+	/* NB: we'll check data availability at the end. It's not a
+	 * problem because whatever we match first will be checked
+	 * against the correct length.
+	 */
+	bytes = 1; /* the LF is always expected */
+	ptr = b_ptr(buf, msg->next);
+	if (*ptr == '\r') {
+		bytes++; /* CR seen: a CRLF takes two bytes */
+		ptr++;
+		if (ptr >= buf->data + buf->size)
+			ptr = buf->data; /* wrap to the start of the storage area */
+	}
+
+	if (msg->next + bytes > buf->i)
+		return 0; /* the expected bytes are not all there yet */
+
+	if (*ptr != '\n') {
+		msg->err_pos = buffer_count(buf, buf->p, ptr); /* report where the LF was expected */
+		return -1;
+	}
+	return bytes;
+}
+
+/* Parse the chunk size at msg->next. Once done, caller should adjust ->next to
+ * point to the first byte of data after the chunk size, so that we know we can
+ * forward exactly msg->next bytes. msg->sol contains the exact number of bytes
+ * forming the chunk size. That way it is always possible to differentiate
+ * between the start of the body and the start of the data.  Return the number
+ * of bytes parsed on success, 0 when some data is missing, <0 on error.  Note:
+ * this function is designed to parse wrapped CRLF at the end of the buffer.
+ */
+static inline int http_parse_chunk_size(struct http_msg *msg)
+{
+	const struct buffer *buf = msg->chn->buf;
+	const char *ptr = b_ptr(buf, msg->next);
+	const char *ptr_old = ptr; /* start of the chunk size, used to compute the parsed length */
+	const char *end = buf->data + buf->size; /* end of the storage area, where pointers wrap */
+	const char *stop = bi_end(buf); /* parsing stops here and reports missing data */
+	unsigned int chunk = 0;
+
+	/* The chunk size is in the following form, though we are only
+	 * interested in the size and CRLF :
+	 *    1*HEXDIGIT *WSP *[ ';' extensions ] CRLF
+	 */
+	while (1) {
+		int c;
+		if (ptr == stop)
+			return 0; /* missing data */
+		c = hex2i(*ptr);
+		if (c < 0) /* not a hex digit anymore */
+			break;
+		if (unlikely(++ptr >= end))
+			ptr = buf->data; /* wrap to the start of the storage area */
+		if (unlikely(chunk & 0xF8000000)) /* integer overflow will occur if result >= 2GB */
+			goto error;
+		chunk = (chunk << 4) + c;
+	}
+
+	/* empty size not allowed */
+	if (unlikely(ptr == ptr_old))
+		goto error;
+
+	/* skip optional spaces/tabs following the size */
+	while (HTTP_IS_SPHT(*ptr)) {
+		if (++ptr >= end)
+			ptr = buf->data;
+		if (unlikely(ptr == stop))
+			return 0; /* missing data */
+	}
+
+	/* Up to there, we know that at least one byte is present at *ptr. Check
+	 * for the end of chunk size.
+	 */
+	while (1) {
+		if (likely(HTTP_IS_CRLF(*ptr))) {
+			/* we now have a CR or an LF at ptr */
+			if (likely(*ptr == '\r')) {
+				if (++ptr >= end)
+					ptr = buf->data;
+				if (ptr == stop)
+					return 0; /* the byte after the CR is not there yet */
+			}
+
+			if (unlikely(*ptr != '\n'))
+				goto error;
+			if (++ptr >= end)
+				ptr = buf->data;
+			/* done */
+			break;
+		}
+		else if (likely(*ptr == ';')) {
+			/* chunk extension, ends at next CRLF */
+			if (++ptr >= end)
+				ptr = buf->data;
+			if (ptr == stop)
+				return 0;
+
+			while (!HTTP_IS_CRLF(*ptr)) {
+				if (++ptr >= end)
+					ptr = buf->data;
+				if (ptr == stop)
+					return 0;
+			}
+			/* we have a CRLF now, loop above */
+			continue;
+		}
+		else
+			goto error;
+	}
+
+	/* OK we found our CRLF and now <ptr> points to the next byte, which may
+	 * or may not be present. We save the number of bytes parsed into
+	 * msg->sol.
+	 */
+	msg->sol = ptr - ptr_old;
+	if (unlikely(ptr < ptr_old))
+		msg->sol += buf->size; /* ptr wrapped: make the length positive again */
+	msg->chunk_len = chunk; /* the size that was just parsed */
+	msg->body_len += chunk; /* accumulated over all parsed chunk sizes */
+	return msg->sol;
+ error:
+	msg->err_pos = buffer_count(buf, buf->p, ptr); /* report where parsing stopped */
+	return -1;
+}
+

 #endif /* _PROTO_H1_H */
--- a/include/types/proto_http.h
+++ b/include/types/proto_http.h
@ -27,6 +27,7 @@
 #include <common/mini-clist.h>
 #include <common/regex.h>

+#include <types/channel.h>
 #include <types/h1.h>
 #include <types/hdr_idx.h>
 #include <types/filters.h>
--- a/src/h1.c
+++ b/src/h1.c
@ -153,3 +153,77 @@ const unsigned char h1_char_classes[256] = {
 	['~'] = H1_FLG_TOK,
 	[127] = H1_FLG_CTL,
 };
+
+
+/* This function skips trailers in the buffer associated with HTTP message
+ * <msg>. The first visited position is msg->next. If the end of the trailers is
+ * found, the function returns >0. So, the caller can automatically schedule it
+ * to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough
+ * data are available, the function does not change anything except maybe
+ * msg->sol if it could parse some lines, and returns zero.  If a parse error
+ * is encountered, the function returns < 0 and does not change anything except
+ * maybe msg->sol. Note that the message must already be in HTTP_MSG_TRAILERS
+ * state before calling this function, which implies that all non-trailers data
+ * have already been scheduled for forwarding, and that msg->next exactly
+ * matches the length of trailers already parsed and not forwarded. It is also
+ * important to note that this function is designed to be able to parse wrapped
+ * headers at end of buffer.
+ */
+int http_forward_trailers(struct http_msg *msg)
+{
+	const struct buffer *buf = msg->chn->buf;
+
+	/* we have msg->next which points to next line. Look for CRLF. But
+	 * first, we reset msg->sol */
+	msg->sol = 0;
+	while (1) {
+		const char *p1 = NULL, *p2 = NULL; /* p1: first CR or LF of the line, p2: its LF */
+		const char *start = b_ptr(buf, msg->next + msg->sol); /* beginning of the current line */
+		const char *stop  = bi_end(buf);
+		const char *ptr   = start;
+		int bytes = 0;
+
+		/* scan current line and stop at LF or CRLF */
+		while (1) {
+			if (ptr == stop)
+				return 0; /* line not complete yet */
+
+			if (*ptr == '\n') {
+				if (!p1)
+					p1 = ptr; /* LF alone, no CR before it */
+				p2 = ptr;
+				break;
+			}
+
+			if (*ptr == '\r') {
+				if (p1) {
+					/* a CR was already seen on this line */
+					msg->err_pos = buffer_count(buf, buf->p, ptr);
+					return -1;
+				}
+				p1 = ptr;
+			}
+
+			ptr++;
+			if (ptr >= buf->data + buf->size)
+				ptr = buf->data; /* wrap to the start of the storage area */
+		}
+
+		/* after LF; point to beginning of next line */
+		p2++;
+		if (p2 >= buf->data + buf->size)
+			p2 = buf->data;
+
+		bytes = p2 - start;
+		if (bytes < 0)
+			bytes += buf->size; /* the line wrapped: make the length positive again */
+		msg->sol += bytes;
+
+		/* LF/CRLF at beginning of line => end of trailers at p2.
+		 * Everything was scheduled for forwarding, there's nothing left
+		 * from this message. */
+		if (p1 == start)
+			return 1;
+
+		/* OK, next line then */
+	}
+}
--- a/src/proto_http.c
+++ b/src/proto_http.c
@ -2108,219 +2108,6 @@ void http_change_connection_header(struct http_txn *txn, struct http_msg *msg, i
 	return;
 }

-/* Parse the chunk size at msg->next. Once done, caller should adjust ->next to
- * point to the first byte of data after the chunk size, so that we know we can
- * forward exactly msg->next bytes. msg->sol contains the exact number of bytes
- * forming the chunk size. That way it is always possible to differentiate
- * between the start of the body and the start of the data.  Return the number
- * of byte parsed on success, 0 when some data is missing, <0 on error.  Note:
- * this function is designed to parse wrapped CRLF at the end of the buffer.
- */
-static inline int http_parse_chunk_size(struct http_msg *msg)
-{
-	const struct buffer *buf = msg->chn->buf;
-	const char *ptr = b_ptr(buf, msg->next);
-	const char *ptr_old = ptr;
-	const char *end = buf->data + buf->size;
-	const char *stop = bi_end(buf);
-	unsigned int chunk = 0;
-
-	/* The chunk size is in the following form, though we are only
-	 * interested in the size and CRLF :
-	 *    1*HEXDIGIT *WSP *[ ';' extensions ] CRLF
-	 */
-	while (1) {
-		int c;
-		if (ptr == stop)
-			return 0;
-		c = hex2i(*ptr);
-		if (c < 0) /* not a hex digit anymore */
-			break;
-		if (unlikely(++ptr >= end))
-			ptr = buf->data;
-		if (chunk & 0xF8000000) /* integer overflow will occur if result >= 2GB */
-			goto error;
-		chunk = (chunk << 4) + c;
-	}
-
-	/* empty size not allowed */
-	if (unlikely(ptr == ptr_old))
-		goto error;
-
-	while (HTTP_IS_SPHT(*ptr)) {
-		if (++ptr >= end)
-			ptr = buf->data;
-		if (unlikely(ptr == stop))
-			return 0;
-	}
-
-	/* Up to there, we know that at least one byte is present at *ptr. Check
-	 * for the end of chunk size.
-	 */
-	while (1) {
-		if (likely(HTTP_IS_CRLF(*ptr))) {
-			/* we now have a CR or an LF at ptr */
-			if (likely(*ptr == '\r')) {
-				if (++ptr >= end)
-					ptr = buf->data;
-				if (ptr == stop)
-					return 0;
-			}
-
-			if (*ptr != '\n')
-				goto error;
-			if (++ptr >= end)
-				ptr = buf->data;
-			/* done */
-			break;
-		}
-		else if (*ptr == ';') {
-			/* chunk extension, ends at next CRLF */
-			if (++ptr >= end)
-				ptr = buf->data;
-			if (ptr == stop)
-				return 0;
-
-			while (!HTTP_IS_CRLF(*ptr)) {
-				if (++ptr >= end)
-					ptr = buf->data;
-				if (ptr == stop)
-					return 0;
-			}
-			/* we have a CRLF now, loop above */
-			continue;
-		}
-		else
-			goto error;
-	}
-
-	/* OK we found our CRLF and now <ptr> points to the next byte, which may
-	 * or may not be present. We save the number of bytes parsed into
-	 * msg->sol.
-	 */
-	msg->sol = ptr - ptr_old;
-	if (unlikely(ptr < ptr_old))
-		msg->sol += buf->size;
-	msg->chunk_len = chunk;
-	msg->body_len += chunk;
-	return msg->sol;
- error:
-	msg->err_pos = buffer_count(buf, buf->p, ptr);
-	return -1;
-}
-
-/* This function skips trailers in the buffer associated with HTTP message
- * <msg>. The first visited position is msg->next. If the end of the trailers is
- * found, the function returns >0. So, the caller can automatically schedul it
- * to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough
- * data are available, the function does not change anything except maybe
- * msg->sol if it could parse some lines, and returns zero.  If a parse error
- * is encountered, the function returns < 0 and does not change anything except
- * maybe msg->sol. Note that the message must already be in HTTP_MSG_TRAILERS
- * state before calling this function, which implies that all non-trailers data
- * have already been scheduled for forwarding, and that msg->next exactly
- * matches the length of trailers already parsed and not forwarded. It is also
- * important to note that this function is designed to be able to parse wrapped
- * headers at end of buffer.
- */
-static int http_forward_trailers(struct http_msg *msg)
-{
-	const struct buffer *buf = msg->chn->buf;
-
-	/* we have msg->next which points to next line. Look for CRLF. But
-	 * first, we reset msg->sol */
-	msg->sol = 0;
-	while (1) {
-		const char *p1 = NULL, *p2 = NULL;
-		const char *start = b_ptr(buf, msg->next + msg->sol);
-		const char *stop  = bi_end(buf);
-		const char *ptr   = start;
-		int bytes = 0;
-
-		/* scan current line and stop at LF or CRLF */
-		while (1) {
-			if (ptr == stop)
-				return 0;
-
-			if (*ptr == '\n') {
-				if (!p1)
-					p1 = ptr;
-				p2 = ptr;
-				break;
-			}
-
-			if (*ptr == '\r') {
-				if (p1) {
-					msg->err_pos = buffer_count(buf, buf->p, ptr);
-					return -1;
-				}
-				p1 = ptr;
-			}
-
-			ptr++;
-			if (ptr >= buf->data + buf->size)
-				ptr = buf->data;
-		}
-
-		/* after LF; point to beginning of next line */
-		p2++;
-		if (p2 >= buf->data + buf->size)
-			p2 = buf->data;
-
-		bytes = p2 - start;
-		if (bytes < 0)
-			bytes += buf->size;
-		msg->sol += bytes;
-
-		/* LF/CRLF at beginning of line => end of trailers at p2.
-		 * Everything was scheduled for forwarding, there's nothing left
-		 * from this message. */
-		if (p1 == start)
-			return 1;
-
-		/* OK, next line then */
-	}
-}
-
-/* This function may be called only in HTTP_MSG_CHUNK_CRLF. It reads the CRLF or
- * a possible LF alone at the end of a chunk. The caller should adjust msg->next
- * in order to include this part into the next forwarding phase.  Note that the
- * caller must ensure that ->p points to the first byte to parse.  It returns
- * the number of bytes parsed on success, so the caller can set msg_state to
- * HTTP_MSG_CHUNK_SIZE. If not enough data are available, the function does not
- * change anything and returns zero. If a parse error is encountered, the
- * function returns < 0. Note: this function is designed to parse wrapped CRLF
- * at the end of the buffer.
- */
-static inline int http_skip_chunk_crlf(struct http_msg *msg)
-{
-	const struct buffer *buf = msg->chn->buf;
-	const char *ptr;
-	int bytes;
-
-	/* NB: we'll check data availabilty at the end. It's not a
-	 * problem because whatever we match first will be checked
-	 * against the correct length.
-	 */
-	bytes = 1;
-	ptr = b_ptr(buf, msg->next);
-	if (*ptr == '\r') {
-		bytes++;
-		ptr++;
-		if (ptr >= buf->data + buf->size)
-			ptr = buf->data;
-	}
-
-	if (msg->next + bytes > buf->i)
-		return 0;
-
-	if (*ptr != '\n') {
-		msg->err_pos = buffer_count(buf, buf->p, ptr);
-		return -1;
-	}
-	return bytes;
-}
-
 /* Parses a qvalue and returns it multipled by 1000, from 0 to 1000. If the
 * value is larger than 1000, it is bound to 1000. The parser consumes up to
 * 1 digit, one dot and 3 digits and stops on the first invalid character.