REORG: http: move the HTTP/1 chunk parser to h1.{c,h}

Functions http_parse_chunk_size() and http_skip_chunk_crlf() were moved to h1.h, and http_forward_trailers() was moved to h1.c, so that they can be called from outside proto_http.c. The parts that were inline remained inline as it's critical for performance (+41% perf difference reported in an earlier test). For now the "http_" prefix remains in their names since they still depend on the http_msg type.
2025-08-11 01:26:58 +02:00 · 2017-09-21 08:40:02 +02:00 · 2017-09-21 08:40:02 +02:00 · db4893d6a4
commit db4893d6a4
parent 0da5b3bddc
4 changed files with 219 additions and 213 deletions
--- a/include/proto/h1.h
+++ b/include/proto/h1.h
@ -22,11 +22,15 @@
 #ifndef _PROTO_H1_H
 #define _PROTO_H1_H
 #include <common/buffer.h>
 #include <common/compiler.h>
 #include <common/config.h>
 #include <common/standard.h>
 #include <types/h1.h>
 #include <types/proto_http.h>
 extern const uint8_t h1_char_classes[256];
 int http_forward_trailers(struct http_msg *msg);
 #define H1_FLG_CTL  0x01
 #define H1_FLG_SEP  0x02
@ -121,5 +125,145 @@ static inline const char *h1_msg_state_str(enum h1_state msg_state)
 	}
 }
 /* This function may be called only in HTTP_MSG_CHUNK_CRLF. It reads the CRLF or
 * a possible LF alone at the end of a chunk. The caller should adjust msg->next
 * in order to include this part into the next forwarding phase.  Note that the
 * caller must ensure that ->p points to the first byte to parse.  It returns
 * the number of bytes parsed on success (1 for a lone LF, 2 for CRLF), so the
 * caller can set msg_state to HTTP_MSG_CHUNK_SIZE. If not enough data are
 * available, the function does not change anything and returns zero. If a
 * parse error is encountered, the function sets msg->err_pos and returns < 0.
 * Note: this function is designed to parse wrapped CRLF at the end of the
 * buffer.
 */
 static inline int http_skip_chunk_crlf(struct http_msg *msg)
 {
 	const struct buffer *buf = msg->chn->buf;
 	const char *ptr;
 	int bytes;
 	/* NB: we'll check data availability at the end. It's not a
 	 * problem because whatever we match first will be checked
 	 * against the correct length.
 	 */
 	bytes = 1;
 	ptr = b_ptr(buf, msg->next);
 	/* optional CR: count it and step over it, going back to buf->data
 	 * when the pointer moves past the end of the storage area.
 	 */
 	if (*ptr == '\r') {
 		bytes++;
 		ptr++;
 		if (ptr >= buf->data + buf->size)
 			ptr = buf->data;
 	}
 	/* what was examined above only counts if all of it is really there */
 	if (msg->next + bytes > buf->i)
 		return 0;
 	/* with or without a leading CR, the sequence must end with an LF */
 	if (*ptr != '\n') {
 		msg->err_pos = buffer_count(buf, buf->p, ptr);
 		return -1;
 	}
 	return bytes;
 }
 /* Parse the chunk size at msg->next. Once done, caller should adjust ->next to
 * point to the first byte of data after the chunk size, so that we know we can
 * forward exactly msg->next bytes. msg->sol contains the exact number of bytes
 * forming the chunk size. That way it is always possible to differentiate
 * between the start of the body and the start of the data.  Return the number
 * of bytes parsed on success, 0 when some data is missing, <0 on error.  On
 * success, msg->chunk_len is set to the parsed size and msg->body_len is
 * increased by it. On error, msg->err_pos is set from the position where
 * parsing stopped.  Note: this function is designed to parse wrapped CRLF at
 * the end of the buffer.
 */
 static inline int http_parse_chunk_size(struct http_msg *msg)
 {
 	const struct buffer *buf = msg->chn->buf;
 	const char *ptr = b_ptr(buf, msg->next);
 	const char *ptr_old = ptr;
 	const char *end = buf->data + buf->size;
 	const char *stop = bi_end(buf);
 	unsigned int chunk = 0;
 	/* The chunk size is in the following form, though we are only
 	 * interested in the size and CRLF :
 	 *    1*HEXDIGIT *WSP *[ ';' extensions ] CRLF
 	 */
 	/* accumulate hex digits into <chunk> until the first non-hex byte */
 	while (1) {
 		int c;
 		/* reached <stop> before the size was terminated: need more data */
 		if (ptr == stop)
 			return 0;
 		c = hex2i(*ptr);
 		if (c < 0) /* not a hex digit anymore */
 			break;
 		if (unlikely(++ptr >= end))
 			ptr = buf->data;
 		if (unlikely(chunk & 0xF8000000)) /* integer overflow will occur if result >= 2GB */
 			goto error;
 		chunk = (chunk << 4) + c;
 	}
 	/* empty size not allowed */
 	if (unlikely(ptr == ptr_old))
 		goto error;
 	/* skip the optional whitespace following the size (*WSP above) */
 	while (HTTP_IS_SPHT(*ptr)) {
 		if (++ptr >= end)
 			ptr = buf->data;
 		if (unlikely(ptr == stop))
 			return 0;
 	}
 	/* Up to there, we know that at least one byte is present at *ptr. Check
 	 * for the end of chunk size.
 	 */
 	while (1) {
 		if (likely(HTTP_IS_CRLF(*ptr))) {
 			/* we now have a CR or an LF at ptr */
 			if (likely(*ptr == '\r')) {
 				if (++ptr >= end)
 					ptr = buf->data;
 				if (ptr == stop)
 					return 0;
 			}
 			if (unlikely(*ptr != '\n'))
 				goto error;
 			if (++ptr >= end)
 				ptr = buf->data;
 			/* done */
 			break;
 		}
 		else if (likely(*ptr == ';')) {
 			/* chunk extension, ends at next CRLF */
 			if (++ptr >= end)
 				ptr = buf->data;
 			if (ptr == stop)
 				return 0;
 			while (!HTTP_IS_CRLF(*ptr)) {
 				if (++ptr >= end)
 					ptr = buf->data;
 				if (ptr == stop)
 					return 0;
 			}
 			/* we have a CRLF now, loop above */
 			continue;
 		}
 		else
 			goto error;
 	}
 	/* OK we found our CRLF and now <ptr> points to the next byte, which may
 	 * or may not be present. We save the number of bytes parsed into
 	 * msg->sol.
 	 */
 	msg->sol = ptr - ptr_old;
 	/* <ptr> wrapped past the end of the storage area: fix the difference */
 	if (unlikely(ptr < ptr_old))
 		msg->sol += buf->size;
 	msg->chunk_len = chunk;
 	msg->body_len += chunk;
 	return msg->sol;
 error:
 	msg->err_pos = buffer_count(buf, buf->p, ptr);
 	return -1;
 }
 #endif /* _PROTO_H1_H */
--- a/include/types/proto_http.h
+++ b/include/types/proto_http.h
@ -27,6 +27,7 @@
 #include <common/mini-clist.h>
 #include <common/regex.h>
 #include <types/channel.h>
 #include <types/h1.h>
 #include <types/hdr_idx.h>
 #include <types/filters.h>
--- a/src/h1.c
+++ b/src/h1.c
@ -153,3 +153,77 @@ const unsigned char h1_char_classes[256] = {
 	['~'] = H1_FLG_TOK,
 	[127] = H1_FLG_CTL,
 };
 /* This function skips trailers in the buffer associated with HTTP message
 * <msg>. The first visited position is msg->next. If the end of the trailers is
 * found, the function returns >0. So, the caller can automatically schedule it
 * to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough
 * data are available, the function does not change anything except maybe
 * msg->sol if it could parse some lines, and returns zero.  If a parse error
 * is encountered, the function sets msg->err_pos, returns < 0 and does not
 * change anything else except maybe msg->sol. Note that the message must
 * already be in HTTP_MSG_TRAILERS state before calling this function, which
 * implies that all non-trailers data have already been scheduled for
 * forwarding, and that msg->next exactly matches the length of trailers
 * already parsed and not forwarded. It is also important to note that this
 * function is designed to be able to parse wrapped headers at end of buffer.
 */
 int http_forward_trailers(struct http_msg *msg)
 {
 	const struct buffer *buf = msg->chn->buf;
 	/* we have msg->next which points to next line. Look for CRLF. But
 	 * first, we reset msg->sol */
 	msg->sol = 0;
 	/* one iteration per trailer line; msg->sol accumulates their lengths */
 	while (1) {
 		/* p1: first CR of the line, or its LF when no CR was seen.
 		 * p2: the LF ending the line.
 		 */
 		const char *p1 = NULL, *p2 = NULL;
 		const char *start = b_ptr(buf, msg->next + msg->sol);
 		const char *stop  = bi_end(buf);
 		const char *ptr   = start;
 		int bytes = 0;
 		/* scan current line and stop at LF or CRLF */
 		while (1) {
 			/* line not terminated within available data */
 			if (ptr == stop)
 				return 0;
 			if (*ptr == '\n') {
 				if (!p1)
 					p1 = ptr;
 				p2 = ptr;
 				break;
 			}
 			if (*ptr == '\r') {
 				/* a second CR on the same line is an error */
 				if (p1) {
 					msg->err_pos = buffer_count(buf, buf->p, ptr);
 					return -1;
 				}
 				p1 = ptr;
 			}
 			ptr++;
 			if (ptr >= buf->data + buf->size)
 				ptr = buf->data;
 		}
 		/* after LF; point to beginning of next line */
 		p2++;
 		if (p2 >= buf->data + buf->size)
 			p2 = buf->data;
 		/* line length; negative means the line wrapped, so add the size */
 		bytes = p2 - start;
 		if (bytes < 0)
 			bytes += buf->size;
 		msg->sol += bytes;
 		/* LF/CRLF at beginning of line => end of trailers at p2.
 		 * Everything was scheduled for forwarding, there's nothing left
 		 * from this message. */
 		if (p1 == start)
 			return 1;
 		/* OK, next line then */
 	}
 }
--- a/src/proto_http.c
+++ b/src/proto_http.c
@ -2108,219 +2108,6 @@ void http_change_connection_header(struct http_txn *txn, struct http_msg *msg, i
 	return;
 }
 /* Parse the chunk size at msg->next. Once done, caller should adjust ->next to
 * point to the first byte of data after the chunk size, so that we know we can
 * forward exactly msg->next bytes. msg->sol contains the exact number of bytes
 * forming the chunk size. That way it is always possible to differentiate
 * between the start of the body and the start of the data.  Return the number
 * of bytes parsed on success, 0 when some data is missing, <0 on error.  On
 * success, msg->chunk_len is set to the parsed size and msg->body_len is
 * increased by it. On error, msg->err_pos is set from the position where
 * parsing stopped.  Note: this function is designed to parse wrapped CRLF at
 * the end of the buffer.
 */
 static inline int http_parse_chunk_size(struct http_msg *msg)
 {
 	const struct buffer *buf = msg->chn->buf;
 	const char *ptr = b_ptr(buf, msg->next);
 	const char *ptr_old = ptr;
 	const char *end = buf->data + buf->size;
 	const char *stop = bi_end(buf);
 	unsigned int chunk = 0;
 	/* The chunk size is in the following form, though we are only
 	 * interested in the size and CRLF :
 	 *    1*HEXDIGIT *WSP *[ ';' extensions ] CRLF
 	 */
 	/* accumulate hex digits into <chunk> until the first non-hex byte */
 	while (1) {
 		int c;
 		/* reached <stop> before the size was terminated: need more data */
 		if (ptr == stop)
 			return 0;
 		c = hex2i(*ptr);
 		if (c < 0) /* not a hex digit anymore */
 			break;
 		if (unlikely(++ptr >= end))
 			ptr = buf->data;
 		if (chunk & 0xF8000000) /* integer overflow will occur if result >= 2GB */
 			goto error;
 		chunk = (chunk << 4) + c;
 	}
 	/* empty size not allowed */
 	if (unlikely(ptr == ptr_old))
 		goto error;
 	/* skip the optional whitespace following the size (*WSP above) */
 	while (HTTP_IS_SPHT(*ptr)) {
 		if (++ptr >= end)
 			ptr = buf->data;
 		if (unlikely(ptr == stop))
 			return 0;
 	}
 	/* Up to there, we know that at least one byte is present at *ptr. Check
 	 * for the end of chunk size.
 	 */
 	while (1) {
 		if (likely(HTTP_IS_CRLF(*ptr))) {
 			/* we now have a CR or an LF at ptr */
 			if (likely(*ptr == '\r')) {
 				if (++ptr >= end)
 					ptr = buf->data;
 				if (ptr == stop)
 					return 0;
 			}
 			if (*ptr != '\n')
 				goto error;
 			if (++ptr >= end)
 				ptr = buf->data;
 			/* done */
 			break;
 		}
 		else if (*ptr == ';') {
 			/* chunk extension, ends at next CRLF */
 			if (++ptr >= end)
 				ptr = buf->data;
 			if (ptr == stop)
 				return 0;
 			while (!HTTP_IS_CRLF(*ptr)) {
 				if (++ptr >= end)
 					ptr = buf->data;
 				if (ptr == stop)
 					return 0;
 			}
 			/* we have a CRLF now, loop above */
 			continue;
 		}
 		else
 			goto error;
 	}
 	/* OK we found our CRLF and now <ptr> points to the next byte, which may
 	 * or may not be present. We save the number of bytes parsed into
 	 * msg->sol.
 	 */
 	msg->sol = ptr - ptr_old;
 	/* <ptr> wrapped past the end of the storage area: fix the difference */
 	if (unlikely(ptr < ptr_old))
 		msg->sol += buf->size;
 	msg->chunk_len = chunk;
 	msg->body_len += chunk;
 	return msg->sol;
 error:
 	msg->err_pos = buffer_count(buf, buf->p, ptr);
 	return -1;
 }
 /* This function skips trailers in the buffer associated with HTTP message
 * <msg>. The first visited position is msg->next. If the end of the trailers is
 * found, the function returns >0. So, the caller can automatically schedule it
 * to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough
 * data are available, the function does not change anything except maybe
 * msg->sol if it could parse some lines, and returns zero.  If a parse error
 * is encountered, the function sets msg->err_pos, returns < 0 and does not
 * change anything else except maybe msg->sol. Note that the message must
 * already be in HTTP_MSG_TRAILERS state before calling this function, which
 * implies that all non-trailers data have already been scheduled for
 * forwarding, and that msg->next exactly matches the length of trailers
 * already parsed and not forwarded. It is also important to note that this
 * function is designed to be able to parse wrapped headers at end of buffer.
 */
 static int http_forward_trailers(struct http_msg *msg)
 {
 	const struct buffer *buf = msg->chn->buf;
 	/* we have msg->next which points to next line. Look for CRLF. But
 	 * first, we reset msg->sol */
 	msg->sol = 0;
 	/* one iteration per trailer line; msg->sol accumulates their lengths */
 	while (1) {
 		/* p1: first CR of the line, or its LF when no CR was seen.
 		 * p2: the LF ending the line.
 		 */
 		const char *p1 = NULL, *p2 = NULL;
 		const char *start = b_ptr(buf, msg->next + msg->sol);
 		const char *stop  = bi_end(buf);
 		const char *ptr   = start;
 		int bytes = 0;
 		/* scan current line and stop at LF or CRLF */
 		while (1) {
 			/* line not terminated within available data */
 			if (ptr == stop)
 				return 0;
 			if (*ptr == '\n') {
 				if (!p1)
 					p1 = ptr;
 				p2 = ptr;
 				break;
 			}
 			if (*ptr == '\r') {
 				/* a second CR on the same line is an error */
 				if (p1) {
 					msg->err_pos = buffer_count(buf, buf->p, ptr);
 					return -1;
 				}
 				p1 = ptr;
 			}
 			ptr++;
 			if (ptr >= buf->data + buf->size)
 				ptr = buf->data;
 		}
 		/* after LF; point to beginning of next line */
 		p2++;
 		if (p2 >= buf->data + buf->size)
 			p2 = buf->data;
 		/* line length; negative means the line wrapped, so add the size */
 		bytes = p2 - start;
 		if (bytes < 0)
 			bytes += buf->size;
 		msg->sol += bytes;
 		/* LF/CRLF at beginning of line => end of trailers at p2.
 		 * Everything was scheduled for forwarding, there's nothing left
 		 * from this message. */
 		if (p1 == start)
 			return 1;
 		/* OK, next line then */
 	}
 }
 /* This function may be called only in HTTP_MSG_CHUNK_CRLF. It reads the CRLF or
 * a possible LF alone at the end of a chunk. The caller should adjust msg->next
 * in order to include this part into the next forwarding phase.  Note that the
 * caller must ensure that ->p points to the first byte to parse.  It returns
 * the number of bytes parsed on success (1 for a lone LF, 2 for CRLF), so the
 * caller can set msg_state to HTTP_MSG_CHUNK_SIZE. If not enough data are
 * available, the function does not change anything and returns zero. If a
 * parse error is encountered, the function sets msg->err_pos and returns < 0.
 * Note: this function is designed to parse wrapped CRLF at the end of the
 * buffer.
 */
 static inline int http_skip_chunk_crlf(struct http_msg *msg)
 {
 	const struct buffer *buf = msg->chn->buf;
 	const char *ptr;
 	int bytes;
 	/* NB: we'll check data availability at the end. It's not a
 	 * problem because whatever we match first will be checked
 	 * against the correct length.
 	 */
 	bytes = 1;
 	ptr = b_ptr(buf, msg->next);
 	/* optional CR: count it and step over it, going back to buf->data
 	 * when the pointer moves past the end of the storage area.
 	 */
 	if (*ptr == '\r') {
 		bytes++;
 		ptr++;
 		if (ptr >= buf->data + buf->size)
 			ptr = buf->data;
 	}
 	/* what was examined above only counts if all of it is really there */
 	if (msg->next + bytes > buf->i)
 		return 0;
 	/* with or without a leading CR, the sequence must end with an LF */
 	if (*ptr != '\n') {
 		msg->err_pos = buffer_count(buf, buf->p, ptr);
 		return -1;
 	}
 	return bytes;
 }
 /* Parses a qvalue and returns it multiplied by 1000, from 0 to 1000. If the
 * value is larger than 1000, it is bound to 1000. The parser consumes up to
 * 1 digit, one dot and 3 digits and stops on the first invalid character.