mirror of
https://git.haproxy.org/git/haproxy.git/
synced 2025-08-11 01:26:58 +02:00
REORG: http: move the HTTP/1 chunk parser to h1.{c,h}
Functions http_parse_chunk_size() and http_skip_chunk_crlf() were moved to h1.h, and http_forward_trailers() was moved to h1.c, so that they can be called from outside. The parts that were inline remain inline as this is critical for performance (a +41% performance difference was reported in an earlier test). For now the "http_" prefix remains in their names since they still depend on the http_msg type.
This commit is contained in:
parent
0da5b3bddc
commit
db4893d6a4
@ -22,11 +22,15 @@
|
|||||||
#ifndef _PROTO_H1_H
|
#ifndef _PROTO_H1_H
|
||||||
#define _PROTO_H1_H
|
#define _PROTO_H1_H
|
||||||
|
|
||||||
|
#include <common/buffer.h>
|
||||||
#include <common/compiler.h>
|
#include <common/compiler.h>
|
||||||
#include <common/config.h>
|
#include <common/config.h>
|
||||||
|
#include <common/standard.h>
|
||||||
#include <types/h1.h>
|
#include <types/h1.h>
|
||||||
|
#include <types/proto_http.h>
|
||||||
|
|
||||||
extern const uint8_t h1_char_classes[256];
|
extern const uint8_t h1_char_classes[256];
|
||||||
|
int http_forward_trailers(struct http_msg *msg);
|
||||||
|
|
||||||
#define H1_FLG_CTL 0x01
|
#define H1_FLG_CTL 0x01
|
||||||
#define H1_FLG_SEP 0x02
|
#define H1_FLG_SEP 0x02
|
||||||
@ -121,5 +125,145 @@ static inline const char *h1_msg_state_str(enum h1_state msg_state)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* This function may be called only in HTTP_MSG_CHUNK_CRLF. It reads the CRLF or
|
||||||
|
* a possible LF alone at the end of a chunk. The caller should adjust msg->next
|
||||||
|
* in order to include this part into the next forwarding phase. Note that the
|
||||||
|
* caller must ensure that ->p points to the first byte to parse. It returns
|
||||||
|
* the number of bytes parsed on success, so the caller can set msg_state to
|
||||||
|
* HTTP_MSG_CHUNK_SIZE. If not enough data are available, the function does not
|
||||||
|
* change anything and returns zero. If a parse error is encountered, the
|
||||||
|
* function returns < 0. Note: this function is designed to parse wrapped CRLF
|
||||||
|
* at the end of the buffer.
|
||||||
|
*/
|
||||||
|
static inline int http_skip_chunk_crlf(struct http_msg *msg)
|
||||||
|
{
|
||||||
|
const struct buffer *buf = msg->chn->buf;
|
||||||
|
const char *ptr;
|
||||||
|
int bytes;
|
||||||
|
|
||||||
|
/* NB: we'll check data availabilty at the end. It's not a
|
||||||
|
* problem because whatever we match first will be checked
|
||||||
|
* against the correct length.
|
||||||
|
*/
|
||||||
|
bytes = 1;
|
||||||
|
ptr = b_ptr(buf, msg->next);
|
||||||
|
if (*ptr == '\r') {
|
||||||
|
bytes++;
|
||||||
|
ptr++;
|
||||||
|
if (ptr >= buf->data + buf->size)
|
||||||
|
ptr = buf->data;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (msg->next + bytes > buf->i)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (*ptr != '\n') {
|
||||||
|
msg->err_pos = buffer_count(buf, buf->p, ptr);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parse the chunk size at msg->next. Once done, caller should adjust ->next to
|
||||||
|
* point to the first byte of data after the chunk size, so that we know we can
|
||||||
|
* forward exactly msg->next bytes. msg->sol contains the exact number of bytes
|
||||||
|
* forming the chunk size. That way it is always possible to differentiate
|
||||||
|
* between the start of the body and the start of the data. Return the number
|
||||||
|
* of byte parsed on success, 0 when some data is missing, <0 on error. Note:
|
||||||
|
* this function is designed to parse wrapped CRLF at the end of the buffer.
|
||||||
|
*/
|
||||||
|
static inline int http_parse_chunk_size(struct http_msg *msg)
|
||||||
|
{
|
||||||
|
const struct buffer *buf = msg->chn->buf;
|
||||||
|
const char *ptr = b_ptr(buf, msg->next);
|
||||||
|
const char *ptr_old = ptr;
|
||||||
|
const char *end = buf->data + buf->size;
|
||||||
|
const char *stop = bi_end(buf);
|
||||||
|
unsigned int chunk = 0;
|
||||||
|
|
||||||
|
/* The chunk size is in the following form, though we are only
|
||||||
|
* interested in the size and CRLF :
|
||||||
|
* 1*HEXDIGIT *WSP *[ ';' extensions ] CRLF
|
||||||
|
*/
|
||||||
|
while (1) {
|
||||||
|
int c;
|
||||||
|
if (ptr == stop)
|
||||||
|
return 0;
|
||||||
|
c = hex2i(*ptr);
|
||||||
|
if (c < 0) /* not a hex digit anymore */
|
||||||
|
break;
|
||||||
|
if (unlikely(++ptr >= end))
|
||||||
|
ptr = buf->data;
|
||||||
|
if (unlikely(chunk & 0xF8000000)) /* integer overflow will occur if result >= 2GB */
|
||||||
|
goto error;
|
||||||
|
chunk = (chunk << 4) + c;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* empty size not allowed */
|
||||||
|
if (unlikely(ptr == ptr_old))
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
while (HTTP_IS_SPHT(*ptr)) {
|
||||||
|
if (++ptr >= end)
|
||||||
|
ptr = buf->data;
|
||||||
|
if (unlikely(ptr == stop))
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Up to there, we know that at least one byte is present at *ptr. Check
|
||||||
|
* for the end of chunk size.
|
||||||
|
*/
|
||||||
|
while (1) {
|
||||||
|
if (likely(HTTP_IS_CRLF(*ptr))) {
|
||||||
|
/* we now have a CR or an LF at ptr */
|
||||||
|
if (likely(*ptr == '\r')) {
|
||||||
|
if (++ptr >= end)
|
||||||
|
ptr = buf->data;
|
||||||
|
if (ptr == stop)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unlikely(*ptr != '\n'))
|
||||||
|
goto error;
|
||||||
|
if (++ptr >= end)
|
||||||
|
ptr = buf->data;
|
||||||
|
/* done */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else if (likely(*ptr == ';')) {
|
||||||
|
/* chunk extension, ends at next CRLF */
|
||||||
|
if (++ptr >= end)
|
||||||
|
ptr = buf->data;
|
||||||
|
if (ptr == stop)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
while (!HTTP_IS_CRLF(*ptr)) {
|
||||||
|
if (++ptr >= end)
|
||||||
|
ptr = buf->data;
|
||||||
|
if (ptr == stop)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
/* we have a CRLF now, loop above */
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* OK we found our CRLF and now <ptr> points to the next byte, which may
|
||||||
|
* or may not be present. We save the number of bytes parsed into
|
||||||
|
* msg->sol.
|
||||||
|
*/
|
||||||
|
msg->sol = ptr - ptr_old;
|
||||||
|
if (unlikely(ptr < ptr_old))
|
||||||
|
msg->sol += buf->size;
|
||||||
|
msg->chunk_len = chunk;
|
||||||
|
msg->body_len += chunk;
|
||||||
|
return msg->sol;
|
||||||
|
error:
|
||||||
|
msg->err_pos = buffer_count(buf, buf->p, ptr);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#endif /* _PROTO_H1_H */
|
#endif /* _PROTO_H1_H */
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
#include <common/mini-clist.h>
|
#include <common/mini-clist.h>
|
||||||
#include <common/regex.h>
|
#include <common/regex.h>
|
||||||
|
|
||||||
|
#include <types/channel.h>
|
||||||
#include <types/h1.h>
|
#include <types/h1.h>
|
||||||
#include <types/hdr_idx.h>
|
#include <types/hdr_idx.h>
|
||||||
#include <types/filters.h>
|
#include <types/filters.h>
|
||||||
|
74
src/h1.c
74
src/h1.c
@ -153,3 +153,77 @@ const unsigned char h1_char_classes[256] = {
|
|||||||
['~'] = H1_FLG_TOK,
|
['~'] = H1_FLG_TOK,
|
||||||
[127] = H1_FLG_CTL,
|
[127] = H1_FLG_CTL,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/* This function skips trailers in the buffer associated with HTTP message
|
||||||
|
* <msg>. The first visited position is msg->next. If the end of the trailers is
|
||||||
|
* found, the function returns >0. So, the caller can automatically schedul it
|
||||||
|
* to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough
|
||||||
|
* data are available, the function does not change anything except maybe
|
||||||
|
* msg->sol if it could parse some lines, and returns zero. If a parse error
|
||||||
|
* is encountered, the function returns < 0 and does not change anything except
|
||||||
|
* maybe msg->sol. Note that the message must already be in HTTP_MSG_TRAILERS
|
||||||
|
* state before calling this function, which implies that all non-trailers data
|
||||||
|
* have already been scheduled for forwarding, and that msg->next exactly
|
||||||
|
* matches the length of trailers already parsed and not forwarded. It is also
|
||||||
|
* important to note that this function is designed to be able to parse wrapped
|
||||||
|
* headers at end of buffer.
|
||||||
|
*/
|
||||||
|
int http_forward_trailers(struct http_msg *msg)
|
||||||
|
{
|
||||||
|
const struct buffer *buf = msg->chn->buf;
|
||||||
|
|
||||||
|
/* we have msg->next which points to next line. Look for CRLF. But
|
||||||
|
* first, we reset msg->sol */
|
||||||
|
msg->sol = 0;
|
||||||
|
while (1) {
|
||||||
|
const char *p1 = NULL, *p2 = NULL;
|
||||||
|
const char *start = b_ptr(buf, msg->next + msg->sol);
|
||||||
|
const char *stop = bi_end(buf);
|
||||||
|
const char *ptr = start;
|
||||||
|
int bytes = 0;
|
||||||
|
|
||||||
|
/* scan current line and stop at LF or CRLF */
|
||||||
|
while (1) {
|
||||||
|
if (ptr == stop)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (*ptr == '\n') {
|
||||||
|
if (!p1)
|
||||||
|
p1 = ptr;
|
||||||
|
p2 = ptr;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*ptr == '\r') {
|
||||||
|
if (p1) {
|
||||||
|
msg->err_pos = buffer_count(buf, buf->p, ptr);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
p1 = ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
ptr++;
|
||||||
|
if (ptr >= buf->data + buf->size)
|
||||||
|
ptr = buf->data;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* after LF; point to beginning of next line */
|
||||||
|
p2++;
|
||||||
|
if (p2 >= buf->data + buf->size)
|
||||||
|
p2 = buf->data;
|
||||||
|
|
||||||
|
bytes = p2 - start;
|
||||||
|
if (bytes < 0)
|
||||||
|
bytes += buf->size;
|
||||||
|
msg->sol += bytes;
|
||||||
|
|
||||||
|
/* LF/CRLF at beginning of line => end of trailers at p2.
|
||||||
|
* Everything was scheduled for forwarding, there's nothing left
|
||||||
|
* from this message. */
|
||||||
|
if (p1 == start)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
/* OK, next line then */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
213
src/proto_http.c
213
src/proto_http.c
@ -2108,219 +2108,6 @@ void http_change_connection_header(struct http_txn *txn, struct http_msg *msg, i
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Parse the chunk size at msg->next. Once done, caller should adjust ->next to
|
|
||||||
* point to the first byte of data after the chunk size, so that we know we can
|
|
||||||
* forward exactly msg->next bytes. msg->sol contains the exact number of bytes
|
|
||||||
* forming the chunk size. That way it is always possible to differentiate
|
|
||||||
* between the start of the body and the start of the data. Return the number
|
|
||||||
* of byte parsed on success, 0 when some data is missing, <0 on error. Note:
|
|
||||||
* this function is designed to parse wrapped CRLF at the end of the buffer.
|
|
||||||
*/
|
|
||||||
static inline int http_parse_chunk_size(struct http_msg *msg)
|
|
||||||
{
|
|
||||||
const struct buffer *buf = msg->chn->buf;
|
|
||||||
const char *ptr = b_ptr(buf, msg->next);
|
|
||||||
const char *ptr_old = ptr;
|
|
||||||
const char *end = buf->data + buf->size;
|
|
||||||
const char *stop = bi_end(buf);
|
|
||||||
unsigned int chunk = 0;
|
|
||||||
|
|
||||||
/* The chunk size is in the following form, though we are only
|
|
||||||
* interested in the size and CRLF :
|
|
||||||
* 1*HEXDIGIT *WSP *[ ';' extensions ] CRLF
|
|
||||||
*/
|
|
||||||
while (1) {
|
|
||||||
int c;
|
|
||||||
if (ptr == stop)
|
|
||||||
return 0;
|
|
||||||
c = hex2i(*ptr);
|
|
||||||
if (c < 0) /* not a hex digit anymore */
|
|
||||||
break;
|
|
||||||
if (unlikely(++ptr >= end))
|
|
||||||
ptr = buf->data;
|
|
||||||
if (chunk & 0xF8000000) /* integer overflow will occur if result >= 2GB */
|
|
||||||
goto error;
|
|
||||||
chunk = (chunk << 4) + c;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* empty size not allowed */
|
|
||||||
if (unlikely(ptr == ptr_old))
|
|
||||||
goto error;
|
|
||||||
|
|
||||||
while (HTTP_IS_SPHT(*ptr)) {
|
|
||||||
if (++ptr >= end)
|
|
||||||
ptr = buf->data;
|
|
||||||
if (unlikely(ptr == stop))
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Up to there, we know that at least one byte is present at *ptr. Check
|
|
||||||
* for the end of chunk size.
|
|
||||||
*/
|
|
||||||
while (1) {
|
|
||||||
if (likely(HTTP_IS_CRLF(*ptr))) {
|
|
||||||
/* we now have a CR or an LF at ptr */
|
|
||||||
if (likely(*ptr == '\r')) {
|
|
||||||
if (++ptr >= end)
|
|
||||||
ptr = buf->data;
|
|
||||||
if (ptr == stop)
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (*ptr != '\n')
|
|
||||||
goto error;
|
|
||||||
if (++ptr >= end)
|
|
||||||
ptr = buf->data;
|
|
||||||
/* done */
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
else if (*ptr == ';') {
|
|
||||||
/* chunk extension, ends at next CRLF */
|
|
||||||
if (++ptr >= end)
|
|
||||||
ptr = buf->data;
|
|
||||||
if (ptr == stop)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
while (!HTTP_IS_CRLF(*ptr)) {
|
|
||||||
if (++ptr >= end)
|
|
||||||
ptr = buf->data;
|
|
||||||
if (ptr == stop)
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
/* we have a CRLF now, loop above */
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* OK we found our CRLF and now <ptr> points to the next byte, which may
|
|
||||||
* or may not be present. We save the number of bytes parsed into
|
|
||||||
* msg->sol.
|
|
||||||
*/
|
|
||||||
msg->sol = ptr - ptr_old;
|
|
||||||
if (unlikely(ptr < ptr_old))
|
|
||||||
msg->sol += buf->size;
|
|
||||||
msg->chunk_len = chunk;
|
|
||||||
msg->body_len += chunk;
|
|
||||||
return msg->sol;
|
|
||||||
error:
|
|
||||||
msg->err_pos = buffer_count(buf, buf->p, ptr);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* This function skips trailers in the buffer associated with HTTP message
|
|
||||||
* <msg>. The first visited position is msg->next. If the end of the trailers is
|
|
||||||
* found, the function returns >0. So, the caller can automatically schedul it
|
|
||||||
* to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough
|
|
||||||
* data are available, the function does not change anything except maybe
|
|
||||||
* msg->sol if it could parse some lines, and returns zero. If a parse error
|
|
||||||
* is encountered, the function returns < 0 and does not change anything except
|
|
||||||
* maybe msg->sol. Note that the message must already be in HTTP_MSG_TRAILERS
|
|
||||||
* state before calling this function, which implies that all non-trailers data
|
|
||||||
* have already been scheduled for forwarding, and that msg->next exactly
|
|
||||||
* matches the length of trailers already parsed and not forwarded. It is also
|
|
||||||
* important to note that this function is designed to be able to parse wrapped
|
|
||||||
* headers at end of buffer.
|
|
||||||
*/
|
|
||||||
static int http_forward_trailers(struct http_msg *msg)
|
|
||||||
{
|
|
||||||
const struct buffer *buf = msg->chn->buf;
|
|
||||||
|
|
||||||
/* we have msg->next which points to next line. Look for CRLF. But
|
|
||||||
* first, we reset msg->sol */
|
|
||||||
msg->sol = 0;
|
|
||||||
while (1) {
|
|
||||||
const char *p1 = NULL, *p2 = NULL;
|
|
||||||
const char *start = b_ptr(buf, msg->next + msg->sol);
|
|
||||||
const char *stop = bi_end(buf);
|
|
||||||
const char *ptr = start;
|
|
||||||
int bytes = 0;
|
|
||||||
|
|
||||||
/* scan current line and stop at LF or CRLF */
|
|
||||||
while (1) {
|
|
||||||
if (ptr == stop)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
if (*ptr == '\n') {
|
|
||||||
if (!p1)
|
|
||||||
p1 = ptr;
|
|
||||||
p2 = ptr;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (*ptr == '\r') {
|
|
||||||
if (p1) {
|
|
||||||
msg->err_pos = buffer_count(buf, buf->p, ptr);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
p1 = ptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
ptr++;
|
|
||||||
if (ptr >= buf->data + buf->size)
|
|
||||||
ptr = buf->data;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* after LF; point to beginning of next line */
|
|
||||||
p2++;
|
|
||||||
if (p2 >= buf->data + buf->size)
|
|
||||||
p2 = buf->data;
|
|
||||||
|
|
||||||
bytes = p2 - start;
|
|
||||||
if (bytes < 0)
|
|
||||||
bytes += buf->size;
|
|
||||||
msg->sol += bytes;
|
|
||||||
|
|
||||||
/* LF/CRLF at beginning of line => end of trailers at p2.
|
|
||||||
* Everything was scheduled for forwarding, there's nothing left
|
|
||||||
* from this message. */
|
|
||||||
if (p1 == start)
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
/* OK, next line then */
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* This function may be called only in HTTP_MSG_CHUNK_CRLF. It reads the CRLF or
|
|
||||||
* a possible LF alone at the end of a chunk. The caller should adjust msg->next
|
|
||||||
* in order to include this part into the next forwarding phase. Note that the
|
|
||||||
* caller must ensure that ->p points to the first byte to parse. It returns
|
|
||||||
* the number of bytes parsed on success, so the caller can set msg_state to
|
|
||||||
* HTTP_MSG_CHUNK_SIZE. If not enough data are available, the function does not
|
|
||||||
* change anything and returns zero. If a parse error is encountered, the
|
|
||||||
* function returns < 0. Note: this function is designed to parse wrapped CRLF
|
|
||||||
* at the end of the buffer.
|
|
||||||
*/
|
|
||||||
static inline int http_skip_chunk_crlf(struct http_msg *msg)
|
|
||||||
{
|
|
||||||
const struct buffer *buf = msg->chn->buf;
|
|
||||||
const char *ptr;
|
|
||||||
int bytes;
|
|
||||||
|
|
||||||
/* NB: we'll check data availabilty at the end. It's not a
|
|
||||||
* problem because whatever we match first will be checked
|
|
||||||
* against the correct length.
|
|
||||||
*/
|
|
||||||
bytes = 1;
|
|
||||||
ptr = b_ptr(buf, msg->next);
|
|
||||||
if (*ptr == '\r') {
|
|
||||||
bytes++;
|
|
||||||
ptr++;
|
|
||||||
if (ptr >= buf->data + buf->size)
|
|
||||||
ptr = buf->data;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (msg->next + bytes > buf->i)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
if (*ptr != '\n') {
|
|
||||||
msg->err_pos = buffer_count(buf, buf->p, ptr);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
return bytes;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Parses a qvalue and returns it multiplied by 1000, from 0 to 1000. If the
|
/* Parses a qvalue and returns it multiplied by 1000, from 0 to 1000. If the
|
||||||
* value is larger than 1000, it is bound to 1000. The parser consumes up to
|
* value is larger than 1000, it is bound to 1000. The parser consumes up to
|
||||||
* 1 digit, one dot and 3 digits and stops on the first invalid character.
|
* 1 digit, one dot and 3 digits and stops on the first invalid character.
|
||||||
|
Loading…
Reference in New Issue
Block a user