diff --git a/include/proto/h1.h b/include/proto/h1.h index b29d76b7a..68ba106f2 100644 --- a/include/proto/h1.h +++ b/include/proto/h1.h @@ -29,16 +29,7 @@ #include #include #include -#include -const char *http_parse_reqline(struct http_msg *msg, - enum h1_state state, const char *ptr, const char *end, - unsigned int *ret_ptr, enum h1_state *ret_state); -const char *http_parse_stsline(struct http_msg *msg, - enum h1_state state, const char *ptr, const char *end, - unsigned int *ret_ptr, enum h1_state *ret_state); -void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx); -int http_forward_trailers(struct http_msg *msg); int h1_headers_to_hdr_list(char *start, const char *stop, struct http_hdr *hdr, unsigned int hdr_num, struct h1m *h1m, union h1_sl *slp); @@ -48,36 +39,6 @@ int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value); void h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value); void h1_parse_connection_header(struct h1m *h1m, struct ist value); -/* Macros used in the HTTP/1 parser, to check for the expected presence of - * certain bytes (ef: LF) or to skip to next byte and yield in case of failure. - */ - - -/* Expects to find an LF at . If not, set to and jump to - * . - */ -#define EXPECT_LF_HERE(ptr, bad, state, where) \ - do { \ - if (unlikely(*(ptr) != '\n')) { \ - state = (where); \ - goto bad; \ - } \ - } while (0) - -/* Increments pointer , continues to label if it's still below - * pointer , or goes to and sets to if the end - * of buffer was reached. 
- */ -#define EAT_AND_JUMP_OR_RETURN(ptr, end, more, stop, state, where) \ - do { \ - if (likely(++(ptr) < (end))) \ - goto more; \ - else { \ - state = (where); \ - goto stop; \ - } \ - } while (0) - /* for debugging, reports the HTTP/1 message state name (legacy version) */ static inline const char *h1_msg_state_str(enum h1_state msg_state) { diff --git a/include/proto/proto_http.h b/include/proto/proto_http.h index c32a555c2..329e64154 100644 --- a/include/proto/proto_http.h +++ b/include/proto/proto_http.h @@ -51,6 +51,13 @@ int http_upgrade_v09_to_v10(struct http_txn *txn); void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx); void http_txn_reset_req(struct http_txn *txn); void http_txn_reset_res(struct http_txn *txn); +const char *http_parse_reqline(struct http_msg *msg, + enum h1_state state, const char *ptr, const char *end, + unsigned int *ret_ptr, enum h1_state *ret_state); +const char *http_parse_stsline(struct http_msg *msg, + enum h1_state state, const char *ptr, const char *end, + unsigned int *ret_ptr, enum h1_state *ret_state); +int http_forward_trailers(struct http_msg *msg); int http_legacy_replace_header(struct hdr_idx *idx, struct http_msg *msg, const char *name, unsigned int name_len, const char *str, struct my_regex *re, diff --git a/src/h1.c b/src/h1.c index c61737221..c2195bc54 100644 --- a/src/h1.c +++ b/src/h1.c @@ -18,648 +18,6 @@ #include #include -/* - * This function parses a status line between and , starting with - * parser state . Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP, - * HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled. Others - * will give undefined results. - * Note that it is upon the caller's responsibility to ensure that ptr < end, - * and that msg->sol points to the beginning of the response. 
- * If a complete line is found (which implies that at least one CR or LF is - * found before , the updated is returned, otherwise NULL is - * returned indicating an incomplete line (which does not mean that parts have - * not been updated). In the incomplete case, if or are - * non-NULL, they are fed with the new and values to be passed - * upon next call. - * - * This function was intentionally designed to be called from - * http_msg_analyzer() with the lowest overhead. It should integrate perfectly - * within its state machine and use the same macros, hence the need for same - * labels and variable names. Note that msg->sol is left unchanged. - */ -const char *http_parse_stsline(struct http_msg *msg, - enum h1_state state, const char *ptr, const char *end, - unsigned int *ret_ptr, enum h1_state *ret_state) -{ - const char *msg_start = ci_head(msg->chn); - - switch (state) { - case HTTP_MSG_RPVER: - http_msg_rpver: - if (likely(HTTP_IS_VER_TOKEN(*ptr))) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER); - - if (likely(HTTP_IS_SPHT(*ptr))) { - msg->sl.st.v_l = ptr - msg_start; - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP); - } - msg->err_state = HTTP_MSG_RPVER; - state = HTTP_MSG_ERROR; - break; - - case HTTP_MSG_RPVER_SP: - http_msg_rpver_sp: - if (likely(!HTTP_IS_LWS(*ptr))) { - msg->sl.st.c = ptr - msg_start; - goto http_msg_rpcode; - } - if (likely(HTTP_IS_SPHT(*ptr))) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP); - /* so it's a CR/LF, this is invalid */ - msg->err_state = HTTP_MSG_RPVER_SP; - state = HTTP_MSG_ERROR; - break; - - case HTTP_MSG_RPCODE: - http_msg_rpcode: - if (likely(!HTTP_IS_LWS(*ptr))) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE); - - if (likely(HTTP_IS_SPHT(*ptr))) { - msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c; - EAT_AND_JUMP_OR_RETURN(ptr, end, 
http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP); - } - - /* so it's a CR/LF, so there is no reason phrase */ - msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c; - http_msg_rsp_reason: - /* FIXME: should we support HTTP responses without any reason phrase ? */ - msg->sl.st.r = ptr - msg_start; - msg->sl.st.r_l = 0; - goto http_msg_rpline_eol; - - case HTTP_MSG_RPCODE_SP: - http_msg_rpcode_sp: - if (likely(!HTTP_IS_LWS(*ptr))) { - msg->sl.st.r = ptr - msg_start; - goto http_msg_rpreason; - } - if (likely(HTTP_IS_SPHT(*ptr))) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP); - /* so it's a CR/LF, so there is no reason phrase */ - goto http_msg_rsp_reason; - - case HTTP_MSG_RPREASON: - http_msg_rpreason: - if (likely(!HTTP_IS_CRLF(*ptr))) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON); - msg->sl.st.r_l = ptr - msg_start - msg->sl.st.r; - http_msg_rpline_eol: - /* We have seen the end of line. Note that we do not - * necessarily have the \n yet, but at least we know that we - * have EITHER \r OR \n, otherwise the response would not be - * complete. We can then record the response length and return - * to the caller which will be able to register it. - */ - msg->sl.st.l = ptr - msg_start - msg->sol; - return ptr; - - default: -#ifdef DEBUG_FULL - fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state); - exit(1); -#endif - ; - } - - http_msg_ood: - /* out of valid data */ - if (ret_state) - *ret_state = state; - if (ret_ptr) - *ret_ptr = ptr - msg_start; - return NULL; -} - -/* - * This function parses a request line between and , starting with - * parser state . Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP, - * HTTP_MSG_RQURI, HTTP_MSG_RQURI_SP and HTTP_MSG_RQVER are handled. Others - * will give undefined results. 
- * Note that it is upon the caller's responsibility to ensure that ptr < end, - * and that msg->sol points to the beginning of the request. - * If a complete line is found (which implies that at least one CR or LF is - * found before , the updated is returned, otherwise NULL is - * returned indicating an incomplete line (which does not mean that parts have - * not been updated). In the incomplete case, if or are - * non-NULL, they are fed with the new and values to be passed - * upon next call. - * - * This function was intentionally designed to be called from - * http_msg_analyzer() with the lowest overhead. It should integrate perfectly - * within its state machine and use the same macros, hence the need for same - * labels and variable names. Note that msg->sol is left unchanged. - */ -const char *http_parse_reqline(struct http_msg *msg, - enum h1_state state, const char *ptr, const char *end, - unsigned int *ret_ptr, enum h1_state *ret_state) -{ - const char *msg_start = ci_head(msg->chn); - - switch (state) { - case HTTP_MSG_RQMETH: - http_msg_rqmeth: - if (likely(HTTP_IS_TOKEN(*ptr))) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, HTTP_MSG_RQMETH); - - if (likely(HTTP_IS_SPHT(*ptr))) { - msg->sl.rq.m_l = ptr - msg_start; - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP); - } - - if (likely(HTTP_IS_CRLF(*ptr))) { - /* HTTP 0.9 request */ - msg->sl.rq.m_l = ptr - msg_start; - http_msg_req09_uri: - msg->sl.rq.u = ptr - msg_start; - http_msg_req09_uri_e: - msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u; - http_msg_req09_ver: - msg->sl.rq.v = ptr - msg_start; - msg->sl.rq.v_l = 0; - goto http_msg_rqline_eol; - } - msg->err_state = HTTP_MSG_RQMETH; - state = HTTP_MSG_ERROR; - break; - - case HTTP_MSG_RQMETH_SP: - http_msg_rqmeth_sp: - if (likely(!HTTP_IS_LWS(*ptr))) { - msg->sl.rq.u = ptr - msg_start; - goto http_msg_rquri; - } - if (likely(HTTP_IS_SPHT(*ptr))) - EAT_AND_JUMP_OR_RETURN(ptr, 
end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP); - /* so it's a CR/LF, meaning an HTTP 0.9 request */ - goto http_msg_req09_uri; - - case HTTP_MSG_RQURI: - http_msg_rquri: -#if defined(__x86_64__) || \ - defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \ - defined(__ARM_ARCH_7A__) - /* speedup: skip bytes not between 0x21 and 0x7e inclusive */ - while (ptr <= end - sizeof(int)) { - int x = *(int *)ptr - 0x21212121; - if (x & 0x80808080) - break; - - x -= 0x5e5e5e5e; - if (!(x & 0x80808080)) - break; - - ptr += sizeof(int); - } -#endif - if (ptr >= end) { - state = HTTP_MSG_RQURI; - goto http_msg_ood; - } - http_msg_rquri2: - if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */ - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, HTTP_MSG_RQURI); - - if (likely(HTTP_IS_SPHT(*ptr))) { - msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u; - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP); - } - - if (likely((unsigned char)*ptr >= 128)) { - /* non-ASCII chars are forbidden unless option - * accept-invalid-http-request is enabled in the frontend. - * In any case, we capture the faulty char. 
- */ - if (msg->err_pos < -1) - goto invalid_char; - if (msg->err_pos == -1) - msg->err_pos = ptr - msg_start; - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, HTTP_MSG_RQURI); - } - - if (likely(HTTP_IS_CRLF(*ptr))) { - /* so it's a CR/LF, meaning an HTTP 0.9 request */ - goto http_msg_req09_uri_e; - } - - /* OK forbidden chars, 0..31 or 127 */ - invalid_char: - msg->err_pos = ptr - msg_start; - msg->err_state = HTTP_MSG_RQURI; - state = HTTP_MSG_ERROR; - break; - - case HTTP_MSG_RQURI_SP: - http_msg_rquri_sp: - if (likely(!HTTP_IS_LWS(*ptr))) { - msg->sl.rq.v = ptr - msg_start; - goto http_msg_rqver; - } - if (likely(HTTP_IS_SPHT(*ptr))) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP); - /* so it's a CR/LF, meaning an HTTP 0.9 request */ - goto http_msg_req09_ver; - - case HTTP_MSG_RQVER: - http_msg_rqver: - if (likely(HTTP_IS_VER_TOKEN(*ptr))) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, HTTP_MSG_RQVER); - - if (likely(HTTP_IS_CRLF(*ptr))) { - msg->sl.rq.v_l = ptr - msg_start - msg->sl.rq.v; - http_msg_rqline_eol: - /* We have seen the end of line. Note that we do not - * necessarily have the \n yet, but at least we know that we - * have EITHER \r OR \n, otherwise the request would not be - * complete. We can then record the request length and return - * to the caller which will be able to register it. - */ - msg->sl.rq.l = ptr - msg_start - msg->sol; - return ptr; - } - - /* neither an HTTP_VER token nor a CRLF */ - msg->err_state = HTTP_MSG_RQVER; - state = HTTP_MSG_ERROR; - break; - - default: -#ifdef DEBUG_FULL - fprintf(stderr, "FIXME !!!! 
impossible state at %s:%d = %d\n", __FILE__, __LINE__, state); - exit(1); -#endif - ; - } - - http_msg_ood: - /* out of valid data */ - if (ret_state) - *ret_state = state; - if (ret_ptr) - *ret_ptr = ptr - msg_start; - return NULL; -} - -/* - * This function parses an HTTP message, either a request or a response, - * depending on the initial msg->msg_state. The caller is responsible for - * ensuring that the message does not wrap. The function can be preempted - * everywhere when data are missing and recalled at the exact same location - * with no information loss. The message may even be realigned between two - * calls. The header index is re-initialized when switching from - * MSG_R[PQ]BEFORE to MSG_RPVER|MSG_RQMETH. It modifies msg->sol among other - * fields. Note that msg->sol will be initialized after completing the first - * state, so that none of the msg pointers has to be initialized prior to the - * first call. - */ -void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx) -{ - enum h1_state state; /* updated only when leaving the FSM */ - register const char *ptr, *end; /* request pointers, to avoid dereferences */ - struct buffer *buf = &msg->chn->buf; - char *input = b_head(buf); - - state = msg->msg_state; - ptr = input + msg->next; - end = b_stop(buf); - - if (unlikely(ptr >= end)) - goto http_msg_ood; - - switch (state) { - /* - * First, states that are specific to the response only. - * We check them first so that request and headers are - * closer to each other (accessed more often). - */ - case HTTP_MSG_RPBEFORE: - http_msg_rpbefore: - if (likely(HTTP_IS_TOKEN(*ptr))) { - /* we have a start of message, but we have to check - * first if we need to remove some CRLF. We can only - * do this when o=0. - */ - if (unlikely(ptr != input)) { - if (co_data(msg->chn)) - goto http_msg_ood; - /* Remove empty leading lines, as recommended by RFC2616. 
*/ - b_del(buf, ptr - input); - input = b_head(buf); - } - msg->sol = 0; - msg->sl.st.l = 0; /* used in debug mode */ - hdr_idx_init(idx); - state = HTTP_MSG_RPVER; - goto http_msg_rpver; - } - - if (unlikely(!HTTP_IS_CRLF(*ptr))) { - state = HTTP_MSG_RPBEFORE; - goto http_msg_invalid; - } - - if (unlikely(*ptr == '\n')) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE); - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR); - /* stop here */ - - case HTTP_MSG_RPBEFORE_CR: - http_msg_rpbefore_cr: - EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR); - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE); - /* stop here */ - - case HTTP_MSG_RPVER: - http_msg_rpver: - case HTTP_MSG_RPVER_SP: - case HTTP_MSG_RPCODE: - case HTTP_MSG_RPCODE_SP: - case HTTP_MSG_RPREASON: - ptr = (char *)http_parse_stsline(msg, - state, ptr, end, - &msg->next, &msg->msg_state); - if (unlikely(!ptr)) - return; - - /* we have a full response and we know that we have either a CR - * or an LF at . - */ - hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r'); - - msg->sol = ptr - input; - if (likely(*ptr == '\r')) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END); - goto http_msg_rpline_end; - - case HTTP_MSG_RPLINE_END: - http_msg_rpline_end: - /* msg->sol must point to the first of CR or LF. */ - EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END); - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST); - /* stop here */ - - /* - * Second, states that are specific to the request only - */ - case HTTP_MSG_RQBEFORE: - http_msg_rqbefore: - if (likely(HTTP_IS_TOKEN(*ptr))) { - /* we have a start of message, but we have to check - * first if we need to remove some CRLF. We can only - * do this when o=0. 
- */ - if (likely(ptr != input)) { - if (co_data(msg->chn)) - goto http_msg_ood; - /* Remove empty leading lines, as recommended by RFC2616. */ - b_del(buf, ptr - input); - input = b_head(buf); - } - msg->sol = 0; - msg->sl.rq.l = 0; /* used in debug mode */ - state = HTTP_MSG_RQMETH; - goto http_msg_rqmeth; - } - - if (unlikely(!HTTP_IS_CRLF(*ptr))) { - state = HTTP_MSG_RQBEFORE; - goto http_msg_invalid; - } - - if (unlikely(*ptr == '\n')) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE); - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, HTTP_MSG_RQBEFORE_CR); - /* stop here */ - - case HTTP_MSG_RQBEFORE_CR: - http_msg_rqbefore_cr: - EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQBEFORE_CR); - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE); - /* stop here */ - - case HTTP_MSG_RQMETH: - http_msg_rqmeth: - case HTTP_MSG_RQMETH_SP: - case HTTP_MSG_RQURI: - case HTTP_MSG_RQURI_SP: - case HTTP_MSG_RQVER: - ptr = (char *)http_parse_reqline(msg, - state, ptr, end, - &msg->next, &msg->msg_state); - if (unlikely(!ptr)) - return; - - /* we have a full request and we know that we have either a CR - * or an LF at . - */ - hdr_idx_set_start(idx, msg->sl.rq.l, *ptr == '\r'); - - msg->sol = ptr - input; - if (likely(*ptr == '\r')) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, HTTP_MSG_RQLINE_END); - goto http_msg_rqline_end; - - case HTTP_MSG_RQLINE_END: - http_msg_rqline_end: - /* check for HTTP/0.9 request : no version information available. - * msg->sol must point to the first of CR or LF. 
- */ - if (unlikely(msg->sl.rq.v_l == 0)) - goto http_msg_last_lf; - - EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQLINE_END); - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST); - /* stop here */ - - /* - * Common states below - */ - case HTTP_MSG_HDR_FIRST: - http_msg_hdr_first: - msg->sol = ptr - input; - if (likely(!HTTP_IS_CRLF(*ptr))) { - goto http_msg_hdr_name; - } - - if (likely(*ptr == '\r')) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF); - goto http_msg_last_lf; - - case HTTP_MSG_HDR_NAME: - http_msg_hdr_name: - /* assumes msg->sol points to the first char */ - if (likely(HTTP_IS_TOKEN(*ptr))) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME); - - if (likely(*ptr == ':')) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP); - - if (likely(msg->err_pos < -1) || *ptr == '\n') { - state = HTTP_MSG_HDR_NAME; - goto http_msg_invalid; - } - - if (msg->err_pos == -1) /* capture error pointer */ - msg->err_pos = ptr - input; /* >= 0 now */ - - /* and we still accept this non-token character */ - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME); - - case HTTP_MSG_HDR_L1_SP: - http_msg_hdr_l1_sp: - /* assumes msg->sol points to the first char */ - if (likely(HTTP_IS_SPHT(*ptr))) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP); - - /* header value can be basically anything except CR/LF */ - msg->sov = ptr - input; - - if (likely(!HTTP_IS_CRLF(*ptr))) { - goto http_msg_hdr_val; - } - - if (likely(*ptr == '\r')) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF); - goto http_msg_hdr_l1_lf; - - case HTTP_MSG_HDR_L1_LF: - http_msg_hdr_l1_lf: - EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF); - EAT_AND_JUMP_OR_RETURN(ptr, end, 
http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS); - - case HTTP_MSG_HDR_L1_LWS: - http_msg_hdr_l1_lws: - if (likely(HTTP_IS_SPHT(*ptr))) { - /* replace HT,CR,LF with spaces */ - for (; input + msg->sov < ptr; msg->sov++) - input[msg->sov] = ' '; - goto http_msg_hdr_l1_sp; - } - /* we had a header consisting only in spaces ! */ - msg->eol = msg->sov; - goto http_msg_complete_header; - - case HTTP_MSG_HDR_VAL: - http_msg_hdr_val: - /* assumes msg->sol points to the first char, and msg->sov - * points to the first character of the value. - */ - - /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D - * and lower. In fact since most of the time is spent in the loop, we - * also remove the sign bit test so that bytes 0x8e..0x0d break the - * loop, but we don't care since they're very rare in header values. - */ -#if defined(__x86_64__) - while (ptr <= end - sizeof(long)) { - if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL) - goto http_msg_hdr_val2; - ptr += sizeof(long); - } -#endif -#if defined(__x86_64__) || \ - defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \ - defined(__ARM_ARCH_7A__) - while (ptr <= end - sizeof(int)) { - if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080) - goto http_msg_hdr_val2; - ptr += sizeof(int); - } -#endif - if (ptr >= end) { - state = HTTP_MSG_HDR_VAL; - goto http_msg_ood; - } - http_msg_hdr_val2: - if (likely(!HTTP_IS_CRLF(*ptr))) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL); - - msg->eol = ptr - input; - /* Note: we could also copy eol into ->eoh so that we have the - * real header end in case it ends with lots of LWS, but is this - * really needed ? 
- */ - if (likely(*ptr == '\r')) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF); - goto http_msg_hdr_l2_lf; - - case HTTP_MSG_HDR_L2_LF: - http_msg_hdr_l2_lf: - EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF); - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS); - - case HTTP_MSG_HDR_L2_LWS: - http_msg_hdr_l2_lws: - if (unlikely(HTTP_IS_SPHT(*ptr))) { - /* LWS: replace HT,CR,LF with spaces */ - for (; input + msg->eol < ptr; msg->eol++) - input[msg->eol] = ' '; - goto http_msg_hdr_val; - } - http_msg_complete_header: - /* - * It was a new header, so the last one is finished. - * Assumes msg->sol points to the first char, msg->sov points - * to the first character of the value and msg->eol to the - * first CR or LF so we know how the line ends. We insert last - * header into the index. - */ - if (unlikely(hdr_idx_add(msg->eol - msg->sol, input[msg->eol] == '\r', - idx, idx->tail) < 0)) { - state = HTTP_MSG_HDR_L2_LWS; - goto http_msg_invalid; - } - - msg->sol = ptr - input; - if (likely(!HTTP_IS_CRLF(*ptr))) { - goto http_msg_hdr_name; - } - - if (likely(*ptr == '\r')) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF); - goto http_msg_last_lf; - - case HTTP_MSG_LAST_LF: - http_msg_last_lf: - /* Assumes msg->sol points to the first of either CR or LF. - * Sets ->sov and ->next to the total header length, ->eoh to - * the last CRLF, and ->eol to the last CRLF length (1 or 2). - */ - EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF); - ptr++; - msg->sov = msg->next = ptr - input; - msg->eoh = msg->sol; - msg->sol = 0; - msg->eol = msg->sov - msg->eoh; - msg->msg_state = HTTP_MSG_BODY; - return; - - case HTTP_MSG_ERROR: - /* this may only happen if we call http_msg_analyser() twice with an error */ - break; - - default: -#ifdef DEBUG_FULL - fprintf(stderr, "FIXME !!!! 
impossible state at %s:%d = %d\n", __FILE__, __LINE__, state); - exit(1); -#endif - ; - } - http_msg_ood: - /* out of data */ - msg->msg_state = state; - msg->next = ptr - input; - return; - - http_msg_invalid: - /* invalid message */ - msg->err_state = state; - msg->msg_state = HTTP_MSG_ERROR; - msg->next = ptr - input; - return; -} - - /* Parse the Content-Length header field of an HTTP/1 request. The function * checks all possible occurrences of a comma-delimited value, and verifies * if any of them doesn't match a previous value. It returns <0 if a value @@ -807,6 +165,35 @@ void h1_parse_connection_header(struct h1m *h1m, struct ist value) } } +/* Macros used in the HTTP/1 parser, to check for the expected presence of + * certain bytes (ef: LF) or to skip to next byte and yield in case of failure. + */ + +/* Expects to find an LF at . If not, set to and jump to + * . + */ +#define EXPECT_LF_HERE(ptr, bad, state, where) \ + do { \ + if (unlikely(*(ptr) != '\n')) { \ + state = (where); \ + goto bad; \ + } \ + } while (0) + +/* Increments pointer , continues to label if it's still below + * pointer , or goes to and sets to if the end + * of buffer was reached. + */ +#define EAT_AND_JUMP_OR_RETURN(ptr, end, more, stop, state, where) \ + do { \ + if (likely(++(ptr) < (end))) \ + goto more; \ + else { \ + state = (where); \ + goto stop; \ + } \ + } while (0) + /* This function parses a contiguous HTTP/1 headers block starting at * and ending before , at once, and converts it a list of (name,value) * pairs representing header fields into the array of size , @@ -1570,68 +957,3 @@ int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int } return count - ofs; } - -/* This function skips trailers in the buffer associated with HTTP message - * . The first visited position is msg->next. If the end of the trailers is - * found, the function returns >0. 
So, the caller can automatically schedul it - * to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough - * data are available, the function does not change anything except maybe - * msg->sol if it could parse some lines, and returns zero. If a parse error - * is encountered, the function returns < 0 and does not change anything except - * maybe msg->sol. Note that the message must already be in HTTP_MSG_TRAILERS - * state before calling this function, which implies that all non-trailers data - * have already been scheduled for forwarding, and that msg->next exactly - * matches the length of trailers already parsed and not forwarded. It is also - * important to note that this function is designed to be able to parse wrapped - * headers at end of buffer. - */ -int http_forward_trailers(struct http_msg *msg) -{ - const struct buffer *buf = &msg->chn->buf; - const char *parse = ci_head(msg->chn); - const char *stop = b_tail(buf); - - /* we have msg->next which points to next line. Look for CRLF. But - * first, we reset msg->sol */ - msg->sol = 0; - while (1) { - const char *p1 = NULL, *p2 = NULL; - const char *start = c_ptr(msg->chn, msg->next + msg->sol); - const char *ptr = start; - - /* scan current line and stop at LF or CRLF */ - while (1) { - if (ptr == stop) - return 0; - - if (*ptr == '\n') { - if (!p1) - p1 = ptr; - p2 = ptr; - break; - } - - if (*ptr == '\r') { - if (p1) { - msg->err_pos = b_dist(buf, parse, ptr); - return -1; - } - p1 = ptr; - } - - ptr = b_next(buf, ptr); - } - - /* after LF; point to beginning of next line */ - p2 = b_next(buf, p2); - msg->sol += b_dist(buf, start, p2); - - /* LF/CRLF at beginning of line => end of trailers at p2. - * Everything was scheduled for forwarding, there's nothing left - * from this message. 
*/ - if (p1 == start) - return 1; - - /* OK, next line then */ - } -} diff --git a/src/http_fetch.c b/src/http_fetch.c index 995622b06..14b8ba239 100644 --- a/src/http_fetch.c +++ b/src/http_fetch.c @@ -32,6 +32,7 @@ #include #include +#include #include #include #include diff --git a/src/http_msg.c b/src/http_msg.c index fa5606d2c..d422f59a3 100644 --- a/src/http_msg.c +++ b/src/http_msg.c @@ -504,3 +504,737 @@ unsigned int http_get_fhdr(const struct http_msg *msg, const char *hname, int hl return 1; } +/* Macros used in the HTTP/1 parser, to check for the expected presence of + * certain bytes (ef: LF) or to skip to next byte and yield in case of failure. + */ + +/* Expects to find an LF at . If not, set to and jump to + * . + */ +#define EXPECT_LF_HERE(ptr, bad, state, where) \ + do { \ + if (unlikely(*(ptr) != '\n')) { \ + state = (where); \ + goto bad; \ + } \ + } while (0) + +/* Increments pointer , continues to label if it's still below + * pointer , or goes to and sets to if the end + * of buffer was reached. + */ +#define EAT_AND_JUMP_OR_RETURN(ptr, end, more, stop, state, where) \ + do { \ + if (likely(++(ptr) < (end))) \ + goto more; \ + else { \ + state = (where); \ + goto stop; \ + } \ + } while (0) + +/* + * This function parses a status line between and , starting with + * parser state . Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP, + * HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled. Others + * will give undefined results. + * Note that it is upon the caller's responsibility to ensure that ptr < end, + * and that msg->sol points to the beginning of the response. + * If a complete line is found (which implies that at least one CR or LF is + * found before , the updated is returned, otherwise NULL is + * returned indicating an incomplete line (which does not mean that parts have + * not been updated). In the incomplete case, if or are + * non-NULL, they are fed with the new and values to be passed + * upon next call. 
+ * + * This function was intentionally designed to be called from + * http_msg_analyzer() with the lowest overhead. It should integrate perfectly + * within its state machine and use the same macros, hence the need for same + * labels and variable names. Note that msg->sol is left unchanged. + */ +const char *http_parse_stsline(struct http_msg *msg, + enum h1_state state, const char *ptr, const char *end, + unsigned int *ret_ptr, enum h1_state *ret_state) +{ + const char *msg_start = ci_head(msg->chn); + + switch (state) { + case HTTP_MSG_RPVER: + http_msg_rpver: + if (likely(HTTP_IS_VER_TOKEN(*ptr))) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER); + + if (likely(HTTP_IS_SPHT(*ptr))) { + msg->sl.st.v_l = ptr - msg_start; + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP); + } + msg->err_state = HTTP_MSG_RPVER; + state = HTTP_MSG_ERROR; + break; + + case HTTP_MSG_RPVER_SP: + http_msg_rpver_sp: + if (likely(!HTTP_IS_LWS(*ptr))) { + msg->sl.st.c = ptr - msg_start; + goto http_msg_rpcode; + } + if (likely(HTTP_IS_SPHT(*ptr))) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP); + /* so it's a CR/LF before any status code was seen, this is invalid */ + msg->err_state = HTTP_MSG_RPVER_SP; + state = HTTP_MSG_ERROR; + break; + + case HTTP_MSG_RPCODE: + http_msg_rpcode: + if (likely(!HTTP_IS_LWS(*ptr))) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE); + + if (likely(HTTP_IS_SPHT(*ptr))) { + msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c; + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP); + } + + /* so it's a CR/LF, so there is no reason phrase */ + msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c; + http_msg_rsp_reason: + /* FIXME: should we support HTTP responses without any reason phrase ?
*/ + msg->sl.st.r = ptr - msg_start; + msg->sl.st.r_l = 0; + goto http_msg_rpline_eol; + + case HTTP_MSG_RPCODE_SP: + http_msg_rpcode_sp: + if (likely(!HTTP_IS_LWS(*ptr))) { + msg->sl.st.r = ptr - msg_start; + goto http_msg_rpreason; + } + if (likely(HTTP_IS_SPHT(*ptr))) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP); + /* so it's a CR/LF, so there is no reason phrase */ + goto http_msg_rsp_reason; + + case HTTP_MSG_RPREASON: + http_msg_rpreason: + if (likely(!HTTP_IS_CRLF(*ptr))) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON); + msg->sl.st.r_l = ptr - msg_start - msg->sl.st.r; + http_msg_rpline_eol: + /* We have seen the end of line. Note that we do not + * necessarily have the \n yet, but at least we know that we + * have EITHER \r OR \n, otherwise the response would not be + * complete. We can then record the response length and return + * to the caller which will be able to register it. + */ + msg->sl.st.l = ptr - msg_start - msg->sol; + return ptr; + + default: +#ifdef DEBUG_FULL + fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state); + exit(1); +#endif + ; + } + + http_msg_ood: + /* out of valid data, or an error was met: report the current state and the offset of ptr to the caller when it asked for them, and return NULL */ + if (ret_state) + *ret_state = state; + if (ret_ptr) + *ret_ptr = ptr - msg_start; + return NULL; +} + +/* + * This function parses a request line between <ptr> and <end>, starting with + * parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP, + * HTTP_MSG_RQURI, HTTP_MSG_RQURI_SP and HTTP_MSG_RQVER are handled. Others + * will give undefined results. + * Note that it is upon the caller's responsibility to ensure that ptr < end, + * and that msg->sol points to the beginning of the request. + * If a complete line is found (which implies that at least one CR or LF is + * found before <end>), the updated <ptr> is returned, otherwise NULL is + * returned indicating an incomplete line (which does not mean that parts have + * not been updated).
In the incomplete case, if <ret_ptr> or <ret_state> are + * non-NULL, they are fed with the new <ptr> and <state> values to be passed + * upon next call. + * + * This function was intentionally designed to be called from + * http_msg_analyzer() with the lowest overhead. It should integrate perfectly + * within its state machine and use the same macros, hence the need for same + * labels and variable names. Note that msg->sol is left unchanged. + */ +const char *http_parse_reqline(struct http_msg *msg, + enum h1_state state, const char *ptr, const char *end, + unsigned int *ret_ptr, enum h1_state *ret_state) +{ + const char *msg_start = ci_head(msg->chn); + + switch (state) { + case HTTP_MSG_RQMETH: + http_msg_rqmeth: + if (likely(HTTP_IS_TOKEN(*ptr))) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, HTTP_MSG_RQMETH); + + if (likely(HTTP_IS_SPHT(*ptr))) { + msg->sl.rq.m_l = ptr - msg_start; + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP); + } + + if (likely(HTTP_IS_CRLF(*ptr))) { + /* HTTP 0.9 request: CR/LF right after the method, so on this path the URI and version lengths end up being zero */ + msg->sl.rq.m_l = ptr - msg_start; + http_msg_req09_uri: + msg->sl.rq.u = ptr - msg_start; + http_msg_req09_uri_e: + msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u; + http_msg_req09_ver: + msg->sl.rq.v = ptr - msg_start; + msg->sl.rq.v_l = 0; + goto http_msg_rqline_eol; + } + msg->err_state = HTTP_MSG_RQMETH; + state = HTTP_MSG_ERROR; + break; + + case HTTP_MSG_RQMETH_SP: + http_msg_rqmeth_sp: + if (likely(!HTTP_IS_LWS(*ptr))) { + msg->sl.rq.u = ptr - msg_start; + goto http_msg_rquri; + } + if (likely(HTTP_IS_SPHT(*ptr))) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP); + /* so it's a CR/LF, meaning an HTTP 0.9 request */ + goto http_msg_req09_uri; + + case HTTP_MSG_RQURI: + http_msg_rquri: +#if defined(__x86_64__) || \ + defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \ + defined(__ARM_ARCH_7A__) + /* speedup: skip bytes not between 0x21 and
0x7e inclusive. NOTE(review): this word-at-a-time scan reads the buffer through an int pointer and is only compiled for the architectures listed in the #if above - presumably because they tolerate unaligned loads; confirm before extending the list. */ + while (ptr <= end - sizeof(int)) { + int x = *(int *)ptr - 0x21212121; + if (x & 0x80808080) + break; + + x -= 0x5e5e5e5e; + if (!(x & 0x80808080)) + break; + + ptr += sizeof(int); + } +#endif + if (ptr >= end) { + state = HTTP_MSG_RQURI; + goto http_msg_ood; + } + http_msg_rquri2: + if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */ + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, HTTP_MSG_RQURI); + + if (likely(HTTP_IS_SPHT(*ptr))) { + msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u; + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP); + } + + if (likely((unsigned char)*ptr >= 128)) { + /* non-ASCII chars are forbidden unless option + * accept-invalid-http-request is enabled in the frontend. + * In any case, we capture the faulty char: when err_pos is below -1 the char is rejected, otherwise it is accepted and its offset is stored if err_pos was still -1. + */ + if (msg->err_pos < -1) + goto invalid_char; + if (msg->err_pos == -1) + msg->err_pos = ptr - msg_start; + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, HTTP_MSG_RQURI); + } + + if (likely(HTTP_IS_CRLF(*ptr))) { + /* so it's a CR/LF, meaning an HTTP 0.9 request */ + goto http_msg_req09_uri_e; + } + + /* OK forbidden chars, 0..31 or 127 */ + invalid_char: + msg->err_pos = ptr - msg_start; + msg->err_state = HTTP_MSG_RQURI; + state = HTTP_MSG_ERROR; + break; + + case HTTP_MSG_RQURI_SP: + http_msg_rquri_sp: + if (likely(!HTTP_IS_LWS(*ptr))) { + msg->sl.rq.v = ptr - msg_start; + goto http_msg_rqver; + } + if (likely(HTTP_IS_SPHT(*ptr))) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP); + /* so it's a CR/LF, meaning an HTTP 0.9 request */ + goto http_msg_req09_ver; + + case HTTP_MSG_RQVER: + http_msg_rqver: + if (likely(HTTP_IS_VER_TOKEN(*ptr))) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, HTTP_MSG_RQVER); + + if (likely(HTTP_IS_CRLF(*ptr))) { + msg->sl.rq.v_l = ptr - msg_start - msg->sl.rq.v; + http_msg_rqline_eol: + /* We have
seen the end of line. Note that we do not + * necessarily have the \n yet, but at least we know that we + * have EITHER \r OR \n, otherwise the request would not be + * complete. We can then record the request length and return + * to the caller which will be able to register it. + */ + msg->sl.rq.l = ptr - msg_start - msg->sol; + return ptr; + } + + /* neither an HTTP_VER token nor a CRLF */ + msg->err_state = HTTP_MSG_RQVER; + state = HTTP_MSG_ERROR; + break; + + default: +#ifdef DEBUG_FULL + fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state); + exit(1); +#endif + ; + } + + http_msg_ood: + /* out of valid data, or an error was met: report the current state and the offset of ptr to the caller when it asked for them, and return NULL */ + if (ret_state) + *ret_state = state; + if (ret_ptr) + *ret_ptr = ptr - msg_start; + return NULL; +} + +/* + * This function parses an HTTP message, either a request or a response, + * depending on the initial msg->msg_state. The caller is responsible for + * ensuring that the message does not wrap. The function can be preempted + * everywhere when data are missing and recalled at the exact same location + * with no information loss. The message may even be realigned between two + * calls. The header index is re-initialized when switching from + * MSG_R[PQ]BEFORE to MSG_RPVER|MSG_RQMETH. It modifies msg->sol among other + * fields. Note that msg->sol will be initialized after completing the first + * state, so that none of the msg pointers has to be initialized prior to the + * first call. NOTE(review): only the response path (HTTP_MSG_RPBEFORE) calls hdr_idx_init() in the code below, the request path does not - confirm the caller initializes the index for requests. + */ +void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx) +{ + enum h1_state state; /* updated only when leaving the FSM */ + register const char *ptr, *end; /* request pointers, to avoid dereferences */ + struct buffer *buf = &msg->chn->buf; + char *input = b_head(buf); + + state = msg->msg_state; + ptr = input + msg->next; + end = b_stop(buf); + + if (unlikely(ptr >= end)) + goto http_msg_ood; + + switch (state) { + /* + * First, states that are specific to the response only.
+ * We check them first so that request and headers are + * closer to each other (accessed more often). + */ + case HTTP_MSG_RPBEFORE: + http_msg_rpbefore: + if (likely(HTTP_IS_TOKEN(*ptr))) { + /* we have a start of message, but we have to check + * first if we need to remove some CRLF. We can only + * do this when o=0, i.e. when co_data(msg->chn) returns zero. + */ + if (unlikely(ptr != input)) { + if (co_data(msg->chn)) + goto http_msg_ood; + /* Remove empty leading lines, as recommended by RFC2616. */ + b_del(buf, ptr - input); + input = b_head(buf); + } + msg->sol = 0; + msg->sl.st.l = 0; /* used in debug mode */ + hdr_idx_init(idx); + state = HTTP_MSG_RPVER; + goto http_msg_rpver; + } + + if (unlikely(!HTTP_IS_CRLF(*ptr))) { + state = HTTP_MSG_RPBEFORE; + goto http_msg_invalid; + } + + if (unlikely(*ptr == '\n')) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE); + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR); + /* stop here: both branches of EAT_AND_JUMP_OR_RETURN jump away, so there is no fall-through into the next case */ + + case HTTP_MSG_RPBEFORE_CR: + http_msg_rpbefore_cr: + EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR); + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE); + /* stop here */ + + case HTTP_MSG_RPVER: + http_msg_rpver: + case HTTP_MSG_RPVER_SP: + case HTTP_MSG_RPCODE: + case HTTP_MSG_RPCODE_SP: + case HTTP_MSG_RPREASON: + ptr = (char *)http_parse_stsline(msg, + state, ptr, end, + &msg->next, &msg->msg_state); + if (unlikely(!ptr)) + return; /* incomplete or invalid line: msg->next and msg->msg_state were already updated through the pointers passed above */ + + /* we have a full response and we know that we have either a CR + * or an LF at <ptr>. + */ + hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r'); + + msg->sol = ptr - input; + if (likely(*ptr == '\r')) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END); + goto http_msg_rpline_end; + + case HTTP_MSG_RPLINE_END: + http_msg_rpline_end: + /* msg->sol must point to the first of CR or LF.
*/ + EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END); + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST); + /* stop here */ + + /* + * Second, states that are specific to the request only + */ + case HTTP_MSG_RQBEFORE: + http_msg_rqbefore: + if (likely(HTTP_IS_TOKEN(*ptr))) { + /* we have a start of message, but we have to check + * first if we need to remove some CRLF. We can only + * do this when o=0, i.e. when co_data(msg->chn) returns zero. + */ + if (likely(ptr != input)) { + if (co_data(msg->chn)) + goto http_msg_ood; + /* Remove empty leading lines, as recommended by RFC2616. */ + b_del(buf, ptr - input); + input = b_head(buf); + } + msg->sol = 0; + msg->sl.rq.l = 0; /* used in debug mode */ + state = HTTP_MSG_RQMETH; + goto http_msg_rqmeth; + } + + if (unlikely(!HTTP_IS_CRLF(*ptr))) { + state = HTTP_MSG_RQBEFORE; + goto http_msg_invalid; + } + + if (unlikely(*ptr == '\n')) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE); + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, HTTP_MSG_RQBEFORE_CR); + /* stop here */ + + case HTTP_MSG_RQBEFORE_CR: + http_msg_rqbefore_cr: + EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQBEFORE_CR); + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE); + /* stop here */ + + case HTTP_MSG_RQMETH: + http_msg_rqmeth: + case HTTP_MSG_RQMETH_SP: + case HTTP_MSG_RQURI: + case HTTP_MSG_RQURI_SP: + case HTTP_MSG_RQVER: + ptr = (char *)http_parse_reqline(msg, + state, ptr, end, + &msg->next, &msg->msg_state); + if (unlikely(!ptr)) + return; /* incomplete or invalid line: msg->next and msg->msg_state were already updated through the pointers passed above */ + + /* we have a full request and we know that we have either a CR + * or an LF at <ptr>.
+ */ + hdr_idx_set_start(idx, msg->sl.rq.l, *ptr == '\r'); + + msg->sol = ptr - input; + if (likely(*ptr == '\r')) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, HTTP_MSG_RQLINE_END); + goto http_msg_rqline_end; + + case HTTP_MSG_RQLINE_END: + http_msg_rqline_end: + /* check for HTTP/0.9 request : no version information available. In that case header parsing is skipped and we jump to the final LF check. + * msg->sol must point to the first of CR or LF. + */ + if (unlikely(msg->sl.rq.v_l == 0)) + goto http_msg_last_lf; + + EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQLINE_END); + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST); + /* stop here */ + + /* + * Common states below + */ + case HTTP_MSG_HDR_FIRST: + http_msg_hdr_first: + msg->sol = ptr - input; + if (likely(!HTTP_IS_CRLF(*ptr))) { + goto http_msg_hdr_name; + } + + if (likely(*ptr == '\r')) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF); + goto http_msg_last_lf; + + case HTTP_MSG_HDR_NAME: + http_msg_hdr_name: + /* assumes msg->sol points to the first char */ + if (likely(HTTP_IS_TOKEN(*ptr))) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME); + + if (likely(*ptr == ':')) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP); + + if (likely(msg->err_pos < -1) || *ptr == '\n') { + state = HTTP_MSG_HDR_NAME; + goto http_msg_invalid; + } + + if (msg->err_pos == -1) /* capture error pointer */ + msg->err_pos = ptr - input; /* >= 0 now */ + + /* and we still accept this non-token character */ + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME); + + case HTTP_MSG_HDR_L1_SP: + http_msg_hdr_l1_sp: + /* assumes msg->sol points to the first char */ + if (likely(HTTP_IS_SPHT(*ptr))) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP); + + /* header value can be basically anything except CR/LF
*/ + msg->sov = ptr - input; + + if (likely(!HTTP_IS_CRLF(*ptr))) { + goto http_msg_hdr_val; + } + + if (likely(*ptr == '\r')) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF); + goto http_msg_hdr_l1_lf; + + case HTTP_MSG_HDR_L1_LF: + http_msg_hdr_l1_lf: + EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF); + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS); + + case HTTP_MSG_HDR_L1_LWS: + http_msg_hdr_l1_lws: + if (likely(HTTP_IS_SPHT(*ptr))) { + /* the new line starts with SP/HT, so the value continues here: replace HT,CR,LF from msg->sov up to ptr with spaces */ + for (; input + msg->sov < ptr; msg->sov++) + input[msg->sov] = ' '; + goto http_msg_hdr_l1_sp; + } + /* we had a header consisting only in spaces ! */ + msg->eol = msg->sov; + goto http_msg_complete_header; + + case HTTP_MSG_HDR_VAL: + http_msg_hdr_val: + /* assumes msg->sol points to the first char, and msg->sov + * points to the first character of the value. + */ + + /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D + * and lower. In fact since most of the time is spent in the loop, we + * also remove the sign bit test so that bytes 0x8e..0x0d break the + * loop, but we don't care since they're very rare in header values.
+ */ +#if defined(__x86_64__) + while (ptr <= end - sizeof(long)) { + if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL) + goto http_msg_hdr_val2; + ptr += sizeof(long); + } +#endif +#if defined(__x86_64__) || \ + defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \ + defined(__ARM_ARCH_7A__) + while (ptr <= end - sizeof(int)) { + if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080) + goto http_msg_hdr_val2; + ptr += sizeof(int); + } +#endif + if (ptr >= end) { + state = HTTP_MSG_HDR_VAL; + goto http_msg_ood; + } + http_msg_hdr_val2: + if (likely(!HTTP_IS_CRLF(*ptr))) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL); + + msg->eol = ptr - input; + /* Note: we could also copy eol into ->eoh so that we have the + * real header end in case it ends with lots of LWS, but is this + * really needed ? + */ + if (likely(*ptr == '\r')) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF); + goto http_msg_hdr_l2_lf; + + case HTTP_MSG_HDR_L2_LF: + http_msg_hdr_l2_lf: + EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF); + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS); + + case HTTP_MSG_HDR_L2_LWS: + http_msg_hdr_l2_lws: + if (unlikely(HTTP_IS_SPHT(*ptr))) { + /* LWS: replace HT,CR,LF with spaces */ + for (; input + msg->eol < ptr; msg->eol++) + input[msg->eol] = ' '; + goto http_msg_hdr_val; + } + http_msg_complete_header: + /* + * It was a new header, so the last one is finished. + * Assumes msg->sol points to the first char, msg->sov points + * to the first character of the value and msg->eol to the + * first CR or LF so we know how the line ends. We insert last + * header into the index.
+ */ + if (unlikely(hdr_idx_add(msg->eol - msg->sol, input[msg->eol] == '\r', + idx, idx->tail) < 0)) { + state = HTTP_MSG_HDR_L2_LWS; + goto http_msg_invalid; + } + + msg->sol = ptr - input; + if (likely(!HTTP_IS_CRLF(*ptr))) { + goto http_msg_hdr_name; + } + + if (likely(*ptr == '\r')) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF); + goto http_msg_last_lf; + + case HTTP_MSG_LAST_LF: + http_msg_last_lf: + /* Assumes msg->sol points to the first of either CR or LF. + * Sets ->sov and ->next to the total header length, ->eoh to + * the last CRLF, and ->eol to the last CRLF length (1 or 2). + */ + EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF); + ptr++; + msg->sov = msg->next = ptr - input; + msg->eoh = msg->sol; + msg->sol = 0; + msg->eol = msg->sov - msg->eoh; + msg->msg_state = HTTP_MSG_BODY; + return; + + case HTTP_MSG_ERROR: + /* this may only happen if we call http_msg_analyzer() twice with an error */ + break; + + default: +#ifdef DEBUG_FULL + fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state); + exit(1); +#endif + ; + } + http_msg_ood: + /* out of data: save the current state and parsing offset in msg so that the next call resumes from here */ + msg->msg_state = state; + msg->next = ptr - input; + return; + + http_msg_invalid: + /* invalid message: remember the state in which the error was met, then switch the message to HTTP_MSG_ERROR */ + msg->err_state = state; + msg->msg_state = HTTP_MSG_ERROR; + msg->next = ptr - input; + return; +} + +/* This function skips trailers in the buffer associated with HTTP message + * <msg>. The first visited position is msg->next. If the end of the trailers is + * found, the function returns >0. So, the caller can automatically schedule it + * to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough + * data are available, the function does not change anything except maybe + * msg->sol if it could parse some lines, and returns zero. If a parse error + * is encountered, the function returns < 0 and does not change anything except + * maybe msg->sol. NOTE(review): the code below also sets msg->err_pos before returning on a parse error.
Note that the message must already be in HTTP_MSG_TRAILERS + * state before calling this function, which implies that all non-trailers data + * have already been scheduled for forwarding, and that msg->next exactly + * matches the length of trailers already parsed and not forwarded. It is also + * important to note that this function is designed to be able to parse wrapped + * headers at end of buffer. + */ +int http_forward_trailers(struct http_msg *msg) +{ + const struct buffer *buf = &msg->chn->buf; + const char *parse = ci_head(msg->chn); + const char *stop = b_tail(buf); + + /* we have msg->next which points to next line. Look for CRLF. But + * first, we reset msg->sol */ + msg->sol = 0; + while (1) { + const char *p1 = NULL, *p2 = NULL; + const char *start = c_ptr(msg->chn, msg->next + msg->sol); + const char *ptr = start; + + /* scan current line and stop at LF or CRLF: p1 is the first CR or LF met on the line, p2 the LF; a CR met while p1 is already set is a parse error */ + while (1) { + if (ptr == stop) + return 0; + + if (*ptr == '\n') { + if (!p1) + p1 = ptr; + p2 = ptr; + break; + } + + if (*ptr == '\r') { + if (p1) { + msg->err_pos = b_dist(buf, parse, ptr); + return -1; + } + p1 = ptr; + } + + ptr = b_next(buf, ptr); + } + + /* after LF; point to beginning of next line */ + p2 = b_next(buf, p2); + msg->sol += b_dist(buf, start, p2); + + /* LF/CRLF at beginning of line => end of trailers at p2. + * Everything was scheduled for forwarding, there's nothing left + * from this message. */ + if (p1 == start) + return 1; + + /* OK, next line then */ + } +}