/* * HTTP/1 protocol analyzer * * Copyright 2000-2017 Willy Tarreau * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * */ #include #include #include #include #include #include /* * This function parses a status line between <ptr> and <end>, starting with * parser state <state>. Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP, * HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled. Others * will give undefined results. * Note that it is upon the caller's responsibility to ensure that ptr < end, * and that msg->sol points to the beginning of the response. * If a complete line is found (which implies that at least one CR or LF is * found before <end>), the updated <ptr> is returned, otherwise NULL is * returned indicating an incomplete line (which does not mean that parts have * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are * non-NULL, they are fed with the new <ptr> and <state> values to be passed * upon next call. * * This function was intentionally designed to be called from * http_msg_analyzer() with the lowest overhead. It should integrate perfectly * within its state machine and use the same macros, hence the need for same * labels and variable names. Note that msg->sol is left unchanged. 
*/
const char *http_parse_stsline(struct http_msg *msg,
			       enum h1_state state, const char *ptr, const char *end,
			       unsigned int *ret_ptr, enum h1_state *ret_state)
{
	/* all offsets stored into msg->sl.st are relative to this pointer */
	const char *msg_start = ci_head(msg->chn);

	/* NOTE(review): EAT_AND_JUMP_OR_RETURN is defined outside this view; it
	 * presumably consumes one byte and jumps to its 3rd argument, or stores
	 * its 6th argument into <state> and jumps to its 4th argument when <end>
	 * is reached -- confirm against the macro definition.
	 */
	switch (state)	{
	case HTTP_MSG_RPVER:
	http_msg_rpver:
		/* still inside the version token */
		if (likely(HTTP_IS_VER_TOKEN(*ptr)))
			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);

		if (likely(HTTP_IS_SPHT(*ptr))) {
			/* the version ends here, its length is counted from msg_start */
			msg->sl.st.v_l = ptr - msg_start;
			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
		}
		/* neither a version character nor a blank: report the error state */
		msg->err_state = HTTP_MSG_RPVER;
		state = HTTP_MSG_ERROR;
		break;

	case HTTP_MSG_RPVER_SP:
	http_msg_rpver_sp:
		if (likely(!HTTP_IS_LWS(*ptr))) {
			/* first non-blank char: record the offset of the status code */
			msg->sl.st.c = ptr - msg_start;
			goto http_msg_rpcode;
		}
		if (likely(HTTP_IS_SPHT(*ptr)))
			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
		/* so it's a CR/LF, this is invalid */
		msg->err_state = HTTP_MSG_RPVER_SP;
		state = HTTP_MSG_ERROR;
		break;

	case HTTP_MSG_RPCODE:
	http_msg_rpcode:
		/* the status code extends up to the next LWS character */
		if (likely(!HTTP_IS_LWS(*ptr)))
			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);

		if (likely(HTTP_IS_SPHT(*ptr))) {
			msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
		}

		/* so it's a CR/LF, so there is no reason phrase */
		msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;

	http_msg_rsp_reason:
		/* FIXME: should we support HTTP responses without any reason phrase ? */
		/* an empty reason is recorded at the current position */
		msg->sl.st.r = ptr - msg_start;
		msg->sl.st.r_l = 0;
		goto http_msg_rpline_eol;

	case HTTP_MSG_RPCODE_SP:
	http_msg_rpcode_sp:
		if (likely(!HTTP_IS_LWS(*ptr))) {
			/* first non-blank char: record the offset of the reason phrase */
			msg->sl.st.r = ptr - msg_start;
			goto http_msg_rpreason;
		}
		if (likely(HTTP_IS_SPHT(*ptr)))
			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
		/* so it's a CR/LF, so there is no reason phrase */
		goto http_msg_rsp_reason;

	case HTTP_MSG_RPREASON:
	http_msg_rpreason:
		/* the reason phrase extends up to the first CR or LF */
		if (likely(!HTTP_IS_CRLF(*ptr)))
			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
		msg->sl.st.r_l = ptr - msg_start - msg->sl.st.r;
	http_msg_rpline_eol:
		/* We have seen the end of line. Note that we do not
		 * necessarily have the \n yet, but at least we know that we
		 * have EITHER \r OR \n, otherwise the response would not be
		 * complete. We can then record the response length and return
		 * to the caller which will be able to register it.
		 */
		msg->sl.st.l = ptr - msg_start - msg->sol;
		return ptr;

	default:
#ifdef DEBUG_FULL
		fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
		exit(1);
#endif
		;
	}

 http_msg_ood:
	/* out of valid data: this point is also reached after an error, with
	 * <state> set to HTTP_MSG_ERROR. Report the state and the offset of
	 * <ptr> relative to msg_start through the optional output pointers.
	 */
	if (ret_state)
		*ret_state = state;
	if (ret_ptr)
		*ret_ptr = ptr - msg_start;
	return NULL;
}

/*
 * This function parses a request line between <ptr> and <end>, starting with
 * parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP,
 * HTTP_MSG_RQURI, HTTP_MSG_RQURI_SP and HTTP_MSG_RQVER are handled. Others
 * will give undefined results.
 * Note that it is upon the caller's responsibility to ensure that ptr < end,
 * and that msg->sol points to the beginning of the request.
 * If a complete line is found (which implies that at least one CR or LF is
 * found before <end>), the updated <ptr> is returned, otherwise NULL is
 * returned indicating an incomplete line (which does not mean that parts have
 * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
 * non-NULL, they are fed with the new <ptr> and <state> values to be passed
 * upon next call.
* * This function was intentionally designed to be called from * http_msg_analyzer() with the lowest overhead. It should integrate perfectly * within its state machine and use the same macros, hence the need for same * labels and variable names. Note that msg->sol is left unchanged. */ const char *http_parse_reqline(struct http_msg *msg, enum h1_state state, const char *ptr, const char *end, unsigned int *ret_ptr, enum h1_state *ret_state) { const char *msg_start = ci_head(msg->chn); switch (state) { case HTTP_MSG_RQMETH: http_msg_rqmeth: if (likely(HTTP_IS_TOKEN(*ptr))) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, HTTP_MSG_RQMETH); if (likely(HTTP_IS_SPHT(*ptr))) { msg->sl.rq.m_l = ptr - msg_start; EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP); } if (likely(HTTP_IS_CRLF(*ptr))) { /* HTTP 0.9 request */ msg->sl.rq.m_l = ptr - msg_start; http_msg_req09_uri: msg->sl.rq.u = ptr - msg_start; http_msg_req09_uri_e: msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u; http_msg_req09_ver: msg->sl.rq.v = ptr - msg_start; msg->sl.rq.v_l = 0; goto http_msg_rqline_eol; } msg->err_state = HTTP_MSG_RQMETH; state = HTTP_MSG_ERROR; break; case HTTP_MSG_RQMETH_SP: http_msg_rqmeth_sp: if (likely(!HTTP_IS_LWS(*ptr))) { msg->sl.rq.u = ptr - msg_start; goto http_msg_rquri; } if (likely(HTTP_IS_SPHT(*ptr))) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP); /* so it's a CR/LF, meaning an HTTP 0.9 request */ goto http_msg_req09_uri; case HTTP_MSG_RQURI: http_msg_rquri: #if defined(__x86_64__) || \ defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \ defined(__ARM_ARCH_7A__) /* speedup: skip bytes not between 0x21 and 0x7e inclusive */ while (ptr <= end - sizeof(int)) { int x = *(int *)ptr - 0x21212121; if (x & 0x80808080) break; x -= 0x5e5e5e5e; if (!(x & 0x80808080)) break; ptr += sizeof(int); } #endif if (ptr >= end) { state = 
HTTP_MSG_RQURI; goto http_msg_ood; } http_msg_rquri2: if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, HTTP_MSG_RQURI); if (likely(HTTP_IS_SPHT(*ptr))) { msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u; EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP); } if (likely((unsigned char)*ptr >= 128)) { /* non-ASCII chars are forbidden unless option * accept-invalid-http-request is enabled in the frontend. * In any case, we capture the faulty char. */ if (msg->err_pos < -1) goto invalid_char; if (msg->err_pos == -1) msg->err_pos = ptr - msg_start; EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, HTTP_MSG_RQURI); } if (likely(HTTP_IS_CRLF(*ptr))) { /* so it's a CR/LF, meaning an HTTP 0.9 request */ goto http_msg_req09_uri_e; } /* OK forbidden chars, 0..31 or 127 */ invalid_char: msg->err_pos = ptr - msg_start; msg->err_state = HTTP_MSG_RQURI; state = HTTP_MSG_ERROR; break; case HTTP_MSG_RQURI_SP: http_msg_rquri_sp: if (likely(!HTTP_IS_LWS(*ptr))) { msg->sl.rq.v = ptr - msg_start; goto http_msg_rqver; } if (likely(HTTP_IS_SPHT(*ptr))) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP); /* so it's a CR/LF, meaning an HTTP 0.9 request */ goto http_msg_req09_ver; case HTTP_MSG_RQVER: http_msg_rqver: if (likely(HTTP_IS_VER_TOKEN(*ptr))) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, HTTP_MSG_RQVER); if (likely(HTTP_IS_CRLF(*ptr))) { msg->sl.rq.v_l = ptr - msg_start - msg->sl.rq.v; http_msg_rqline_eol: /* We have seen the end of line. Note that we do not * necessarily have the \n yet, but at least we know that we * have EITHER \r OR \n, otherwise the request would not be * complete. We can then record the request length and return * to the caller which will be able to register it. 
*/ msg->sl.rq.l = ptr - msg_start - msg->sol; return ptr; } /* neither an HTTP_VER token nor a CRLF */ msg->err_state = HTTP_MSG_RQVER; state = HTTP_MSG_ERROR; break; default: #ifdef DEBUG_FULL fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state); exit(1); #endif ; } http_msg_ood: /* out of valid data */ if (ret_state) *ret_state = state; if (ret_ptr) *ret_ptr = ptr - msg_start; return NULL; } /* * This function parses an HTTP message, either a request or a response, * depending on the initial msg->msg_state. The caller is responsible for * ensuring that the message does not wrap. The function can be preempted * everywhere when data are missing and recalled at the exact same location * with no information loss. The message may even be realigned between two * calls. The header index is re-initialized when switching from * MSG_R[PQ]BEFORE to MSG_RPVER|MSG_RQMETH. It modifies msg->sol among other * fields. Note that msg->sol will be initialized after completing the first * state, so that none of the msg pointers has to be initialized prior to the * first call. */ void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx) { enum h1_state state; /* updated only when leaving the FSM */ register const char *ptr, *end; /* request pointers, to avoid dereferences */ struct buffer *buf = &msg->chn->buf; char *input = b_head(buf); state = msg->msg_state; ptr = input + msg->next; end = b_stop(buf); if (unlikely(ptr >= end)) goto http_msg_ood; switch (state) { /* * First, states that are specific to the response only. * We check them first so that request and headers are * closer to each other (accessed more often). */ case HTTP_MSG_RPBEFORE: http_msg_rpbefore: if (likely(HTTP_IS_TOKEN(*ptr))) { /* we have a start of message, but we have to check * first if we need to remove some CRLF. We can only * do this when o=0. 
*/ if (unlikely(ptr != input)) { if (co_data(msg->chn)) goto http_msg_ood; /* Remove empty leading lines, as recommended by RFC2616. */ b_del(buf, ptr - input); input = b_head(buf); } msg->sol = 0; msg->sl.st.l = 0; /* used in debug mode */ hdr_idx_init(idx); state = HTTP_MSG_RPVER; goto http_msg_rpver; } if (unlikely(!HTTP_IS_CRLF(*ptr))) { state = HTTP_MSG_RPBEFORE; goto http_msg_invalid; } if (unlikely(*ptr == '\n')) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE); EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR); /* stop here */ case HTTP_MSG_RPBEFORE_CR: http_msg_rpbefore_cr: EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR); EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE); /* stop here */ case HTTP_MSG_RPVER: http_msg_rpver: case HTTP_MSG_RPVER_SP: case HTTP_MSG_RPCODE: case HTTP_MSG_RPCODE_SP: case HTTP_MSG_RPREASON: ptr = (char *)http_parse_stsline(msg, state, ptr, end, &msg->next, &msg->msg_state); if (unlikely(!ptr)) return; /* we have a full response and we know that we have either a CR * or an LF at . */ hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r'); msg->sol = ptr - input; if (likely(*ptr == '\r')) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END); goto http_msg_rpline_end; case HTTP_MSG_RPLINE_END: http_msg_rpline_end: /* msg->sol must point to the first of CR or LF. */ EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END); EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST); /* stop here */ /* * Second, states that are specific to the request only */ case HTTP_MSG_RQBEFORE: http_msg_rqbefore: if (likely(HTTP_IS_TOKEN(*ptr))) { /* we have a start of message, but we have to check * first if we need to remove some CRLF. We can only * do this when o=0. 
*/ if (likely(ptr != input)) { if (co_data(msg->chn)) goto http_msg_ood; /* Remove empty leading lines, as recommended by RFC2616. */ b_del(buf, ptr - input); input = b_head(buf); } msg->sol = 0; msg->sl.rq.l = 0; /* used in debug mode */ state = HTTP_MSG_RQMETH; goto http_msg_rqmeth; } if (unlikely(!HTTP_IS_CRLF(*ptr))) { state = HTTP_MSG_RQBEFORE; goto http_msg_invalid; } if (unlikely(*ptr == '\n')) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE); EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, HTTP_MSG_RQBEFORE_CR); /* stop here */ case HTTP_MSG_RQBEFORE_CR: http_msg_rqbefore_cr: EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQBEFORE_CR); EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE); /* stop here */ case HTTP_MSG_RQMETH: http_msg_rqmeth: case HTTP_MSG_RQMETH_SP: case HTTP_MSG_RQURI: case HTTP_MSG_RQURI_SP: case HTTP_MSG_RQVER: ptr = (char *)http_parse_reqline(msg, state, ptr, end, &msg->next, &msg->msg_state); if (unlikely(!ptr)) return; /* we have a full request and we know that we have either a CR * or an LF at . */ hdr_idx_set_start(idx, msg->sl.rq.l, *ptr == '\r'); msg->sol = ptr - input; if (likely(*ptr == '\r')) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, HTTP_MSG_RQLINE_END); goto http_msg_rqline_end; case HTTP_MSG_RQLINE_END: http_msg_rqline_end: /* check for HTTP/0.9 request : no version information available. * msg->sol must point to the first of CR or LF. 
*/ if (unlikely(msg->sl.rq.v_l == 0)) goto http_msg_last_lf; EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQLINE_END); EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST); /* stop here */ /* * Common states below */ case HTTP_MSG_HDR_FIRST: http_msg_hdr_first: msg->sol = ptr - input; if (likely(!HTTP_IS_CRLF(*ptr))) { goto http_msg_hdr_name; } if (likely(*ptr == '\r')) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF); goto http_msg_last_lf; case HTTP_MSG_HDR_NAME: http_msg_hdr_name: /* assumes msg->sol points to the first char */ if (likely(HTTP_IS_TOKEN(*ptr))) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME); if (likely(*ptr == ':')) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP); if (likely(msg->err_pos < -1) || *ptr == '\n') { state = HTTP_MSG_HDR_NAME; goto http_msg_invalid; } if (msg->err_pos == -1) /* capture error pointer */ msg->err_pos = ptr - input; /* >= 0 now */ /* and we still accept this non-token character */ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME); case HTTP_MSG_HDR_L1_SP: http_msg_hdr_l1_sp: /* assumes msg->sol points to the first char */ if (likely(HTTP_IS_SPHT(*ptr))) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP); /* header value can be basically anything except CR/LF */ msg->sov = ptr - input; if (likely(!HTTP_IS_CRLF(*ptr))) { goto http_msg_hdr_val; } if (likely(*ptr == '\r')) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF); goto http_msg_hdr_l1_lf; case HTTP_MSG_HDR_L1_LF: http_msg_hdr_l1_lf: EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF); EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS); case HTTP_MSG_HDR_L1_LWS: http_msg_hdr_l1_lws: if 
(likely(HTTP_IS_SPHT(*ptr))) { /* replace HT,CR,LF with spaces */ for (; input + msg->sov < ptr; msg->sov++) input[msg->sov] = ' '; goto http_msg_hdr_l1_sp; } /* we had a header consisting only in spaces ! */ msg->eol = msg->sov; goto http_msg_complete_header; case HTTP_MSG_HDR_VAL: http_msg_hdr_val: /* assumes msg->sol points to the first char, and msg->sov * points to the first character of the value. */ /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D * and lower. In fact since most of the time is spent in the loop, we * also remove the sign bit test so that bytes 0x8e..0x0d break the * loop, but we don't care since they're very rare in header values. */ #if defined(__x86_64__) while (ptr <= end - sizeof(long)) { if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL) goto http_msg_hdr_val2; ptr += sizeof(long); } #endif #if defined(__x86_64__) || \ defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \ defined(__ARM_ARCH_7A__) while (ptr <= end - sizeof(int)) { if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080) goto http_msg_hdr_val2; ptr += sizeof(int); } #endif if (ptr >= end) { state = HTTP_MSG_HDR_VAL; goto http_msg_ood; } http_msg_hdr_val2: if (likely(!HTTP_IS_CRLF(*ptr))) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL); msg->eol = ptr - input; /* Note: we could also copy eol into ->eoh so that we have the * real header end in case it ends with lots of LWS, but is this * really needed ? 
*/ if (likely(*ptr == '\r')) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF); goto http_msg_hdr_l2_lf; case HTTP_MSG_HDR_L2_LF: http_msg_hdr_l2_lf: EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF); EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS); case HTTP_MSG_HDR_L2_LWS: http_msg_hdr_l2_lws: if (unlikely(HTTP_IS_SPHT(*ptr))) { /* LWS: replace HT,CR,LF with spaces */ for (; input + msg->eol < ptr; msg->eol++) input[msg->eol] = ' '; goto http_msg_hdr_val; } http_msg_complete_header: /* * It was a new header, so the last one is finished. * Assumes msg->sol points to the first char, msg->sov points * to the first character of the value and msg->eol to the * first CR or LF so we know how the line ends. We insert last * header into the index. */ if (unlikely(hdr_idx_add(msg->eol - msg->sol, input[msg->eol] == '\r', idx, idx->tail) < 0)) { state = HTTP_MSG_HDR_L2_LWS; goto http_msg_invalid; } msg->sol = ptr - input; if (likely(!HTTP_IS_CRLF(*ptr))) { goto http_msg_hdr_name; } if (likely(*ptr == '\r')) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF); goto http_msg_last_lf; case HTTP_MSG_LAST_LF: http_msg_last_lf: /* Assumes msg->sol points to the first of either CR or LF. * Sets ->sov and ->next to the total header length, ->eoh to * the last CRLF, and ->eol to the last CRLF length (1 or 2). */ EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF); ptr++; msg->sov = msg->next = ptr - input; msg->eoh = msg->sol; msg->sol = 0; msg->eol = msg->sov - msg->eoh; msg->msg_state = HTTP_MSG_BODY; return; case HTTP_MSG_ERROR: /* this may only happen if we call http_msg_analyser() twice with an error */ break; default: #ifdef DEBUG_FULL fprintf(stderr, "FIXME !!!! 
impossible state at %s:%d = %d\n", __FILE__, __LINE__, state); exit(1); #endif ; } http_msg_ood: /* out of data */ msg->msg_state = state; msg->next = ptr - input; return; http_msg_invalid: /* invalid message */ msg->err_state = state; msg->msg_state = HTTP_MSG_ERROR; msg->next = ptr - input; return; } /* Parse the Content-Length header field of an HTTP/1 request. The function * checks all possible occurrences of a comma-delimited value, and verifies * if any of them doesn't match a previous value. It returns <0 if a value * differs, 0 if the whole header can be dropped (i.e. already known), or >0 * if the value can be indexed (first one). In the last case, the value might * be adjusted and the caller must only add the updated value. */ int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value) { char *e, *n; long long cl; int not_first = !!(h1m->flags & H1_MF_CLEN); struct ist word; word.ptr = value->ptr - 1; // -1 for next loop's pre-increment e = value->ptr + value->len; while (++word.ptr < e) { /* skip leading delimitor and blanks */ if (unlikely(HTTP_IS_LWS(*word.ptr))) continue; /* digits only now */ for (cl = 0, n = word.ptr; n < e; n++) { unsigned int c = *n - '0'; if (unlikely(c > 9)) { /* non-digit */ if (unlikely(n == word.ptr)) // spaces only goto fail; break; } if (unlikely(cl > ULLONG_MAX / 10ULL)) goto fail; /* multiply overflow */ cl = cl * 10ULL; if (unlikely(cl + c < cl)) goto fail; /* addition overflow */ cl = cl + c; } /* keep a copy of the exact cleaned value */ word.len = n - word.ptr; /* skip trailing LWS till next comma or EOL */ for (; n < e; n++) { if (!HTTP_IS_LWS(*n)) { if (unlikely(*n != ',')) goto fail; break; } } /* if duplicate, must be equal */ if (h1m->flags & H1_MF_CLEN && cl != h1m->body_len) goto fail; /* OK, store this result as the one to be indexed */ h1m->flags |= H1_MF_CLEN; h1m->curr_len = h1m->body_len = cl; *value = word; word.ptr = n; } /* here we've reached the end with a single value or a series of * identical 
values, all matching previous series if any. The last * parsed value was sent back into . We just have to decide * if this occurrence has to be indexed (it's the first one) or * silently skipped (it's not the first one) */ return !not_first; fail: return -1; } /* Parse the Transfer-Encoding: header field of an HTTP/1 request, looking for * "chunked" being the last value, and setting H1_MF_CHNK in h1m->flags only in * this case. Any other token found or any empty header field found will reset * this flag, so that it accurately represents the token's presence at the last * position. The H1_MF_XFER_ENC flag is always set. Note that transfer codings * are case-insensitive (cf RFC7230#4). */ void h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value) { char *e, *n; struct ist word; h1m->flags |= H1_MF_XFER_ENC; h1m->flags &= ~H1_MF_CHNK; word.ptr = value.ptr - 1; // -1 for next loop's pre-increment e = value.ptr + value.len; while (++word.ptr < e) { /* skip leading delimitor and blanks */ if (HTTP_IS_LWS(*word.ptr)) continue; n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line word.len = n - word.ptr; /* trim trailing blanks */ while (word.len && HTTP_IS_LWS(word.ptr[word.len-1])) word.len--; h1m->flags &= ~H1_MF_CHNK; if (isteqi(word, ist("chunked"))) h1m->flags |= H1_MF_CHNK; word.ptr = n; } } /* Parse the Connection: header of an HTTP/1 request, looking for "close", * "keep-alive", and "upgrade" values, and updating h1m->flags according to * what was found there. Note that flags are only added, not removed, so the * function is safe for being called multiple times if multiple occurrences * are found. 
*/ void h1_parse_connection_header(struct h1m *h1m, struct ist value) { char *e, *n; struct ist word; word.ptr = value.ptr - 1; // -1 for next loop's pre-increment e = value.ptr + value.len; while (++word.ptr < e) { /* skip leading delimitor and blanks */ if (HTTP_IS_LWS(*word.ptr)) continue; n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line word.len = n - word.ptr; /* trim trailing blanks */ while (word.len && HTTP_IS_LWS(word.ptr[word.len-1])) word.len--; if (isteqi(word, ist("keep-alive"))) h1m->flags |= H1_MF_CONN_KAL; else if (isteqi(word, ist("close"))) h1m->flags |= H1_MF_CONN_CLO; else if (isteqi(word, ist("upgrade"))) h1m->flags |= H1_MF_CONN_UPG; word.ptr = n; } } /* This function parses a contiguous HTTP/1 headers block starting at * and ending before , at once, and converts it a list of (name,value) * pairs representing header fields into the array of size , * whose last entry will have an empty name and an empty value. If is * too small to represent the whole message, an error is returned. Some * protocol elements such as content-length and transfer-encoding will be * parsed and stored into h1m as well. may be null, in which case only * the parsing state will be updated. This may be used to restart the parsing * where it stopped for example. * * For now it's limited to the response. If the header block is incomplete, * 0 is returned, waiting to be called again with more data to try it again. * The caller is responsible for initializing h1m->state to H1_MSG_RPBEFORE, * and h1m->next to zero on the first call, the parser will do the rest. If * an incomplete message is seen, the caller only needs to present h1m->state * and h1m->next again, with an empty header list so that the parser can start * again. In this case, it will detect that it interrupted a previous session * and will first look for the end of the message before reparsing it again and * indexing it at the same time. 
This ensures that incomplete messages fed 1 * character at a time are never processed entirely more than exactly twice, * and that there is no need to store all the internal state and pre-parsed * headers or start line between calls. * * A pointer to a start line descriptor may be passed in <slp>, in which case * the parser will fill it with whatever it found. * * The code is derived from the main HTTP/1 parser above but was simplified and * optimized to process responses produced or forwarded by haproxy. The caller * is responsible for ensuring that the message doesn't wrap, and should ensure * it is complete to avoid having to retry the operation after a failed * attempt. The message is not supposed to be invalid, which is why a few * properties such as the character set used in the header field names are not * checked. In case of an unparsable response message, a negative value will be * returned with h1m->err_pos and h1m->err_state matching the location and * state where the error was met. Leading blank lines are tolerated but not * recommended. * * This function returns : * -1 in case of error. In this case, h1m->err_state is filled (if h1m is * set) with the state the error occurred in and h1m->err_pos with the * position relative to <start> * -2 if the output is full (hdr_num reached). err_state and err_pos also * indicate where it failed. * 0 in case of missing data. * > 0 on success, it then corresponds to the number of bytes read since * <start> so that the caller can go on with the payload. 
*/ int h1_headers_to_hdr_list(char *start, const char *stop, struct http_hdr *hdr, unsigned int hdr_num, struct h1m *h1m, union h1_sl *slp) { enum h1m_state state; register char *ptr; register const char *end; unsigned int hdr_count; unsigned int skip; /* number of bytes skipped at the beginning */ unsigned int sol; /* start of line */ unsigned int col; /* position of the colon */ unsigned int eol; /* end of line */ unsigned int sov; /* start of value */ union h1_sl sl; int skip_update; int restarting; struct ist n, v; /* header name and value during parsing */ skip = 0; // do it only once to keep track of the leading CRLF. try_again: hdr_count = sol = col = eol = sov = 0; sl.st.status = 0; skip_update = restarting = 0; ptr = start + h1m->next; end = stop; state = h1m->state; if (state != H1_MSG_RQBEFORE && state != H1_MSG_RPBEFORE) restarting = 1; if (unlikely(ptr >= end)) goto http_msg_ood; /* don't update output if hdr is NULL or if we're restarting */ if (!hdr || restarting) skip_update = 1; switch (state) { case H1_MSG_RQBEFORE: http_msg_rqbefore: if (likely(HTTP_IS_TOKEN(*ptr))) { /* we have a start of message, we may have skipped some * heading CRLF. Skip them now. 
*/ skip += ptr - start; start = ptr; sol = 0; sl.rq.m.ptr = ptr; hdr_count = 0; state = H1_MSG_RQMETH; goto http_msg_rqmeth; } if (unlikely(!HTTP_IS_CRLF(*ptr))) { state = H1_MSG_RQBEFORE; goto http_msg_invalid; } if (unlikely(*ptr == '\n')) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE); EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, H1_MSG_RQBEFORE_CR); /* stop here */ case H1_MSG_RQBEFORE_CR: http_msg_rqbefore_cr: EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQBEFORE_CR); EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE); /* stop here */ case H1_MSG_RQMETH: http_msg_rqmeth: if (likely(HTTP_IS_TOKEN(*ptr))) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, H1_MSG_RQMETH); if (likely(HTTP_IS_SPHT(*ptr))) { sl.rq.m.len = ptr - sl.rq.m.ptr; sl.rq.meth = find_http_meth(start, sl.rq.m.len); EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP); } if (likely(HTTP_IS_CRLF(*ptr))) { /* HTTP 0.9 request */ sl.rq.m.len = ptr - sl.rq.m.ptr; sl.rq.meth = find_http_meth(sl.rq.m.ptr, sl.rq.m.len); http_msg_req09_uri: sl.rq.u.ptr = ptr; http_msg_req09_uri_e: sl.rq.u.len = ptr - sl.rq.u.ptr; http_msg_req09_ver: sl.rq.v.ptr = ptr; sl.rq.v.len = 0; goto http_msg_rqline_eol; } state = H1_MSG_RQMETH; goto http_msg_invalid; case H1_MSG_RQMETH_SP: http_msg_rqmeth_sp: if (likely(!HTTP_IS_LWS(*ptr))) { sl.rq.u.ptr = ptr; goto http_msg_rquri; } if (likely(HTTP_IS_SPHT(*ptr))) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP); /* so it's a CR/LF, meaning an HTTP 0.9 request */ goto http_msg_req09_uri; case H1_MSG_RQURI: http_msg_rquri: #if defined(__x86_64__) || \ defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \ defined(__ARM_ARCH_7A__) /* speedup: skip bytes not between 0x21 and 0x7e inclusive */ while (ptr <= end - sizeof(int)) { int 
x = *(int *)ptr - 0x21212121; if (x & 0x80808080) break; x -= 0x5e5e5e5e; if (!(x & 0x80808080)) break; ptr += sizeof(int); } #endif if (ptr >= end) { state = H1_MSG_RQURI; goto http_msg_ood; } http_msg_rquri2: if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, H1_MSG_RQURI); if (likely(HTTP_IS_SPHT(*ptr))) { sl.rq.u.len = ptr - sl.rq.u.ptr; EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP); } if (likely((unsigned char)*ptr >= 128)) { /* non-ASCII chars are forbidden unless option * accept-invalid-http-request is enabled in the frontend. * In any case, we capture the faulty char. */ if (h1m->err_pos < -1) goto invalid_char; if (h1m->err_pos == -1) h1m->err_pos = ptr - start + skip; EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, H1_MSG_RQURI); } if (likely(HTTP_IS_CRLF(*ptr))) { /* so it's a CR/LF, meaning an HTTP 0.9 request */ goto http_msg_req09_uri_e; } /* OK forbidden chars, 0..31 or 127 */ invalid_char: state = H1_MSG_RQURI; goto http_msg_invalid; case H1_MSG_RQURI_SP: http_msg_rquri_sp: if (likely(!HTTP_IS_LWS(*ptr))) { sl.rq.v.ptr = ptr; goto http_msg_rqver; } if (likely(HTTP_IS_SPHT(*ptr))) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP); /* so it's a CR/LF, meaning an HTTP 0.9 request */ goto http_msg_req09_ver; case H1_MSG_RQVER: http_msg_rqver: if (likely(HTTP_IS_VER_TOKEN(*ptr))) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, H1_MSG_RQVER); if (likely(HTTP_IS_CRLF(*ptr))) { sl.rq.v.len = ptr - sl.rq.v.ptr; http_msg_rqline_eol: /* We have seen the end of line. Note that we do not * necessarily have the \n yet, but at least we know that we * have EITHER \r OR \n, otherwise the request would not be * complete. We can then record the request length and return * to the caller which will be able to register it. 
*/ if (likely(!skip_update)) { if ((sl.rq.v.len == 8) && (*(sl.rq.v.ptr + 5) > '1' || (*(sl.rq.v.ptr + 5) == '1' && *(sl.rq.v.ptr + 7) >= '1'))) h1m->flags |= H1_MF_VER_11; if (unlikely(hdr_count >= hdr_num)) { state = H1_MSG_RQVER; goto http_output_full; } if (!(h1m->flags & H1_MF_NO_PHDR)) http_set_hdr(&hdr[hdr_count++], ist(":method"), sl.rq.m); if (unlikely(hdr_count >= hdr_num)) { state = H1_MSG_RQVER; goto http_output_full; } if (!(h1m->flags & H1_MF_NO_PHDR)) http_set_hdr(&hdr[hdr_count++], ist(":path"), sl.rq.u); } sol = ptr - start; if (likely(*ptr == '\r')) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, H1_MSG_RQLINE_END); goto http_msg_rqline_end; } /* neither an HTTP_VER token nor a CRLF */ state = H1_MSG_RQVER; goto http_msg_invalid; case H1_MSG_RQLINE_END: http_msg_rqline_end: /* check for HTTP/0.9 request : no version information * available. sol must point to the first of CR or LF. However * since we don't save these elements between calls, if we come * here from a restart, we don't necessarily know. Thus in this * case we simply start over. */ if (restarting) goto restart; if (unlikely(sl.rq.v.len == 0)) goto http_msg_last_lf; EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQLINE_END); EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST); /* stop here */ /* * Common states below */ case H1_MSG_RPBEFORE: http_msg_rpbefore: if (likely(HTTP_IS_TOKEN(*ptr))) { /* we have a start of message, we may have skipped some * heading CRLF. Skip them now. 
*/ skip += ptr - start; start = ptr; sol = 0; sl.st.v.ptr = ptr; hdr_count = 0; state = H1_MSG_RPVER; goto http_msg_rpver; } if (unlikely(!HTTP_IS_CRLF(*ptr))) { state = H1_MSG_RPBEFORE; goto http_msg_invalid; } if (unlikely(*ptr == '\n')) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE); EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, H1_MSG_RPBEFORE_CR); /* stop here */ case H1_MSG_RPBEFORE_CR: http_msg_rpbefore_cr: EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPBEFORE_CR); EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE); /* stop here */ case H1_MSG_RPVER: http_msg_rpver: if (likely(HTTP_IS_VER_TOKEN(*ptr))) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, H1_MSG_RPVER); if (likely(HTTP_IS_SPHT(*ptr))) { sl.st.v.len = ptr - sl.st.v.ptr; if ((sl.st.v.len == 8) && (*(sl.st.v.ptr + 5) > '1' || (*(sl.st.v.ptr + 5) == '1' && *(sl.st.v.ptr + 7) >= '1'))) h1m->flags |= H1_MF_VER_11; EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP); } state = H1_MSG_RPVER; goto http_msg_invalid; case H1_MSG_RPVER_SP: http_msg_rpver_sp: if (likely(!HTTP_IS_LWS(*ptr))) { sl.st.status = 0; sl.st.c.ptr = ptr; goto http_msg_rpcode; } if (likely(HTTP_IS_SPHT(*ptr))) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP); /* so it's a CR/LF, this is invalid */ state = H1_MSG_RPVER_SP; goto http_msg_invalid; case H1_MSG_RPCODE: http_msg_rpcode: if (likely(HTTP_IS_DIGIT(*ptr))) { sl.st.status = sl.st.status * 10 + *ptr - '0'; EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, H1_MSG_RPCODE); } if (unlikely(!HTTP_IS_LWS(*ptr))) { state = H1_MSG_RPCODE; goto http_msg_invalid; } if (likely(HTTP_IS_SPHT(*ptr))) { sl.st.c.len = ptr - sl.st.c.ptr; EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP); } /* so it's a CR/LF, so there is 
no reason phrase */ sl.st.c.len = ptr - sl.st.c.ptr; http_msg_rsp_reason: sl.st.r.ptr = ptr; sl.st.r.len = 0; goto http_msg_rpline_eol; case H1_MSG_RPCODE_SP: http_msg_rpcode_sp: if (likely(!HTTP_IS_LWS(*ptr))) { sl.st.r.ptr = ptr; goto http_msg_rpreason; } if (likely(HTTP_IS_SPHT(*ptr))) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP); /* so it's a CR/LF, so there is no reason phrase */ goto http_msg_rsp_reason; case H1_MSG_RPREASON: http_msg_rpreason: if (likely(!HTTP_IS_CRLF(*ptr))) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, H1_MSG_RPREASON); sl.st.r.len = ptr - sl.st.r.ptr; http_msg_rpline_eol: /* We have seen the end of line. Note that we do not * necessarily have the \n yet, but at least we know that we * have EITHER \r OR \n, otherwise the response would not be * complete. We can then record the response length and return * to the caller which will be able to register it. */ if (likely(!skip_update)) { if (unlikely(hdr_count >= hdr_num)) { state = H1_MSG_RPREASON; goto http_output_full; } if (!(h1m->flags & H1_MF_NO_PHDR)) http_set_hdr(&hdr[hdr_count++], ist(":status"), sl.st.c); } sol = ptr - start; if (likely(*ptr == '\r')) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, H1_MSG_RPLINE_END); goto http_msg_rpline_end; case H1_MSG_RPLINE_END: http_msg_rpline_end: /* sol must point to the first of CR or LF. 
*/ EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPLINE_END); EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST); /* stop here */ case H1_MSG_HDR_FIRST: http_msg_hdr_first: sol = ptr - start; if (likely(!HTTP_IS_CRLF(*ptr))) { goto http_msg_hdr_name; } if (likely(*ptr == '\r')) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF); goto http_msg_last_lf; case H1_MSG_HDR_NAME: http_msg_hdr_name: /* assumes sol points to the first char */ if (likely(HTTP_IS_TOKEN(*ptr))) { if (!skip_update) { /* turn it to lower case if needed */ if (isupper((unsigned char)*ptr) && h1m->flags & H1_MF_TOLOWER) *ptr = tolower(*ptr); } EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME); } if (likely(*ptr == ':')) { col = ptr - start; EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP); } if (likely(h1m->err_pos < -1) || *ptr == '\n') { state = H1_MSG_HDR_NAME; goto http_msg_invalid; } if (h1m->err_pos == -1) /* capture the error pointer */ h1m->err_pos = ptr - start + skip; /* >= 0 now */ /* and we still accept this non-token character */ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME); case H1_MSG_HDR_L1_SP: http_msg_hdr_l1_sp: /* assumes sol points to the first char */ if (likely(HTTP_IS_SPHT(*ptr))) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP); /* header value can be basically anything except CR/LF */ sov = ptr - start; if (likely(!HTTP_IS_CRLF(*ptr))) { goto http_msg_hdr_val; } if (likely(*ptr == '\r')) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, H1_MSG_HDR_L1_LF); goto http_msg_hdr_l1_lf; case H1_MSG_HDR_L1_LF: http_msg_hdr_l1_lf: EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L1_LF); EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, H1_MSG_HDR_L1_LWS); case 
H1_MSG_HDR_L1_LWS: http_msg_hdr_l1_lws: if (likely(HTTP_IS_SPHT(*ptr))) { if (!skip_update) { /* replace HT,CR,LF with spaces */ for (; start + sov < ptr; sov++) start[sov] = ' '; } goto http_msg_hdr_l1_sp; } /* we had a header consisting only in spaces ! */ eol = sov; goto http_msg_complete_header; case H1_MSG_HDR_VAL: http_msg_hdr_val: /* assumes sol points to the first char, and sov * points to the first character of the value. */ /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D * and lower. In fact since most of the time is spent in the loop, we * also remove the sign bit test so that bytes 0x8e..0x0d break the * loop, but we don't care since they're very rare in header values. */ #if defined(__x86_64__) while (ptr <= end - sizeof(long)) { if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL) goto http_msg_hdr_val2; ptr += sizeof(long); } #endif #if defined(__x86_64__) || \ defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \ defined(__ARM_ARCH_7A__) while (ptr <= end - sizeof(int)) { if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080) goto http_msg_hdr_val2; ptr += sizeof(int); } #endif if (ptr >= end) { state = H1_MSG_HDR_VAL; goto http_msg_ood; } http_msg_hdr_val2: if (likely(!HTTP_IS_CRLF(*ptr))) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, H1_MSG_HDR_VAL); eol = ptr - start; /* Note: we could also copy eol into ->eoh so that we have the * real header end in case it ends with lots of LWS, but is this * really needed ? 
*/ if (likely(*ptr == '\r')) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, H1_MSG_HDR_L2_LF); goto http_msg_hdr_l2_lf; case H1_MSG_HDR_L2_LF: http_msg_hdr_l2_lf: EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L2_LF); EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, H1_MSG_HDR_L2_LWS); case H1_MSG_HDR_L2_LWS: http_msg_hdr_l2_lws: if (unlikely(HTTP_IS_SPHT(*ptr))) { if (!skip_update) { /* LWS: replace HT,CR,LF with spaces */ for (; start + eol < ptr; eol++) start[eol] = ' '; } goto http_msg_hdr_val; } http_msg_complete_header: /* * It was a new header, so the last one is finished. Assumes * points to the first char of the name, to the * colon, points to the first character of the value and * to the first CR or LF so we know how the line ends. We * will trim spaces around the value. It's possible to do it by * adjusting and which are no more used after this. * We can add the header field to the list. */ if (likely(!skip_update)) { while (sov < eol && HTTP_IS_LWS(start[sov])) sov++; while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1])) eol--; n = ist2(start + sol, col - sol); v = ist2(start + sov, eol - sov); do { int ret; if (unlikely(hdr_count >= hdr_num)) { state = H1_MSG_HDR_L2_LWS; goto http_output_full; } if (isteqi(n, ist("transfer-encoding"))) { h1_parse_xfer_enc_header(h1m, v); } else if (isteqi(n, ist("content-length"))) { ret = h1_parse_cont_len_header(h1m, &v); if (ret < 0) { state = H1_MSG_HDR_L2_LWS; goto http_msg_invalid; } else if (ret == 0) { /* skip it */ break; } } else if (isteqi(n, ist("connection"))) { h1_parse_connection_header(h1m, v); } http_set_hdr(&hdr[hdr_count++], n, v); } while (0); } sol = ptr - start; if (likely(!HTTP_IS_CRLF(*ptr))) goto http_msg_hdr_name; if (likely(*ptr == '\r')) EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF); goto http_msg_last_lf; case H1_MSG_LAST_LF: http_msg_last_lf: EXPECT_LF_HERE(ptr, http_msg_invalid, state, 
H1_MSG_LAST_LF); ptr++; /* now points to the first byte of payload. If needed sol * still points to the first of either CR or LF of the empty * line ending the headers block. */ if (likely(!skip_update)) { if (unlikely(hdr_count >= hdr_num)) { state = H1_MSG_LAST_LF; goto http_output_full; } http_set_hdr(&hdr[hdr_count++], ist2(start+sol, 0), ist("")); } /* reaching here we've parsed the whole message. We may detect * that we were already continuing an interrupted parsing pass * so we were silently looking for the end of message not * updating anything before deciding to parse it fully at once. * It's guaranteed that we won't match this test twice in a row * since restarting will turn zero. */ if (restarting) goto restart; state = H1_MSG_DATA; if (h1m->flags & H1_MF_XFER_ENC) { if (h1m->flags & H1_MF_CLEN) { h1m->flags &= ~H1_MF_CLEN; hdr_count = http_del_hdr(hdr, ist("content-length")); } if (h1m->flags & H1_MF_CHNK) state = H1_MSG_CHUNK_SIZE; else if (!(h1m->flags & H1_MF_RESP)) { /* cf RFC7230#3.3.3 : transfer-encoding in * request without chunked encoding is invalid. */ goto http_msg_invalid; } } break; default: /* impossible states */ goto http_msg_invalid; } /* Now we've left the headers state and are either in H1_MSG_DATA or * H1_MSG_CHUNK_SIZE. 
*/ if (slp && !skip_update) *slp = sl; h1m->state = state; h1m->next = ptr - start + skip; return h1m->next; http_msg_ood: /* out of data at during state */ if (slp && !skip_update) *slp = sl; h1m->state = state; h1m->next = ptr - start + skip; return 0; http_msg_invalid: /* invalid message, error at */ if (slp && !skip_update) *slp = sl; h1m->err_state = h1m->state = state; h1m->err_pos = h1m->next = ptr - start + skip; return -1; http_output_full: /* no more room to store the current header, error at */ if (slp && !skip_update) *slp = sl; h1m->err_state = h1m->state = state; h1m->err_pos = h1m->next = ptr - start + skip; return -2; restart: h1m->next = 0; if (h1m->flags & H1_MF_RESP) h1m->state = H1_MSG_RPBEFORE; else h1m->state = H1_MSG_RQBEFORE; goto try_again; } /* This function performs a very minimal parsing of the trailers block present * at offset in for up to bytes, and returns the number of * bytes to delete to skip the trailers. It may return 0 if it's missing some * input data, or < 0 in case of parse error (in which case the caller may have * to decide how to proceed, possibly eating everything). */ int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max) { const char *stop = b_peek(buf, ofs + max); int count = ofs; while (1) { const char *p1 = NULL, *p2 = NULL; const char *start = b_peek(buf, count); const char *ptr = start; /* scan current line and stop at LF or CRLF */ while (1) { if (ptr == stop) return 0; if (*ptr == '\n') { if (!p1) p1 = ptr; p2 = ptr; break; } if (*ptr == '\r') { if (p1) return -1; p1 = ptr; } ptr = b_next(buf, ptr); } /* after LF; point to beginning of next line */ p2 = b_next(buf, p2); count += b_dist(buf, start, p2); /* LF/CRLF at beginning of line => end of trailers at p2. * Everything was scheduled for forwarding, there's nothing left * from this message. 
*/ if (p1 == start) break; /* OK, next line then */ } return count - ofs; } /* This function skips trailers in the buffer associated with HTTP message * . The first visited position is msg->next. If the end of the trailers is * found, the function returns >0. So, the caller can automatically schedul it * to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough * data are available, the function does not change anything except maybe * msg->sol if it could parse some lines, and returns zero. If a parse error * is encountered, the function returns < 0 and does not change anything except * maybe msg->sol. Note that the message must already be in HTTP_MSG_TRAILERS * state before calling this function, which implies that all non-trailers data * have already been scheduled for forwarding, and that msg->next exactly * matches the length of trailers already parsed and not forwarded. It is also * important to note that this function is designed to be able to parse wrapped * headers at end of buffer. */ int http_forward_trailers(struct http_msg *msg) { const struct buffer *buf = &msg->chn->buf; const char *parse = ci_head(msg->chn); const char *stop = b_tail(buf); /* we have msg->next which points to next line. Look for CRLF. But * first, we reset msg->sol */ msg->sol = 0; while (1) { const char *p1 = NULL, *p2 = NULL; const char *start = c_ptr(msg->chn, msg->next + msg->sol); const char *ptr = start; /* scan current line and stop at LF or CRLF */ while (1) { if (ptr == stop) return 0; if (*ptr == '\n') { if (!p1) p1 = ptr; p2 = ptr; break; } if (*ptr == '\r') { if (p1) { msg->err_pos = b_dist(buf, parse, ptr); return -1; } p1 = ptr; } ptr = b_next(buf, ptr); } /* after LF; point to beginning of next line */ p2 = b_next(buf, p2); msg->sol += b_dist(buf, start, p2); /* LF/CRLF at beginning of line => end of trailers at p2. * Everything was scheduled for forwarding, there's nothing left * from this message. 
*/ if (p1 == start) return 1; /* OK, next line then */ } }