MINOR: http: use http uri parser for authority

Replace http_get_authority by the http_uri_parser API.

The function is renamed http_parse_authority. The duplicated scheme
parsing code is replaced by a call to http_parse_scheme. A new
http_uri_parser state is declared to mark the authority parsing as done.
This commit is contained in:
Amaury Denoyelle 2021-07-06 11:02:22 +02:00
parent 8ac8cbfd72
commit 69294b20ac
5 changed files with 28 additions and 37 deletions

View File

@ -135,6 +135,7 @@ enum http_etag_type {
/* Progress of an http_uri_parser through the components of a URI. The values
 * are ordered: the parsing functions compare them with '<' and '>=' to know
 * which steps were already performed, so new states must keep that order.
 */
enum http_uri_parser_state {
URI_PARSER_STATE_BEFORE = 0, /* nothing parsed yet; presumably the state set by http_uri_parser_init() - TODO confirm */
URI_PARSER_STATE_SCHEME_DONE, /* scheme step performed; http_parse_authority() runs http_parse_scheme() itself when the state is below this */
URI_PARSER_STATE_AUTHORITY_DONE, /* set by http_parse_authority() on every return path, whether or not an authority was found */
};
/* HTTP URI format as described in rfc 7230 5.3.

View File

@ -37,7 +37,7 @@ enum http_meth_t find_http_meth(const char *str, const int len);
int http_get_status_idx(unsigned int status);
const char *http_get_reason(unsigned int status);
struct ist http_parse_scheme(struct http_uri_parser *parser);
struct ist http_get_authority(const struct ist uri, int no_userinfo);
struct ist http_parse_authority(struct http_uri_parser *parser, int no_userinfo);
struct ist http_get_path(const struct ist uri);
int http_header_match2(const char *hdr, const char *end,
const char *name, int len);

View File

@ -871,8 +871,9 @@ int h1_headers_to_hdr_list(char *start, const char *stop,
else if (!(h1m->flags & (H1_MF_HDRS_ONLY|H1_MF_RESP)) && isteqi(n, ist("host"))) {
if (host_idx == -1) {
struct ist authority;
struct http_uri_parser parser = http_uri_parser_init(sl.rq.u);
authority = http_get_authority(sl.rq.u, 1);
authority = http_parse_authority(&parser, 1);
if (authority.len && !isteqi(v, authority)) {
if (h1m->err_pos < -1) {
state = H1_MSG_HDR_L2_LWS;

View File

@ -523,55 +523,40 @@ struct ist http_parse_scheme(struct http_uri_parser *parser)
* path. If <no_userinfo> is not zero, the part before the '@' (including it) is
* skipped. If not found, an empty ist is returned. Otherwise, an ist pointing
* to the authority is returned.
*
* <parser> must have been initialized via http_uri_parser_init. See the
* related http_uri_parser documentation for the specific API usage.
*/
struct ist http_get_authority(const struct ist uri, int no_userinfo)
struct ist http_parse_authority(struct http_uri_parser *parser, int no_userinfo)
{
const char *ptr, *start, *end;
if (!uri.len)
if (parser->state >= URI_PARSER_STATE_AUTHORITY_DONE) /* authority step already performed on this parser */
goto not_found;
ptr = uri.ptr;
start = ptr;
end = ptr + uri.len;
/* RFC7230, par. 2.7 :
* Request-URI = "*" | absuri | abspath | authority
*/
if (*ptr == '*' || *ptr == '/')
if (parser->format != URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY) /* any other format yields no authority */
goto not_found;
if (isalpha((unsigned char)*ptr)) {
/* this is a scheme as described by RFC3986, par. 3.1, or only
* an authority (in case of a CONNECT method).
*/
ptr++;
while (ptr < end &&
(isalnum((unsigned char)*ptr) || *ptr == '+' || *ptr == '-' || *ptr == '.'))
ptr++;
/* skip '://' or take the whole as authority if not found */
if (ptr == end || *ptr++ != ':')
goto authority;
if (ptr == end || *ptr++ != '/')
goto authority;
if (ptr == end || *ptr++ != '/')
goto authority;
}
if (parser->state < URI_PARSER_STATE_SCHEME_DONE) /* scheme step not performed yet: run it first */
http_parse_scheme(parser); /* return value unused here; presumably advances parser->uri past the scheme - TODO confirm */
ptr = start = istptr(parser->uri);
end = istend(parser->uri);
start = ptr;
/* scan up to the first '/' or the end; when <no_userinfo> is set, <start>
* is moved just past each '@' met so that the userinfo part is left out
*/
while (ptr < end && *ptr != '/') {
if (*ptr++ == '@' && no_userinfo)
start = ptr;
}
/* OK, ptr points to the '/' or to the end */
end = ptr;
authority:
return ist2(start, end - start);
/* keep in the parser only what follows the authority */
parser->uri = ist2(ptr, end - ptr);
parser->state = URI_PARSER_STATE_AUTHORITY_DONE;
return ist2(start, ptr - start);
not_found:
/* the state is updated on failure too, so a later call returns early */
parser->state = URI_PARSER_STATE_AUTHORITY_DONE;
return IST_NULL;
}

View File

@ -691,9 +691,11 @@ int http_update_authority(struct htx *htx, struct htx_sl *sl, const struct ist h
{
struct buffer *temp = get_trash_chunk();
struct ist meth, vsn, uri, authority;
struct http_uri_parser parser;
uri = htx_sl_req_uri(sl);
authority = http_get_authority(uri, 1);
parser = http_uri_parser_init(uri);
authority = http_parse_authority(&parser, 1);
if (!authority.len)
return 0;
@ -728,9 +730,11 @@ int http_update_host(struct htx *htx, struct htx_sl *sl, const struct ist uri)
{
struct ist authority;
struct http_hdr_ctx ctx;
struct http_uri_parser parser = http_uri_parser_init(uri);
if (!uri.len || uri.ptr[0] == '/' || uri.ptr[0] == '*') {
// origin-form or an asterisk-form (RFC7230 #5.3.1 and #5.3.4)
if (parser.format == URI_PARSER_FORMAT_EMPTY ||
parser.format == URI_PARSER_FORMAT_ASTERISK ||
parser.format == URI_PARSER_FORMAT_ABSPATH) {
sl->flags &= ~HTX_SL_F_HAS_AUTHORITY;
}
else {
@ -741,7 +745,7 @@ int http_update_host(struct htx *htx, struct htx_sl *sl, const struct ist uri)
if (uri.len > 4 && (uri.ptr[0] | 0x20) == 'h')
sl->flags |= ((uri.ptr[4] == ':') ? HTX_SL_F_SCHM_HTTP : HTX_SL_F_SCHM_HTTPS);
authority = http_get_authority(uri, 1);
authority = http_parse_authority(&parser, 1);
if (!authority.len)
goto fail;
}
@ -1759,7 +1763,7 @@ int http_scheme_based_normalize(struct htx *htx)
* hostnames, do a reverse search on the last ':' separator as long as
* digits are found.
*/
authority = http_get_authority(uri, 0);
authority = http_parse_authority(&parser, 0);
start = istptr(authority);
end = istend(authority);
for (ptr = end; ptr > start && isdigit((unsigned char)*--ptr); )