From 89c68c8117dc18a2f25999428b4bfcef83f7069e Mon Sep 17 00:00:00 2001 From: Amaury Denoyelle Date: Tue, 6 Jul 2021 10:48:44 +0200 Subject: [PATCH] MINOR: http: implement http uri parser Implement a http uri parser type. This type will be used as a context to parse the various elements of a URI. The goal of this series of patches is to factorize duplicated code between the http_get_scheme/authority/path functions. A simple parsing API is designed to be able to extract each element of an HTTP URI once, in order. The functions will be renamed in the following patches to reflect the API change with the prefix http_parse_*. For the parser API, the http_uri_parser type must first be initialized before usage. It will register the URI to parse and detect its format according to the rfc 7230. --- include/haproxy/http-t.h | 38 ++++++++++++++++++++++++++++++++++++++ include/haproxy/http.h | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/include/haproxy/http-t.h b/include/haproxy/http-t.h index ab4756c11..140a9beed 100644 --- a/include/haproxy/http-t.h +++ b/include/haproxy/http-t.h @@ -131,6 +131,44 @@ enum http_etag_type { ETAG_WEAK }; +/* Indicates what elements have been parsed in a HTTP URI. */ +enum http_uri_parser_state { + URI_PARSER_STATE_BEFORE = 0, +}; + +/* HTTP URI format as described in rfc 7230 5.3. + * As the first character is used to identify the format, absolute-form and + * authority-form are not differentiated. + */ +enum http_uri_parser_format { + URI_PARSER_FORMAT_EMPTY, + URI_PARSER_FORMAT_ASTERISK, + URI_PARSER_FORMAT_ABSPATH, + URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY, +}; + +/* Parser context for a HTTP URI. Must be initialized with http_uri_parser_init + * before its usage. + * + * The parser API is not idempotent. 
For an initialized parser instance, each + * URI element can be extracted only once using its related function : + * - http_parse_scheme + * - http_parse_authority + * - http_parse_path + * + * Also each element must be extracted in the order of its appearance in the + * URI according to the rfc 3986. However, it is possible to skip the parsing + * of elements which are of no interest. + * + * If the above rules are not respected, the parsing functions return an empty + * ist. + */ +struct http_uri_parser { + struct ist uri; /* HTTP URI for parsing */ + enum http_uri_parser_state state; /* already parsed HTTP URI elements */ + enum http_uri_parser_format format; /* rfc 7230 5.3 HTTP URI format */ +}; + #endif /* _HAPROXY_HTTP_T_H */ /* diff --git a/include/haproxy/http.h b/include/haproxy/http.h index 991eece49..faf00a034 100644 --- a/include/haproxy/http.h +++ b/include/haproxy/http.h @@ -131,6 +131,42 @@ static inline enum http_etag_type http_get_etag_type(const struct ist etag) return ETAG_INVALID; } +/* Build an HTTP URI parser for <uri>, in state URI_PARSER_STATE_BEFORE. The + * format is set from the first character of <uri>, or EMPTY if istlen() is 0. + */ +static inline struct http_uri_parser http_uri_parser_init(const struct ist uri) +{ + struct http_uri_parser parser = { + .uri = uri, + .state = URI_PARSER_STATE_BEFORE, + }; + + /* RFC 7230, par. 5.3 (RFC 2616, par. 5.1.2 notation) : + * Request-URI = "*" | absuri | abspath | authority + */ + + if (!istlen(parser.uri)) { + parser.format = URI_PARSER_FORMAT_EMPTY; + } + else { + /* detect the format according to the first URI character */ + switch (*istptr(parser.uri)) { + case '*': + parser.format = URI_PARSER_FORMAT_ASTERISK; + break; + + case '/': + parser.format = URI_PARSER_FORMAT_ABSPATH; + break; + + default: + parser.format = URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY; + break; + } + } + + return parser; +} #endif /* _HAPROXY_HTTP_H */