diff --git a/include/haproxy/http-t.h b/include/haproxy/http-t.h
index ab4756c11..140a9beed 100644
--- a/include/haproxy/http-t.h
+++ b/include/haproxy/http-t.h
@@ -131,6 +131,44 @@ enum http_etag_type {
 	ETAG_WEAK
 };
 
+/* Tracks which elements of an HTTP URI have already been parsed. */
+enum http_uri_parser_state {
+	URI_PARSER_STATE_BEFORE = 0, /* set by http_uri_parser_init() */
+};
+
+/* HTTP URI format as described in RFC 7230 5.3. http_uri_parser_init() picks
+ * it from the first character of the URI only, which is why absolute-form and
+ * authority-form are not differentiated.
+ */
+enum http_uri_parser_format {
+	URI_PARSER_FORMAT_EMPTY, /* zero-length URI */
+	URI_PARSER_FORMAT_ASTERISK, /* first character is '*' */
+	URI_PARSER_FORMAT_ABSPATH, /* first character is '/' */
+	URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY, /* any other first char */
+};
+
+/* Parser context for an HTTP URI. Must be initialized with
+ * http_uri_parser_init() before use.
+ *
+ * The parser API is not idempotent. For an initialized parser instance, each
+ * URI element can be extracted only once, using its related function:
+ * - http_parse_scheme
+ * - http_parse_authority
+ * - http_parse_path
+ *
+ * Elements must also be extracted in their order of appearance in the URI
+ * according to RFC 3986. However, it is possible to skip the parsing of
+ * elements which are of no interest.
+ *
+ * If the above rules are not respected, the parsing functions return an empty
+ * ist.
+ */
+struct http_uri_parser {
+	struct ist uri; /* HTTP URI being parsed */
+	enum http_uri_parser_state state; /* HTTP URI elements parsed so far */
+	enum http_uri_parser_format format; /* RFC 7230 5.3 HTTP URI format */
+};
+
 #endif /* _HAPROXY_HTTP_T_H */
 
 /*
diff --git a/include/haproxy/http.h b/include/haproxy/http.h
index 991eece49..faf00a034 100644
--- a/include/haproxy/http.h
+++ b/include/haproxy/http.h
@@ -131,6 +131,42 @@ static inline enum http_etag_type http_get_etag_type(const struct ist etag)
 	return ETAG_INVALID;
 }
 
+/* Initialize a HTTP URI parser to use it with http URI parsing functions. The
+ * URI format is detected according to its first character.
+ */ +static inline struct http_uri_parser http_uri_parser_init(const struct ist uri) +{ + struct http_uri_parser parser = { + .uri = uri, + .state = URI_PARSER_STATE_BEFORE, + }; + + /* RFC7230, par. 2.7 : + * Request-URI = "*" | absuri | abspath | authority + */ + + if (!istlen(parser.uri)) { + parser.format = URI_PARSER_FORMAT_EMPTY; + } + else { + /* detect the format according to the first URI character */ + switch (*istptr(parser.uri)) { + case '*': + parser.format = URI_PARSER_FORMAT_ASTERISK; + break; + + case '/': + parser.format = URI_PARSER_FORMAT_ABSPATH; + break; + + default: + parser.format = URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY; + break; + } + } + + return parser; +} #endif /* _HAPROXY_HTTP_H */