MINOR: http: implement http_get_scheme

This function can be used to retrieve the scheme part of a URI, including
the '://' suffix. It will be useful for implementing scheme-based normalization.
This commit is contained in:
Amaury Denoyelle 2021-07-07 10:49:25 +02:00 committed by Christopher Faulet
parent 5b654ad42c
commit ef08811240
2 changed files with 51 additions and 0 deletions

View File

@ -36,6 +36,7 @@ extern const uint8_t http_char_classes[256];
enum http_meth_t find_http_meth(const char *str, const int len);
int http_get_status_idx(unsigned int status);
const char *http_get_reason(unsigned int status);
/* Returns the leading scheme of <uri> together with its "://" suffix, or
 * IST_NULL when <uri> does not start with "<scheme>://".
 */
struct ist http_get_scheme(const struct ist uri);
struct ist http_get_authority(const struct ist uri, int no_userinfo);
struct ist http_get_path(const struct ist uri);
int http_header_match2(const char *hdr, const char *end,

View File

@ -468,6 +468,56 @@ const char *http_get_reason(unsigned int status)
}
}
/* Look for a scheme at the very beginning of <uri>. On success, the returned
 * ist starts at the first byte of <uri> and spans the scheme name plus its
 * "://" suffix. When <uri> is empty, does not start with a letter, or the
 * scheme characters are not immediately followed by "://", IST_NULL is
 * returned instead.
 */
struct ist http_get_scheme(const struct ist uri)
{
	const char *begin = uri.ptr;
	const char *limit;
	const char *cur;

	/* nothing to inspect in an empty uri */
	if (!uri.len)
		return IST_NULL;

	limit = begin + uri.len;

	/* RFC7230, par. 2.7 :
	 *    Request-URI = "*" | absuri | abspath | authority
	 *
	 * "*" and an abspath ("/...") never carry a scheme. Anything else
	 * must start with a letter to be a scheme as described by RFC3986,
	 * par. 3.1 (or an authority alone, in case of a CONNECT method).
	 */
	if (*begin == '*' || *begin == '/' || !isalpha((unsigned char)*begin))
		return IST_NULL;

	/* consume the remaining scheme characters following the first letter */
	for (cur = begin + 1; cur < limit; cur++) {
		unsigned char c = (unsigned char)*cur;

		if (!isalnum(c) && c != '+' && c != '-' && c != '.')
			break;
	}

	/* the scheme is only valid when directly followed by "://". If the
	 * suffix is missing or truncated, this is an authority-only uri.
	 */
	if (limit - cur < 3 || cur[0] != ':' || cur[1] != '/' || cur[2] != '/')
		return IST_NULL;

	/* the returned ist covers the scheme and the "://" suffix */
	cur += 3;
	return ist2(begin, cur - begin);
}
/* Parse the uri and looks for the authority, between the scheme and the
* path. if no_userinfo is not zero, the part before the '@' (including it) is
* skipped. If not found, an empty ist is returned. Otherwise, the ist pointing