MINOR: uri_normalizer: Add a percent-decode-unreserved normalizer

This normalizer decodes percent encoded characters within the RFC 3986
unreserved set.

See GitHub Issue #714.
This commit is contained in:
Tim Duesterhus 2021-04-21 21:20:36 +02:00 committed by Christopher Faulet
parent d6d33deaea
commit 2e4a18e04a
6 changed files with 247 additions and 3 deletions

View File

@ -6029,6 +6029,7 @@ http-request normalize-uri <normalizer> [ { if | unless } <condition> ]
http-request normalize-uri path-merge-slashes [ { if | unless } <condition> ] http-request normalize-uri path-merge-slashes [ { if | unless } <condition> ]
http-request normalize-uri path-strip-dot [ { if | unless } <condition> ] http-request normalize-uri path-strip-dot [ { if | unless } <condition> ]
http-request normalize-uri path-strip-dotdot [ full ] [ { if | unless } <condition> ] http-request normalize-uri path-strip-dotdot [ full ] [ { if | unless } <condition> ]
http-request normalize-uri percent-decode-unreserved [ strict ] [ { if | unless } <condition> ]
http-request normalize-uri percent-to-uppercase [ strict ] [ { if | unless } <condition> ] http-request normalize-uri percent-to-uppercase [ strict ] [ { if | unless } <condition> ]
http-request normalize-uri query-sort-by-name [ { if | unless } <condition> ] http-request normalize-uri query-sort-by-name [ { if | unless } <condition> ]
@ -6048,11 +6049,25 @@ http-request normalize-uri query-sort-by-name [ { if | unless } <condition> ]
filesystem. However it might break routing of an API that expects a specific filesystem. However it might break routing of an API that expects a specific
number of segments in the path. number of segments in the path.
It is important to note that some normalizers might result in unsafe
transformations for broken URIs. It might also be possible that a combination
of normalizers that are safe by themselves results in unsafe transformations
when improperly combined.
As an example the "percent-decode-unreserved" normalizer might result in
unexpected results when a broken URI includes bare percent characters. One
such broken URI is "/%%36%36" which would be decoded to "/%66" which in
turn is equivalent to "/f". By specifying the "strict" option requests to
such a broken URI would safely be rejected.
The following normalizers are available: The following normalizers are available:
- path-strip-dot: Removes "/./" segments within the "path" component - path-strip-dot: Removes "/./" segments within the "path" component
(RFC 3986#6.2.2.3). (RFC 3986#6.2.2.3).
Segments including percent encoded dots ("%2E") will not be detected. Use
the "percent-decode-unreserved" normalizer first if this is undesired.
Example: Example:
- /. -> / - /. -> /
- /./bar/ -> /bar/ - /./bar/ -> /bar/
@ -6063,8 +6078,13 @@ http-request normalize-uri query-sort-by-name [ { if | unless } <condition> ]
(RFC 3986#6.2.2.3). (RFC 3986#6.2.2.3).
This merges segments that attempt to access the parent directory with This merges segments that attempt to access the parent directory with
their preceding segment. Empty segments do not receive special treatment. their preceding segment.
Use the "path-merge-slashes" normalizer first if this is undesired.
Empty segments do not receive special treatment. Use the "path-merge-slashes"
normalizer first if this is undesired.
Segments including percent encoded dots ("%2E") will not be detected. Use
the "percent-decode-unreserved" normalizer first if this is undesired.
Example: Example:
- /foo/../ -> / - /foo/../ -> /
@ -6073,6 +6093,7 @@ http-request normalize-uri query-sort-by-name [ { if | unless } <condition> ]
- /../bar/ -> /../bar/ - /../bar/ -> /../bar/
- /bar/../../ -> /../ - /bar/../../ -> /../
- /foo//../ -> /foo/ - /foo//../ -> /foo/
- /foo/%2E%2E/ -> /foo/%2E%2E/
If the "full" option is specified then "../" at the beginning will be If the "full" option is specified then "../" at the beginning will be
removed as well: removed as well:
@ -6088,6 +6109,25 @@ http-request normalize-uri query-sort-by-name [ { if | unless } <condition> ]
- // -> / - // -> /
- /foo//bar -> /foo/bar - /foo//bar -> /foo/bar
- percent-decode-unreserved: Decodes unreserved percent encoded characters to
their representation as a regular character (RFC 3986#6.2.2.2).
The set of unreserved characters includes all letters, all digits, "-",
".", "_", and "~".
Example:
- /%61dmin -> /admin
- /foo%3Fbar=baz -> /foo%3Fbar=baz (no change)
- /%%36%36 -> /%66 (unsafe)
- /%ZZ -> /%ZZ
If the "strict" option is specified then invalid sequences will result
in an HTTP 400 Bad Request being returned.
Example:
- /%%36%36 -> HTTP 400
- /%ZZ -> HTTP 400
- percent-to-uppercase: Uppercases letters within percent-encoded sequences - percent-to-uppercase: Uppercases letters within percent-encoded sequences
(RFC 3986#6.2.2.1). (RFC 3986#6.2.2.1).

View File

@ -109,6 +109,8 @@ enum act_normalize_uri {
ACT_NORMALIZE_URI_QUERY_SORT_BY_NAME, ACT_NORMALIZE_URI_QUERY_SORT_BY_NAME,
ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE, ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE,
ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE_STRICT, ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE_STRICT,
ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED,
ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT,
}; };
/* NOTE: if <.action_ptr> is defined, the referenced function will always be /* NOTE: if <.action_ptr> is defined, the referenced function will always be

View File

@ -18,6 +18,7 @@
#include <haproxy/uri_normalizer-t.h> #include <haproxy/uri_normalizer-t.h>
enum uri_normalizer_err uri_normalizer_percent_decode_unreserved(const struct ist input, int strict, struct ist *dst);
enum uri_normalizer_err uri_normalizer_percent_upper(const struct ist input, int strict, struct ist *dst); enum uri_normalizer_err uri_normalizer_percent_upper(const struct ist input, int strict, struct ist *dst);
enum uri_normalizer_err uri_normalizer_path_dot(const struct ist path, struct ist *dst); enum uri_normalizer_err uri_normalizer_path_dot(const struct ist path, struct ist *dst);
enum uri_normalizer_err uri_normalizer_path_dotdot(const struct ist path, int full, struct ist *dst); enum uri_normalizer_err uri_normalizer_path_dotdot(const struct ist path, int full, struct ist *dst);

View File

@ -8,7 +8,7 @@ feature ignore_unknown_macro
server s1 { server s1 {
rxreq rxreq
txresp txresp
} -repeat 54 -start } -repeat 63 -start
haproxy h1 -conf { haproxy h1 -conf {
defaults defaults
@ -94,6 +94,30 @@ haproxy h1 -conf {
default_backend be default_backend be
frontend fe_percent_decode_unreserved
bind "fd@${fe_percent_decode_unreserved}"
http-request set-var(txn.before) url
http-request normalize-uri percent-decode-unreserved
http-request set-var(txn.after) url
http-response add-header before %[var(txn.before)]
http-response add-header after %[var(txn.after)]
default_backend be
frontend fe_percent_decode_unreserved_strict
bind "fd@${fe_percent_decode_unreserved_strict}"
http-request set-var(txn.before) url
http-request normalize-uri percent-decode-unreserved strict
http-request set-var(txn.after) url
http-response add-header before %[var(txn.before)]
http-response add-header after %[var(txn.after)]
default_backend be
backend be backend be
server s1 ${s1_addr}:${s1_port} server s1 ${s1_addr}:${s1_port}
@ -391,3 +415,52 @@ client c6 -connect ${h1_fe_dot_sock} {
expect resp.http.before == "/?a=/./" expect resp.http.before == "/?a=/./"
expect resp.http.after == "/?a=/./" expect resp.http.after == "/?a=/./"
} -run } -run
client c7 -connect ${h1_fe_percent_decode_unreserved_sock} {
txreq -url "/a?a=a"
rxresp
expect resp.http.before == "/a?a=a"
expect resp.http.after == "/a?a=a"
txreq -url "/%61?%61=%61"
rxresp
expect resp.http.before == "/%61?%61=%61"
expect resp.http.after == "/a?a=a"
txreq -url "/%3F?foo=bar"
rxresp
expect resp.http.before == "/%3F?foo=bar"
expect resp.http.after == "/%3F?foo=bar"
txreq -url "/%%36%36"
rxresp
expect resp.status == 200
expect resp.http.before == "/%%36%36"
expect resp.http.after == "/%66"
txreq -req OPTIONS -url "*"
rxresp
expect resp.http.before == "*"
expect resp.http.after == "*"
} -run
client c8 -connect ${h1_fe_percent_decode_unreserved_strict_sock} {
txreq -url "/a?a=a"
rxresp
expect resp.http.before == "/a?a=a"
expect resp.http.after == "/a?a=a"
txreq -url "/%61?%61=%61"
rxresp
expect resp.http.before == "/%61?%61=%61"
expect resp.http.after == "/a?a=a"
txreq -url "/%3F?foo=bar"
rxresp
expect resp.http.before == "/%3F?foo=bar"
expect resp.http.after == "/%3F?foo=bar"
txreq -url "/%%36%36"
rxresp
expect resp.status == 400
} -run

View File

@ -294,6 +294,24 @@ static enum act_return http_action_normalize_uri(struct act_rule *rule, struct p
err = uri_normalizer_percent_upper(path, rule->action == ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE_STRICT, &newpath); err = uri_normalizer_percent_upper(path, rule->action == ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE_STRICT, &newpath);
if (err != URI_NORMALIZER_ERR_NONE)
break;
if (!http_replace_req_path(htx, newpath, 1))
goto fail_rewrite;
break;
}
case ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED:
case ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT: {
const struct ist path = http_get_path(uri);
struct ist newpath = ist2(replace->area, replace->size);
if (!isttest(path))
goto leave;
err = uri_normalizer_percent_decode_unreserved(path, rule->action == ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT, &newpath);
if (err != URI_NORMALIZER_ERR_NONE) if (err != URI_NORMALIZER_ERR_NONE)
break; break;
@ -407,6 +425,21 @@ static enum act_parse_ret parse_http_normalize_uri(const char **args, int *orig_
return ACT_RET_PRS_ERR; return ACT_RET_PRS_ERR;
} }
} }
else if (strcmp(args[cur_arg], "percent-decode-unreserved") == 0) {
cur_arg++;
if (strcmp(args[cur_arg], "strict") == 0) {
cur_arg++;
rule->action = ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT;
}
else if (!*args[cur_arg]) {
rule->action = ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED;
}
else if (strcmp(args[cur_arg], "if") != 0 && strcmp(args[cur_arg], "unless") != 0) {
memprintf(err, "unknown argument '%s' for 'percent-decode-unreserved' normalizer", args[cur_arg]);
return ACT_RET_PRS_ERR;
}
}
else { else {
memprintf(err, "unknown normalizer '%s'", args[cur_arg]); memprintf(err, "unknown normalizer '%s'", args[cur_arg]);
return ACT_RET_PRS_ERR; return ACT_RET_PRS_ERR;

View File

@ -18,6 +18,101 @@
#include <haproxy/tools.h> #include <haproxy/tools.h>
#include <haproxy/uri_normalizer.h> #include <haproxy/uri_normalizer.h>
/* Tells whether <c> belongs to the 'unreserved' set of the RFC 3986 ABNF:
 *
 *   unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
 *
 * Returns 1 for a member of the set and 0 for any other value.
 */
static int is_unreserved_character(unsigned char c)
{
	/* ALPHA, upper case */
	if (c >= 'A' && c <= 'Z')
		return 1;

	/* ALPHA, lower case */
	if (c >= 'a' && c <= 'z')
		return 1;

	/* DIGIT */
	if (c >= '0' && c <= '9')
		return 1;

	/* The four punctuation characters of the set. */
	if (c == '-' || c == '.' || c == '_' || c == '~')
		return 1;

	return 0;
}
/* Decodes percent encoded characters that are part of the 'unreserved' set.
*
* RFC 3986, section 2.3:
* > URIs that differ in the replacement of an unreserved character with
* > its corresponding percent-encoded US-ASCII octet are equivalent [...]
* > when found in a URI, should be decoded to their corresponding unreserved
* > characters by URI normalizers.
*
* If `strict` is set to 0 then percent characters that are not followed by a
* hexadecimal digit are returned as-is without performing any decoding.
* If `strict` is set to 1 then `URI_NORMALIZER_ERR_INVALID_INPUT` is returned
* for invalid sequences.
*/
enum uri_normalizer_err uri_normalizer_percent_decode_unreserved(const struct ist input, int strict, struct ist *dst)
{
enum uri_normalizer_err err;
const size_t size = istclear(dst);
struct ist output = *dst;
struct ist scanner = input;
/* The output will either be shortened or have the same length. */
if (size < istlen(input)) {
err = URI_NORMALIZER_ERR_ALLOC;
goto fail;
}
while (istlen(scanner)) {
const char current = istshift(&scanner);
if (current == '%') {
if (istlen(scanner) >= 2) {
if (ishex(istptr(scanner)[0]) && ishex(istptr(scanner)[1])) {
char hex1, hex2, c;
hex1 = istshift(&scanner);
hex2 = istshift(&scanner);
c = (hex2i(hex1) << 4) + hex2i(hex2);
if (is_unreserved_character(c)) {
output = __istappend(output, c);
}
else {
output = __istappend(output, current);
output = __istappend(output, hex1);
output = __istappend(output, hex2);
}
continue;
}
}
if (strict) {
err = URI_NORMALIZER_ERR_INVALID_INPUT;
goto fail;
}
else {
output = __istappend(output, current);
}
}
else {
output = __istappend(output, current);
}
}
*dst = output;
return URI_NORMALIZER_ERR_NONE;
fail:
return err;
}
/* Uppercases letters used in percent encoding. /* Uppercases letters used in percent encoding.
* *
* If `strict` is set to 0 then percent characters that are not followed by a * If `strict` is set to 0 then percent characters that are not followed by a