mirror of
https://git.haproxy.org/git/haproxy.git/
synced 2025-11-09 04:51:01 +01:00
MINOR: uri_normalizer: Add a percent-decode-unreserved normalizer
This normalizer decodes percent encoded characters within the RFC 3986 unreserved set. See GitHub Issue #714.
This commit is contained in:
parent
d6d33deaea
commit
2e4a18e04a
@ -6029,6 +6029,7 @@ http-request normalize-uri <normalizer> [ { if | unless } <condition> ]
|
|||||||
http-request normalize-uri path-merge-slashes [ { if | unless } <condition> ]
|
http-request normalize-uri path-merge-slashes [ { if | unless } <condition> ]
|
||||||
http-request normalize-uri path-strip-dot [ { if | unless } <condition> ]
|
http-request normalize-uri path-strip-dot [ { if | unless } <condition> ]
|
||||||
http-request normalize-uri path-strip-dotdot [ full ] [ { if | unless } <condition> ]
|
http-request normalize-uri path-strip-dotdot [ full ] [ { if | unless } <condition> ]
|
||||||
|
http-request normalize-uri percent-decode-unreserved [ strict ] [ { if | unless } <condition> ]
|
||||||
http-request normalize-uri percent-to-uppercase [ strict ] [ { if | unless } <condition> ]
|
http-request normalize-uri percent-to-uppercase [ strict ] [ { if | unless } <condition> ]
|
||||||
http-request normalize-uri query-sort-by-name [ { if | unless } <condition> ]
|
http-request normalize-uri query-sort-by-name [ { if | unless } <condition> ]
|
||||||
|
|
||||||
@ -6048,11 +6049,25 @@ http-request normalize-uri query-sort-by-name [ { if | unless } <condition> ]
|
|||||||
filesystem. However it might break routing of an API that expects a specific
|
filesystem. However it might break routing of an API that expects a specific
|
||||||
number of segments in the path.
|
number of segments in the path.
|
||||||
|
|
||||||
|
It is important to note that some normalizers might result in unsafe
|
||||||
|
transformations for broken URIs. It might also be possible that a combination
|
||||||
|
of normalizers that are safe by themselves results in unsafe transformations
|
||||||
|
when improperly combined.
|
||||||
|
|
||||||
|
As an example the "percent-decode-unreserved" normalizer might result in
|
||||||
|
unexpected results when a broken URI includes bare percent characters. One
|
||||||
|
such broken URI is "/%%36%36" which would be decoded to "/%66" which in
|
||||||
|
turn is equivalent to "/f". By specifying the "strict" option requests to
|
||||||
|
such a broken URI would safely be rejected.
|
||||||
|
|
||||||
The following normalizers are available:
|
The following normalizers are available:
|
||||||
|
|
||||||
- path-strip-dot: Removes "/./" segments within the "path" component
|
- path-strip-dot: Removes "/./" segments within the "path" component
|
||||||
(RFC 3986#6.2.2.3).
|
(RFC 3986#6.2.2.3).
|
||||||
|
|
||||||
|
Segments including percent encoded dots ("%2E") will not be detected. Use
|
||||||
|
the "percent-decode-unreserved" normalizer first if this is undesired.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
- /. -> /
|
- /. -> /
|
||||||
- /./bar/ -> /bar/
|
- /./bar/ -> /bar/
|
||||||
@ -6063,8 +6078,13 @@ http-request normalize-uri query-sort-by-name [ { if | unless } <condition> ]
|
|||||||
(RFC 3986#6.2.2.3).
|
(RFC 3986#6.2.2.3).
|
||||||
|
|
||||||
This merges segments that attempt to access the parent directory with
|
This merges segments that attempt to access the parent directory with
|
||||||
their preceding segment. Empty segments do not receive special treatment.
|
their preceding segment.
|
||||||
Use the "path-merge-slashes" normalizer first if this is undesired.
|
|
||||||
|
Empty segments do not receive special treatment. Use the "path-merge-slashes"
|
||||||
|
normalizer first if this is undesired.
|
||||||
|
|
||||||
|
Segments including percent encoded dots ("%2E") will not be detected. Use
|
||||||
|
the "percent-decode-unreserved" normalizer first if this is undesired.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
- /foo/../ -> /
|
- /foo/../ -> /
|
||||||
@ -6073,6 +6093,7 @@ http-request normalize-uri query-sort-by-name [ { if | unless } <condition> ]
|
|||||||
- /../bar/ -> /../bar/
|
- /../bar/ -> /../bar/
|
||||||
- /bar/../../ -> /../
|
- /bar/../../ -> /../
|
||||||
- /foo//../ -> /foo/
|
- /foo//../ -> /foo/
|
||||||
|
- /foo/%2E%2E/ -> /foo/%2E%2E/
|
||||||
|
|
||||||
If the "full" option is specified then "../" at the beginning will be
|
If the "full" option is specified then "../" at the beginning will be
|
||||||
removed as well:
|
removed as well:
|
||||||
@ -6088,6 +6109,25 @@ http-request normalize-uri query-sort-by-name [ { if | unless } <condition> ]
|
|||||||
- // -> /
|
- // -> /
|
||||||
- /foo//bar -> /foo/bar
|
- /foo//bar -> /foo/bar
|
||||||
|
|
||||||
|
- percent-decode-unreserved: Decodes unreserved percent encoded characters to
|
||||||
|
their representation as a regular character (RFC 3986#6.2.2.2).
|
||||||
|
|
||||||
|
The set of unreserved characters includes all letters, all digits, "-",
|
||||||
|
".", "_", and "~".
|
||||||
|
|
||||||
|
Example:
|
||||||
|
- /%61dmin -> /admin
|
||||||
|
- /foo%3Fbar=baz -> /foo%3Fbar=baz (no change)
|
||||||
|
- /%%36%36 -> /%66 (unsafe)
|
||||||
|
- /%ZZ -> /%ZZ
|
||||||
|
|
||||||
|
If the "strict" option is specified then invalid sequences will result
|
||||||
|
in an HTTP 400 Bad Request being returned.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
- /%%36%36 -> HTTP 400
|
||||||
|
- /%ZZ -> HTTP 400
|
||||||
|
|
||||||
- percent-to-uppercase: Uppercases letters within percent-encoded sequences
|
- percent-to-uppercase: Uppercases letters within percent-encoded sequences
|
||||||
(RFC 3986#6.2.2.1).
|
(RFC 3986#6.2.2.1).
|
||||||
|
|
||||||
|
|||||||
@ -109,6 +109,8 @@ enum act_normalize_uri {
|
|||||||
ACT_NORMALIZE_URI_QUERY_SORT_BY_NAME,
|
ACT_NORMALIZE_URI_QUERY_SORT_BY_NAME,
|
||||||
ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE,
|
ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE,
|
||||||
ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE_STRICT,
|
ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE_STRICT,
|
||||||
|
ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED,
|
||||||
|
ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT,
|
||||||
};
|
};
|
||||||
|
|
||||||
/* NOTE: if <.action_ptr> is defined, the referenced function will always be
|
/* NOTE: if <.action_ptr> is defined, the referenced function will always be
|
||||||
|
|||||||
@ -18,6 +18,7 @@
|
|||||||
|
|
||||||
#include <haproxy/uri_normalizer-t.h>
|
#include <haproxy/uri_normalizer-t.h>
|
||||||
|
|
||||||
|
enum uri_normalizer_err uri_normalizer_percent_decode_unreserved(const struct ist input, int strict, struct ist *dst);
|
||||||
enum uri_normalizer_err uri_normalizer_percent_upper(const struct ist input, int strict, struct ist *dst);
|
enum uri_normalizer_err uri_normalizer_percent_upper(const struct ist input, int strict, struct ist *dst);
|
||||||
enum uri_normalizer_err uri_normalizer_path_dot(const struct ist path, struct ist *dst);
|
enum uri_normalizer_err uri_normalizer_path_dot(const struct ist path, struct ist *dst);
|
||||||
enum uri_normalizer_err uri_normalizer_path_dotdot(const struct ist path, int full, struct ist *dst);
|
enum uri_normalizer_err uri_normalizer_path_dotdot(const struct ist path, int full, struct ist *dst);
|
||||||
|
|||||||
@ -8,7 +8,7 @@ feature ignore_unknown_macro
|
|||||||
server s1 {
|
server s1 {
|
||||||
rxreq
|
rxreq
|
||||||
txresp
|
txresp
|
||||||
} -repeat 54 -start
|
} -repeat 63 -start
|
||||||
|
|
||||||
haproxy h1 -conf {
|
haproxy h1 -conf {
|
||||||
defaults
|
defaults
|
||||||
@ -94,6 +94,30 @@ haproxy h1 -conf {
|
|||||||
|
|
||||||
default_backend be
|
default_backend be
|
||||||
|
|
||||||
|
frontend fe_percent_decode_unreserved
|
||||||
|
bind "fd@${fe_percent_decode_unreserved}"
|
||||||
|
|
||||||
|
http-request set-var(txn.before) url
|
||||||
|
http-request normalize-uri percent-decode-unreserved
|
||||||
|
http-request set-var(txn.after) url
|
||||||
|
|
||||||
|
http-response add-header before %[var(txn.before)]
|
||||||
|
http-response add-header after %[var(txn.after)]
|
||||||
|
|
||||||
|
default_backend be
|
||||||
|
|
||||||
|
frontend fe_percent_decode_unreserved_strict
|
||||||
|
bind "fd@${fe_percent_decode_unreserved_strict}"
|
||||||
|
|
||||||
|
http-request set-var(txn.before) url
|
||||||
|
http-request normalize-uri percent-decode-unreserved strict
|
||||||
|
http-request set-var(txn.after) url
|
||||||
|
|
||||||
|
http-response add-header before %[var(txn.before)]
|
||||||
|
http-response add-header after %[var(txn.after)]
|
||||||
|
|
||||||
|
default_backend be
|
||||||
|
|
||||||
backend be
|
backend be
|
||||||
server s1 ${s1_addr}:${s1_port}
|
server s1 ${s1_addr}:${s1_port}
|
||||||
|
|
||||||
@ -391,3 +415,52 @@ client c6 -connect ${h1_fe_dot_sock} {
|
|||||||
expect resp.http.before == "/?a=/./"
|
expect resp.http.before == "/?a=/./"
|
||||||
expect resp.http.after == "/?a=/./"
|
expect resp.http.after == "/?a=/./"
|
||||||
} -run
|
} -run
|
||||||
|
|
||||||
|
client c7 -connect ${h1_fe_percent_decode_unreserved_sock} {
|
||||||
|
txreq -url "/a?a=a"
|
||||||
|
rxresp
|
||||||
|
expect resp.http.before == "/a?a=a"
|
||||||
|
expect resp.http.after == "/a?a=a"
|
||||||
|
|
||||||
|
txreq -url "/%61?%61=%61"
|
||||||
|
rxresp
|
||||||
|
expect resp.http.before == "/%61?%61=%61"
|
||||||
|
expect resp.http.after == "/a?a=a"
|
||||||
|
|
||||||
|
txreq -url "/%3F?foo=bar"
|
||||||
|
rxresp
|
||||||
|
expect resp.http.before == "/%3F?foo=bar"
|
||||||
|
expect resp.http.after == "/%3F?foo=bar"
|
||||||
|
|
||||||
|
txreq -url "/%%36%36"
|
||||||
|
rxresp
|
||||||
|
expect resp.status == 200
|
||||||
|
expect resp.http.before == "/%%36%36"
|
||||||
|
expect resp.http.after == "/%66"
|
||||||
|
|
||||||
|
txreq -req OPTIONS -url "*"
|
||||||
|
rxresp
|
||||||
|
expect resp.http.before == "*"
|
||||||
|
expect resp.http.after == "*"
|
||||||
|
} -run
|
||||||
|
|
||||||
|
client c8 -connect ${h1_fe_percent_decode_unreserved_strict_sock} {
|
||||||
|
txreq -url "/a?a=a"
|
||||||
|
rxresp
|
||||||
|
expect resp.http.before == "/a?a=a"
|
||||||
|
expect resp.http.after == "/a?a=a"
|
||||||
|
|
||||||
|
txreq -url "/%61?%61=%61"
|
||||||
|
rxresp
|
||||||
|
expect resp.http.before == "/%61?%61=%61"
|
||||||
|
expect resp.http.after == "/a?a=a"
|
||||||
|
|
||||||
|
txreq -url "/%3F?foo=bar"
|
||||||
|
rxresp
|
||||||
|
expect resp.http.before == "/%3F?foo=bar"
|
||||||
|
expect resp.http.after == "/%3F?foo=bar"
|
||||||
|
|
||||||
|
txreq -url "/%%36%36"
|
||||||
|
rxresp
|
||||||
|
expect resp.status == 400
|
||||||
|
} -run
|
||||||
|
|||||||
@ -294,6 +294,24 @@ static enum act_return http_action_normalize_uri(struct act_rule *rule, struct p
|
|||||||
|
|
||||||
err = uri_normalizer_percent_upper(path, rule->action == ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE_STRICT, &newpath);
|
err = uri_normalizer_percent_upper(path, rule->action == ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE_STRICT, &newpath);
|
||||||
|
|
||||||
|
if (err != URI_NORMALIZER_ERR_NONE)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (!http_replace_req_path(htx, newpath, 1))
|
||||||
|
goto fail_rewrite;
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED:
|
||||||
|
case ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT: {
|
||||||
|
const struct ist path = http_get_path(uri);
|
||||||
|
struct ist newpath = ist2(replace->area, replace->size);
|
||||||
|
|
||||||
|
if (!isttest(path))
|
||||||
|
goto leave;
|
||||||
|
|
||||||
|
err = uri_normalizer_percent_decode_unreserved(path, rule->action == ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT, &newpath);
|
||||||
|
|
||||||
if (err != URI_NORMALIZER_ERR_NONE)
|
if (err != URI_NORMALIZER_ERR_NONE)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -407,6 +425,21 @@ static enum act_parse_ret parse_http_normalize_uri(const char **args, int *orig_
|
|||||||
return ACT_RET_PRS_ERR;
|
return ACT_RET_PRS_ERR;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (strcmp(args[cur_arg], "percent-decode-unreserved") == 0) {
|
||||||
|
cur_arg++;
|
||||||
|
|
||||||
|
if (strcmp(args[cur_arg], "strict") == 0) {
|
||||||
|
cur_arg++;
|
||||||
|
rule->action = ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT;
|
||||||
|
}
|
||||||
|
else if (!*args[cur_arg]) {
|
||||||
|
rule->action = ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED;
|
||||||
|
}
|
||||||
|
else if (strcmp(args[cur_arg], "if") != 0 && strcmp(args[cur_arg], "unless") != 0) {
|
||||||
|
memprintf(err, "unknown argument '%s' for 'percent-decode-unreserved' normalizer", args[cur_arg]);
|
||||||
|
return ACT_RET_PRS_ERR;
|
||||||
|
}
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
memprintf(err, "unknown normalizer '%s'", args[cur_arg]);
|
memprintf(err, "unknown normalizer '%s'", args[cur_arg]);
|
||||||
return ACT_RET_PRS_ERR;
|
return ACT_RET_PRS_ERR;
|
||||||
|
|||||||
@ -18,6 +18,101 @@
|
|||||||
#include <haproxy/tools.h>
|
#include <haproxy/tools.h>
|
||||||
#include <haproxy/uri_normalizer.h>
|
#include <haproxy/uri_normalizer.h>
|
||||||
|
|
||||||
|
/* Returns 1 if the given character is part of the 'unreserved' set in the
 * RFC 3986 ABNF, i.e. ALPHA / DIGIT / "-" / "." / "_" / "~".
 * Returns 0 if not.
 *
 * The argument is an unsigned char so that octets above 0x7F are never
 * mistaken for a negative value; none of them is unreserved.
 */
static int is_unreserved_character(unsigned char c)
{
	/* ALPHA. Plain comparisons are used instead of case ranges, which are
	 * a compiler extension and not part of ISO C.
	 */
	if (c >= 'A' && c <= 'Z')
		return 1;

	if (c >= 'a' && c <= 'z')
		return 1;

	/* DIGIT */
	if (c >= '0' && c <= '9')
		return 1;

	/* The four remaining unreserved punctuation characters. */
	return c == '-' || c == '.' || c == '_' || c == '~';
}
|
||||||
|
|
||||||
|
/* Decodes percent encoded characters that are part of the 'unreserved' set.
|
||||||
|
*
|
||||||
|
* RFC 3986, section 2.3:
|
||||||
|
* > URIs that differ in the replacement of an unreserved character with
|
||||||
|
* > its corresponding percent-encoded US-ASCII octet are equivalent [...]
|
||||||
|
* > when found in a URI, should be decoded to their corresponding unreserved
|
||||||
|
* > characters by URI normalizers.
|
||||||
|
*
|
||||||
|
* If `strict` is set to 0 then percent characters that are not followed by a
|
||||||
|
* hexadecimal digit are returned as-is without performing any decoding.
|
||||||
|
* If `strict` is set to 1 then `URI_NORMALIZER_ERR_INVALID_INPUT` is returned
|
||||||
|
* for invalid sequences.
|
||||||
|
*/
|
||||||
|
enum uri_normalizer_err uri_normalizer_percent_decode_unreserved(const struct ist input, int strict, struct ist *dst)
|
||||||
|
{
|
||||||
|
enum uri_normalizer_err err;
|
||||||
|
|
||||||
|
const size_t size = istclear(dst);
|
||||||
|
struct ist output = *dst;
|
||||||
|
|
||||||
|
struct ist scanner = input;
|
||||||
|
|
||||||
|
/* The output will either be shortened or have the same length. */
|
||||||
|
if (size < istlen(input)) {
|
||||||
|
err = URI_NORMALIZER_ERR_ALLOC;
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (istlen(scanner)) {
|
||||||
|
const char current = istshift(&scanner);
|
||||||
|
|
||||||
|
if (current == '%') {
|
||||||
|
if (istlen(scanner) >= 2) {
|
||||||
|
if (ishex(istptr(scanner)[0]) && ishex(istptr(scanner)[1])) {
|
||||||
|
char hex1, hex2, c;
|
||||||
|
|
||||||
|
hex1 = istshift(&scanner);
|
||||||
|
hex2 = istshift(&scanner);
|
||||||
|
c = (hex2i(hex1) << 4) + hex2i(hex2);
|
||||||
|
|
||||||
|
if (is_unreserved_character(c)) {
|
||||||
|
output = __istappend(output, c);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
output = __istappend(output, current);
|
||||||
|
output = __istappend(output, hex1);
|
||||||
|
output = __istappend(output, hex2);
|
||||||
|
}
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (strict) {
|
||||||
|
err = URI_NORMALIZER_ERR_INVALID_INPUT;
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
output = __istappend(output, current);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
output = __istappend(output, current);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
*dst = output;
|
||||||
|
|
||||||
|
return URI_NORMALIZER_ERR_NONE;
|
||||||
|
|
||||||
|
fail:
|
||||||
|
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
/* Uppercases letters used in percent encoding.
|
/* Uppercases letters used in percent encoding.
|
||||||
*
|
*
|
||||||
* If `strict` is set to 0 then percent characters that are not followed by a
|
* If `strict` is set to 0 then percent characters that are not followed by a
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user