From dec1c36b3a4a2856496fd2ed79735a11818e7e56 Mon Sep 17 00:00:00 2001 From: Tim Duesterhus Date: Mon, 10 May 2021 17:28:26 +0200 Subject: [PATCH] MINOR: uri_normalizer: Add `fragment-encode` normalizer This normalizer encodes '#' as '%23'. See GitHub Issue #714. --- doc/configuration.txt | 9 +++++++ include/haproxy/action-t.h | 1 + include/haproxy/uri_normalizer.h | 1 + reg-tests/http-rules/normalize_uri.vtc | 36 +++++++++++++++++++++++++- src/http_act.c | 22 ++++++++++++++++ src/uri_normalizer.c | 35 +++++++++++++++++++++++++ 6 files changed, 103 insertions(+), 1 deletion(-) diff --git a/doc/configuration.txt b/doc/configuration.txt index 7ab7baadc..442b61e17 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -6172,6 +6172,7 @@ http-request early-hint [ { if | unless } ] See RFC 8297 for more information. http-request normalize-uri [ { if | unless } ] +http-request normalize-uri fragment-encode [ { if | unless } ] http-request normalize-uri fragment-strip [ { if | unless } ] http-request normalize-uri path-merge-slashes [ { if | unless } ] http-request normalize-uri path-strip-dot [ { if | unless } ] @@ -6210,6 +6211,14 @@ http-request normalize-uri query-sort-by-name [ { if | unless } ] The following normalizers are available: + - fragment-encode: Encodes "#" as "%23". + + The "fragment-strip" normalizer should be preferred, unless it is known + that broken clients do not correctly encode '#' within the path component. + + Example: + - /#foo -> /%23foo + - fragment-strip: Removes the URI's "fragment" component. According to RFC 3986#3.5 the "fragment" component of an URI should not diff --git a/include/haproxy/action-t.h b/include/haproxy/action-t.h index 56ac32f7f..d4fc3f6da 100644 --- a/include/haproxy/action-t.h +++ b/include/haproxy/action-t.h @@ -112,6 +112,7 @@ enum act_normalize_uri { ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED, ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT, ACT_NORMALIZE_URI_FRAGMENT_STRIP, + ACT_NORMALIZE_URI_FRAGMENT_ENCODE, }; /* NOTE: if <.action_ptr> is defined, the referenced function will always be diff --git a/include/haproxy/uri_normalizer.h b/include/haproxy/uri_normalizer.h index fa5d89dd0..b384007f5 100644 --- a/include/haproxy/uri_normalizer.h +++ b/include/haproxy/uri_normalizer.h @@ -26,6 +26,7 @@ static inline enum uri_normalizer_err uri_normalizer_fragment_strip(const struct return URI_NORMALIZER_ERR_NONE; } +enum uri_normalizer_err uri_normalizer_fragment_encode(const struct ist input, struct ist *dst); enum uri_normalizer_err uri_normalizer_percent_decode_unreserved(const struct ist input, int strict, struct ist *dst); enum uri_normalizer_err uri_normalizer_percent_upper(const struct ist input, int strict, struct ist *dst); enum uri_normalizer_err uri_normalizer_path_dot(const struct ist path, struct ist *dst); diff --git a/reg-tests/http-rules/normalize_uri.vtc b/reg-tests/http-rules/normalize_uri.vtc index 792bea5d4..7e2d7491f 100644 --- a/reg-tests/http-rules/normalize_uri.vtc +++ b/reg-tests/http-rules/normalize_uri.vtc @@ -8,7 +8,7 @@ feature ignore_unknown_macro server s1 { rxreq txresp -} -repeat 66 -start +} -repeat 70 -start haproxy h1 -conf { global @@ -137,6 +137,18 @@ haproxy h1 -conf { default_backend be + frontend fe_fragment_encode + bind "fd@${fe_fragment_encode}" + + http-request set-var(txn.before) url + http-request normalize-uri fragment-encode + http-request set-var(txn.after) url + + http-response add-header before %[var(txn.before)] + http-response add-header after %[var(txn.after)] + + default_backend be + backend be server s1 ${s1_addr}:${s1_port} @@ -500,3 +512,25 @@ client c9 -connect ${h1_fe_fragment_strip_sock} { expect resp.http.before == "*" expect resp.http.after == "*" } -run + +client c10 -connect ${h1_fe_fragment_encode_sock} { + txreq -url "/#foo" + rxresp + expect resp.http.before == "/#foo" + expect resp.http.after == "/%23foo" + + txreq -url "/#foo/#foo" + rxresp + expect resp.http.before == "/#foo/#foo" + expect resp.http.after == "/%23foo/%23foo" + + txreq -url "/%23foo" + rxresp + expect resp.http.before == "/%23foo" + expect resp.http.after == "/%23foo" + + txreq -req OPTIONS -url "*" + rxresp + expect resp.http.before == "*" + expect resp.http.after == "*" +} -run diff --git a/src/http_act.c b/src/http_act.c index f30694e5f..f61362475 100644 --- a/src/http_act.c +++ b/src/http_act.c @@ -329,6 +329,23 @@ static enum act_return http_action_normalize_uri(struct act_rule *rule, struct p err = uri_normalizer_fragment_strip(path, &newpath); + if (err != URI_NORMALIZER_ERR_NONE) + break; + + if (!http_replace_req_path(htx, newpath, 1)) + goto fail_rewrite; + + break; + } + case ACT_NORMALIZE_URI_FRAGMENT_ENCODE: { + const struct ist path = http_get_path(uri); + struct ist newpath = ist2(replace->area, replace->size); + + if (!isttest(path)) + goto leave; + + err = uri_normalizer_fragment_encode(path, &newpath); + if (err != URI_NORMALIZER_ERR_NONE) break; @@ -462,6 +479,11 @@ static enum act_parse_ret parse_http_normalize_uri(const char **args, int *orig_ rule->action = ACT_NORMALIZE_URI_FRAGMENT_STRIP; } + else if (strcmp(args[cur_arg], "fragment-encode") == 0) { + cur_arg++; + + rule->action = ACT_NORMALIZE_URI_FRAGMENT_ENCODE; + } else { memprintf(err, "unknown normalizer '%s'", args[cur_arg]); return ACT_RET_PRS_ERR; diff --git a/src/uri_normalizer.c b/src/uri_normalizer.c index 4fd783d4a..bc793f2f1 100644 --- a/src/uri_normalizer.c +++ b/src/uri_normalizer.c @@ -18,6 +18,41 @@ #include #include +/* Encodes '#' as '%23'. */ +enum uri_normalizer_err uri_normalizer_fragment_encode(const struct ist input, struct ist *dst) +{ + enum uri_normalizer_err err; + + const size_t size = istclear(dst); + struct ist output = *dst; + + struct ist scanner = input; + + while (istlen(scanner)) { + const struct ist before_hash = istsplit(&scanner, '#'); + + if (istcat(&output, before_hash, size) < 0) { + err = URI_NORMALIZER_ERR_ALLOC; + goto fail; + } + + if (istend(before_hash) != istend(scanner)) { + if (istcat(&output, ist("%23"), size) < 0) { + err = URI_NORMALIZER_ERR_ALLOC; + goto fail; + } + } + } + + *dst = output; + + return URI_NORMALIZER_ERR_NONE; + + fail: + + return err; +} + /* Returns 1 if the given character is part of the 'unreserved' set in the * RFC 3986 ABNF. * Returns 0 if not.