From 8d2514e0872a597e2f72659924aa67352b78c4d4 Mon Sep 17 00:00:00 2001 From: Christopher Faulet Date: Fri, 24 May 2024 15:59:35 +0200 Subject: [PATCH] BUG/MINOR: http-htx: Support default path during scheme based normalization As stated in RFC3986, for an absolute-form URI, an empty path should be normalized to a path of "/". This is part of the scheme-based normalization rules. This kind of normalization is already performed for default ports. So we might as well deal with the case of an empty path. The associated reg-tests were updated accordingly. This patch should fix the issue #2573. It may be backported as far as 2.4 if necessary. --- .../http-messaging/h1_host_normalization.vtc | 152 +++++++++++++++++- src/http_htx.c | 24 ++- 2 files changed, 164 insertions(+), 12 deletions(-) diff --git a/reg-tests/http-messaging/h1_host_normalization.vtc b/reg-tests/http-messaging/h1_host_normalization.vtc index 48174b819..909d52509 100644 --- a/reg-tests/http-messaging/h1_host_normalization.vtc +++ b/reg-tests/http-messaging/h1_host_normalization.vtc @@ -175,22 +175,62 @@ syslog S1 -level info { # C32 recv - expect ~ "^.* uri: GET http:// HTTP/1.1; host: {}$" + expect ~ "^.* uri: GET http:/// HTTP/1.1; host: {}$" barrier b1 sync # C33 recv - expect ~ "^.* uri: GET https:// HTTP/1.1; host: {}$" + expect ~ "^.* uri: GET https:/// HTTP/1.1; host: {}$" barrier b1 sync # C34 recv - expect ~ "^.* uri: GET http:// HTTP/1.1; host: {}$" + expect ~ "^.* uri: GET http:/// HTTP/1.1; host: {}$" barrier b1 sync # C35 recv - expect ~ "^.* uri: GET https:// HTTP/1.1; host: {}$" + expect ~ "^.* uri: GET https:/// HTTP/1.1; host: {}$" + barrier b1 sync + + # C36 + recv + expect ~ "^.* uri: GET http://hostname/ HTTP/1.1; host: {hostname}$" + barrier b1 sync + + # C37 + recv + expect ~ "^.* uri: GET http://hostname/ HTTP/1.1; host: {hostname}$" + barrier b1 sync + + # C38 + recv + expect ~ "^.* uri: GET http://hostname/ HTTP/1.1; host: {hostname}$" + barrier b1 sync + + # C39 + recv + expect ~ "^.* 
uri: GET https://hostname/ HTTP/1.1; host: {hostname}$" + barrier b1 sync + + # C40 + recv + expect ~ "^.* uri: GET https://hostname/ HTTP/1.1; host: {hostname}$" + barrier b1 sync + + # C41 + recv + expect ~ "^.* uri: GET https://hostname/ HTTP/1.1; host: {hostname}$" + barrier b1 sync + + # C42 + recv + expect ~ "^.* uri: GET http://hostname:81/ HTTP/1.1; host: {hostname:81}$" + barrier b1 sync + + # C43 + recv + expect ~ "^.* uri: GET https://hostname:444/ HTTP/1.1; host: {hostname:444}$" } -start @@ -759,4 +799,108 @@ client c35 -connect ${h1_fe_sock} { expect resp.status == 200 } -run +# Wait matching on log message +barrier b1 sync + +client c36 -connect ${h1_fe_sock} { + txreq \ + -req "GET" \ + -url "http://hostname" \ + -hdr "host: hostname" + + rxresp + expect resp.status == 200 +} -run + +# Wait matching on log message +barrier b1 sync + +client c37 -connect ${h1_fe_sock} { + txreq \ + -req "GET" \ + -url "http://hostname:80" \ + -hdr "host: hostname" + + rxresp + expect resp.status == 200 +} -run + +# Wait matching on log message +barrier b1 sync + +client c38 -connect ${h1_fe_sock} { + txreq \ + -req "GET" \ + -url "http://hostname:" \ + -hdr "host: hostname" + + rxresp + expect resp.status == 200 +} -run + +# Wait matching on log message +barrier b1 sync + +client c39 -connect ${h1_fe_sock} { + txreq \ + -req "GET" \ + -url "https://hostname" \ + -hdr "host: hostname" + + rxresp + expect resp.status == 200 +} -run + +# Wait matching on log message +barrier b1 sync + +client c40 -connect ${h1_fe_sock} { + txreq \ + -req "GET" \ + -url "https://hostname:443" \ + -hdr "host: hostname" + + rxresp + expect resp.status == 200 +} -run + +# Wait matching on log message +barrier b1 sync + +client c41 -connect ${h1_fe_sock} { + txreq \ + -req "GET" \ + -url "https://hostname:" \ + -hdr "host: hostname" + + rxresp + expect resp.status == 200 +} -run + +# Wait matching on log message +barrier b1 sync + +client c42 -connect ${h1_fe_sock} { + txreq \ + -req "GET" \ 
+ -url "http://hostname:81" \ + -hdr "host: hostname:81" + + rxresp + expect resp.status == 200 +} -run + +# Wait matching on log message +barrier b1 sync + +client c43 -connect ${h1_fe_sock} { + txreq \ + -req "GET" \ + -url "https://hostname:444" \ + -hdr "host: hostname:444" + + rxresp + expect resp.status == 200 +} -run + syslog S1 -wait diff --git a/src/http_htx.c b/src/http_htx.c index 405b688b6..36356edf0 100644 --- a/src/http_htx.c +++ b/src/http_htx.c @@ -1828,8 +1828,9 @@ int http_scheme_based_normalize(struct htx *htx) { struct http_hdr_ctx ctx; struct htx_sl *sl; - struct ist uri, scheme, authority, host, port; + struct ist uri, scheme, authority, host, port, path; struct http_uri_parser parser; + int normalize = 0; sl = http_get_stline(htx); @@ -1846,14 +1847,21 @@ int http_scheme_based_normalize(struct htx *htx) /* Extract the port if present in authority */ authority = http_parse_authority(&parser, 1); + path = http_parse_path(&parser); port = http_get_host_port(authority); - if (!isttest(port)) { - /* if no port found, no normalization to proceed */ - return 0; + if (!isttest(port) || !http_is_default_port(scheme, port)) + host = authority; + else { + host = isttrim(authority, istlen(authority) - istlen(port) - 1); + normalize = 1; } - host = isttrim(authority, istlen(authority) - istlen(port) - 1); - if (http_is_default_port(scheme, port)) { + if (!isttest(path)) { + path = ist("/"); + normalize = 1; + } + + if (normalize) { /* reconstruct the uri with removal of the port */ struct buffer *temp = get_trash_chunk(); struct ist meth, vsn; @@ -1869,8 +1877,8 @@ int http_scheme_based_normalize(struct htx *htx) /* reconstruct uri without port */ chunk_memcat(temp, uri.ptr, authority.ptr - uri.ptr); chunk_istcat(temp, host); - chunk_memcat(temp, istend(authority), istend(uri) - istend(authority)); - uri = ist2(temp->area + meth.len + vsn.len, host.len + uri.len - authority.len); /* uri */ + chunk_istcat(temp, path); + uri = ist2(temp->area + meth.len + 
vsn.len, host.len + path.len + authority.ptr - uri.ptr); /* uri */ http_replace_stline(htx, meth, uri, vsn);