From 65ad6e12c1aaac8b4336b1240ef12de1b5a498c6 Mon Sep 17 00:00:00 2001 From: William Lallemand Date: Fri, 31 Jan 2014 15:08:02 +0100 Subject: [PATCH] MINOR: http: capture.req.method and capture.req.uri Add 2 sample fetches that extract the method and the URI of an HTTP request. FIXME: the sample fetches parser can't add the LW_REQ requirement; at the moment this flag is set automatically whenever sample fetches are used. Note: also fixed the alphabetical order of other capture.req.* keywords in the doc. --- doc/configuration.txt | 34 ++++++++----- src/log.c | 7 +-- src/proto_http.c | 115 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 141 insertions(+), 15 deletions(-) diff --git a/doc/configuration.txt b/doc/configuration.txt index 014d9db16..6d4df7db7 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -10556,6 +10556,28 @@ base32+src : binary depending on the source address family. This can be used to track per-IP, per-URL counters. +capture.req.hdr(<idx>) : string + This extracts the content of the header captured by the "capture request + header", idx is the position of the capture keyword in the configuration. + The first entry is an index of 0. See also: "capture request header". + +capture.req.method : string + This extracts the METHOD of an HTTP request. Unlike "method", it can be + used in both request and response because the captured value is + allocated. + +capture.req.uri : string + This extracts the request's URI, which starts at the first slash and ends + before the first space in the request (without the host part). Unlike "path" + and "url", it can be used in both request and response because it's + allocated. + +capture.res.hdr(<idx>) : string + This extracts the content of the header captured by the "capture response + header", idx is the position of the capture keyword in the configuration. + The first entry is an index of 0. 
+ See also: "capture response header" + req.cook([<name>]) : string cook([<name>]) : string (deprecated) This extracts the last occurrence of the cookie name on a "Cookie" @@ -10601,18 +10623,6 @@ cookie([<name>]) : string (deprecated) ambiguously uses the direction based on the context where it is used. See also : "appsession". -capture.req.hdr(<idx>) : string - This extracts the content of the header captured by the "capture request - header", idx is the position of the capture keyword in the configuration. - The first entry is an index of 0. - See also: "capture request header" - -capture.res.hdr(<idx>) : string - This extracts the content of the header captured by the "capture response - header", idx is the position of the capture keyword in the configuration. - The first entry is an index of 0. - See also: "capture response header" - hdr([<name>[,<occ>]]) : string This is equivalent to req.hdr() when used on requests, and to res.hdr() when used on responses. Please refer to these respective fetches for more details. diff --git a/src/log.c b/src/log.c index 2a6acf429..046294ebd 100644 --- a/src/log.c +++ b/src/log.c @@ -382,11 +382,12 @@ void add_sample_to_logformat_list(char *text, char *arg, int arg_len, struct pro /* Note, we may also need to set curpx->to_log with certain fetches */ curpx->http_needed |= !!(expr->fetch->use & SMP_USE_HTTP_ANY); - /* FIXME: temporary workaround for missing LW_XPRT flag needed with some - * sample fetches (eg: ssl*). We always set it for now on, but this will - * leave with sample capabilities soon. + /* FIXME: temporary workaround for missing LW_XPRT and LW_REQ flags + * needed with some sample fetches (eg: ssl*). We always set them for + * now, but this workaround will go away with sample capabilities soon. 
 */ curpx->to_log |= LW_XPRT; + curpx->to_log |= LW_REQ; LIST_ADDQ(list_format, &node->list); } diff --git a/src/proto_http.c b/src/proto_http.c index e92dc6a6a..211a37af0 100644 --- a/src/proto_http.c +++ b/src/proto_http.c @@ -835,6 +835,46 @@ http_get_path(struct http_txn *txn) return ptr; } +/* Look for the "/" beginning the PATH in the NUL-terminated request URI <str>. + * Return a pointer to that "/", or NULL when <str> is "*", when a scheme is not followed by "://", or when no "/" is found before the first space or the end of the string. + */ +static char * +http_get_path_from_string(char *str) +{ + char *ptr = str; + + /* RFC2616, par. 5.1.2 : + * Request-URI = "*" | absuri | abspath | authority + */ + + if (*ptr == '*') + return NULL; + + if (isalpha((unsigned char)*ptr)) { + /* this is a scheme as described by RFC3986, par. 3.1 */ + ptr++; + while (isalnum((unsigned char)*ptr) || *ptr == '+' || *ptr == '-' || *ptr == '.') + ptr++; + /* skip '://', giving up as soon as one of the three characters is missing */ + if (*ptr == '\0' || *ptr++ != ':') + return NULL; + if (*ptr == '\0' || *ptr++ != '/') + return NULL; + if (*ptr == '\0' || *ptr++ != '/') + return NULL; + } + /* skip [user[:passwd]@]host[:[port]] */ + + while (*ptr != '\0' && *ptr != ' ' && *ptr != '/') + ptr++; + + if (*ptr == '\0' || *ptr == ' ') + return NULL; + + /* OK, we got the '/' ! */ + return ptr; +} + /* Returns a 302 for a redirectable request that reaches a server working in * in redirect mode. This may only be called just after the stream interface * has moved to SI_ST_ASS. 
Unprocessable requests are left unchanged and will @@ -9731,6 +9771,78 @@ smp_fetch_capture_header_res(struct proxy *px, struct session *l4, void *l7, uns return 1; } +/* Extracts the METHOD of the HTTP request: the part of txn->uri located before the first space (or all of it when there is no space) is copied into the chunk returned by get_trash_chunk(), truncated to temp->size - 1 bytes and NUL-terminated. Returns 0 when txn->uri is not set, otherwise 1 with a SMP_T_STR sample. The txn->uri should be filled before the call. */ +static int +smp_fetch_capture_req_method(struct proxy *px, struct session *l4, void *l7, unsigned int opt, + const struct arg *args, struct sample *smp, const char *kw) +{ + struct chunk *temp; + struct http_txn *txn = l7; + char *spc; + int len; + + if (!txn->uri) + return 0; + + spc = strchr(txn->uri, ' '); /* first space, i.e. the one located before the URI */ + if (likely(spc)) + len = spc - txn->uri; + else + len = strlen(txn->uri); + + temp = get_trash_chunk(); + len = MIN(len, temp->size - 1); + strncpy(temp->str, txn->uri, len); + temp->str[len] = '\0'; + + smp->data.str = *temp; + smp->data.str.len = len; + smp->type = SMP_T_STR; + + return 1; + +} + +/* Extracts the path of the HTTP request from txn->uri: the text following the first space is encoded with encode_string() into the chunk returned by get_trash_chunk(), then the sample starts at the "/" found by http_get_path_from_string() and ends before the next space. Returns 0 when txn->uri is not set or contains no space, when the check on the encode_string() return value fails, or when no path is found; otherwise 1 with a SMP_T_STR sample. The txn->uri should be filled before the call. */ +static int +smp_fetch_capture_req_uri(struct proxy *px, struct session *l4, void *l7, unsigned int opt, + const struct arg *args, struct sample *smp, const char *kw) +{ + struct chunk *temp; + struct http_txn *txn = l7; + char *ptr; + char *ret; + + if (!txn->uri) + return 0; + ptr = txn->uri; + + while (*ptr != ' ' && *ptr != '\0') /* find first space */ + ptr++; + if (!*ptr) + return 0; + + ptr++; /* skip the space */ + + temp = get_trash_chunk(); + ret = encode_string(temp->str, temp->str + temp->size, '#', url_encode_map, ptr); + if (ret == NULL || *ret != '\0') + return 0; + ptr = temp->str = http_get_path_from_string(temp->str); /* NOTE(review): this overwrites the str field of the chunk returned by get_trash_chunk() (with NULL when no path is found) while temp->size is left unchanged, so the copy made into smp->data.str below carries the full size for a shifted start - confirm this is intended */ + if (!ptr) + return 0; + while (*ptr != ' ' && *ptr != '\0') /* find space after URI */ + ptr++; + *ptr = '\0'; + + smp->data.str = *temp; + smp->data.str.len = strlen(smp->data.str.str); + smp->type = SMP_T_STR; + + return 1; +} + + /* Iterate over all cookies present in a message. 
The context is stored in * smp->ctx.a[0] for the in-header position, smp->ctx.a[1] for the * end-of-header-value, and smp->ctx.a[2] for the hdr_ctx. Depending on @@ -10289,6 +10401,9 @@ static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, { { "base32", smp_fetch_base32, 0, NULL, SMP_T_UINT, SMP_USE_HRQHV }, { "base32+src", smp_fetch_base32_src, 0, NULL, SMP_T_BIN, SMP_USE_HRQHV }, + { "capture.req.uri", smp_fetch_capture_req_uri, 0, NULL, SMP_T_CSTR, SMP_USE_HRQHP }, + { "capture.req.method", smp_fetch_capture_req_method, 0, NULL, SMP_T_CSTR, SMP_USE_HRQHP }, + /* capture are allocated and are permanent in the session */ { "capture.req.hdr", smp_fetch_capture_header_req, ARG1(1, UINT), NULL, SMP_T_CSTR, SMP_USE_HRQHP }, { "capture.res.hdr", smp_fetch_capture_header_res, ARG1(1, UINT), NULL, SMP_T_CSTR, SMP_USE_HRSHP },