diff --git a/doc/configuration.txt b/doc/configuration.txt index aa4826fc2..8bf0506d5 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -13752,14 +13752,16 @@ concat([],[],[]) appended after the variable. It may also be omitted. Together, these elements allow to concatenate variables with delimiters to an existing set of variables. This can be used to build new variables made of a succession of - other variables, such as colon-delimited values. Note that due to the config - parser, it is not possible to use a comma nor a closing parenthesis as - delimiters. + other variables, such as colon-delimited values. If commas or closing + parentheses are needed as delimiters, they must be protected by quotes or + backslashes, themselves protected so that they are not stripped by the first + level parser. See examples below. Example: tcp-request session set-var(sess.src) src tcp-request session set-var(sess.dn) ssl_c_s_dn tcp-request session set-var(txn.sig) str(),concat(),concat() + tcp-request session set-var(txn.ipport) "str(),concat('addr=(',sess.ip),concat(',',sess.port,')')" http-request set-header x-hap-sig %[var(txn.sig)] cpl @@ -14139,18 +14141,22 @@ regsub(,[,]) regex case insensitive by adding the flag "i" in . Since is a string, it is made up from the concatenation of all desired flags. Thus if both "i" and "g" are desired, using "gi" or "ig" will have the same effect. - It is important to note that due to the current limitations of the - configuration parser, some characters such as closing parenthesis, closing - square brackets or comma are not possible to use in the arguments. The first - use of this converter is to replace certain characters or sequence of - characters with other ones. + The first use of this converter is to replace certain characters or sequence + of characters with other ones. 
+ + It is highly recommended to enclose the regex part using protected quotes to + improve clarity and never have a closing parenthesis from the regex mixed up + with the parenthesis from the function. Just like in Bourne shell, the first + level of quotes is processed when delimiting word groups on the line, a + second level is usable for arguments. It is recommended to use single quotes + outside since these ones do not try to resolve backslashes nor dollar signs. Example : # de-duplicate "/" in header "x-path". # input: x-path: /////a///b/c/xzxyz/ # output: x-path: /a/b/c/xzxyz/ - http-request set-header x-path %[hdr(x-path),regsub(/+,/,g)] + http-request set-header x-path "%[hdr(x-path),regsub('/+','/','g')]" capture-req() Capture the string entry in the request slot and returns the entry as diff --git a/src/arg.c b/src/arg.c index 927aaa4d0..3843a9b25 100644 --- a/src/arg.c +++ b/src/arg.c @@ -152,20 +152,68 @@ int make_arg_list(const char *in, int len, uint64_t mask, struct arg **argp, /* Note: empty arguments after a comma always exist. 
*/ while (pos < nbarg) { unsigned int uint; + int squote = 0, dquote = 0; + char *out; - beg = in; - while (len && *in != ',' && *in && *in != ')') { - in++; - len--; + chunk_reset(&trash); + out = trash.area; + + while (len && *in && trash.data < trash.size - 1) { + if (*in == '"' && !squote) { /* double quote outside single quotes */ + if (dquote) + dquote = 0; + else + dquote = 1; + in++; len--; + continue; + } + else if (*in == '\'' && !dquote) { /* single quote outside double quotes */ + if (squote) + squote = 0; + else + squote = 1; + in++; len--; + continue; + } + else if (*in == '\\' && !squote && len != 1) { + /* '\', ', ' ', '"' support being escaped by '\' */ + if (len == 1 || in[1] == 0) + goto unquote_err; + + if (in[1] == '\\' || in[1] == ' ' || in[1] == '"' || in[1] == '\'') { + in++; len--; + *out++ = *in; + } + else if (in[1] == 'r') { + in++; len--; + *out++ = '\r'; + } + else if (in[1] == 'n') { + in++; len--; + *out++ = '\n'; + } + else if (in[1] == 't') { + in++; len--; + *out++ = '\t'; + } + else { + /* just a lone '\' */ + *out++ = *in; + } + in++; len--; + } + else { + if (!squote && !dquote && (*in == ',' || *in == ')')) { + /* end of argument */ + break; + } + /* verbatim copy */ + *out++ = *in++; + len--; + } + trash.data = out - trash.area; } - - /* we have a new argument between and (not included). - * For ease of handling, we copy it into a zero-terminated word. - * By default, the output argument will be the same type of the - * expected one. - */ - if (!chunk_strncpy(&trash, beg, in - beg)) - goto buffer_err; + trash.area[trash.data] = 0; arg->type = (mask >> (pos * ARGT_BITS)) & ARGT_MASK; @@ -362,4 +410,14 @@ int make_arg_list(const char *in, int len, uint64_t mask, struct arg **argp, memprintf(err_msg, "too small buffer size to store decoded argument %d, increase bufsize ?", pos + 1); goto err; + + unquote_err: + /* come here with the parsed part in : and the + * unparsable part in . 
+ */ + trash.area[trash.data] = 0; + memprintf(err_msg, "failed to parse '%s' after '%s' as type '%s' at position %d", + in, trash.area, arg_type_names[(mask >> (pos * ARGT_BITS)) & ARGT_MASK], pos + 1); + goto err; + }