MEDIUM: sample: Extend functionality for field/word converters

Extend the field/word converters so that it is possible to extract
field(s)/word(s) counting from the beginning or from the end, and/or to
extract multiple fields/words (including the separators between them), e.g.:

str(f1_f2_f3__f5),field(2,_,2)  # f2_f3
str(f1_f2_f3__f5),field(2,_,0)  # f2_f3__f5
str(f1_f2_f3__f5),field(-2,_,3) # f2_f3_
str(f1_f2_f3__f5),field(-3,_,0) # f1_f2_f3

str(w1_w2_w3___w4),word(3,_,2)  # w3___w4
str(w1_w2_w3___w4),word(2,_,0)  # w2_w3___w4
str(w1_w2_w3___w4),word(-2,_,3) # w1_w2_w3
str(w1_w2_w3___w4),word(-3,_,0) # w1_w2

This change is backward compatible.
This commit is contained in:
Marcin Deranek 2018-04-16 14:30:46 +02:00 committed by Willy Tarreau
parent 9a4da683a6
commit 9631a28275
2 changed files with 125 additions and 41 deletions

View File

@ -12907,10 +12907,20 @@ even
Returns a boolean TRUE if the input value of type signed integer is even Returns a boolean TRUE if the input value of type signed integer is even
otherwise returns FALSE. It is functionally equivalent to "not,and(1),bool". otherwise returns FALSE. It is functionally equivalent to "not,and(1),bool".
field(<index>,<delimiters>) field(<index>,<delimiters>[,<count>])
Extracts the substring at the given index considering given delimiters from Extracts the substring at the given index counting from the beginning
an input string. Indexes start at 1 and delimiters are a string formatted (positive index) or from the end (negative index) considering given delimiters
list of chars. from an input string. Indexes start at 1 or -1 and delimiters are a string
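formatted list of chars. Optionally, <count> gives the number of fields to
extract (default: 1). A value of 0 extracts all remaining fields, towards
the end of the string for a positive index or towards its beginning for a
negative index. Delimiters between the extracted fields are preserved.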
Example :
str(f1_f2_f3__f5),field(5,_) # f5
str(f1_f2_f3__f5),field(2,_,0) # f2_f3__f5
str(f1_f2_f3__f5),field(2,_,2) # f2_f3
str(f1_f2_f3__f5),field(-2,_,3) # f2_f3_
str(f1_f2_f3__f5),field(-3,_,0) # f1_f2_f3
hex hex
Converts a binary input sample to a hex string containing two hex digits per Converts a binary input sample to a hex string containing two hex digits per
@ -13440,9 +13450,19 @@ utime(<format>[,<offset>])
# e.g. 20140710162350 127.0.0.1:57325 # e.g. 20140710162350 127.0.0.1:57325
log-format %[date,utime(%Y%m%d%H%M%S)]\ %ci:%cp log-format %[date,utime(%Y%m%d%H%M%S)]\ %ci:%cp
word(<index>,<delimiters>) word(<index>,<delimiters>[,<count>])
Extracts the nth word considering given delimiters from an input string. Extracts the nth word counting from the beginning (positive index) or from
Indexes start at 1 and delimiters are a string formatted list of chars. the end (negative index) considering given delimiters from an input string.
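Indexes start at 1 or -1 and delimiters are a string formatted list of chars.
Optionally, <count> gives the number of words to extract (default: 1).
A value of 0 extracts all remaining words, towards the end of the string for
a positive index or towards its beginning for a negative index. Delimiters
between the extracted words are preserved.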
Example :
str(f1_f2_f3__f5),word(4,_) # f5
str(f1_f2_f3__f5),word(2,_,0) # f2_f3__f5
str(f1_f2_f3__f5),word(3,_,2) # f3__f5
str(f1_f2_f3__f5),word(-2,_,3) # f1_f2_f3
str(f1_f2_f3__f5),word(-3,_,0) # f1_f2
wt6([<avalanche>]) wt6([<avalanche>])
Hashes a binary input sample into an unsigned 32-bit quantity using the WT6 Hashes a binary input sample into an unsigned 32-bit quantity using the WT6

View File

@ -1997,27 +1997,54 @@ static int sample_conv_field_check(struct arg *args, struct sample_conv *conv,
*/ */
static int sample_conv_field(const struct arg *arg_p, struct sample *smp, void *private) static int sample_conv_field(const struct arg *arg_p, struct sample *smp, void *private)
{ {
unsigned int field; int field;
char *start, *end; char *start, *end;
int i; int i;
int count = (arg_p[2].type == ARGT_SINT) ? arg_p[2].data.sint : 1;
if (!arg_p[0].data.sint) if (!arg_p[0].data.sint)
return 0; return 0;
field = 1; if (arg_p[0].data.sint < 0) {
end = start = smp->data.u.str.str; field = -1;
while (end - smp->data.u.str.str < smp->data.u.str.len) { end = start = smp->data.u.str.str + smp->data.u.str.len;
while (start > smp->data.u.str.str) {
for (i = 0 ; i < arg_p[1].data.str.len ; i++) { for (i = 0 ; i < arg_p[1].data.str.len ; i++) {
if (*end == arg_p[1].data.str.str[i]) { if (*(start-1) == arg_p[1].data.str.str[i]) {
if (field == arg_p[0].data.sint) if (field == arg_p[0].data.sint) {
goto found; if (count == 1)
start = end+1; goto found;
field++; else if (count > 1)
break; count--;
} else {
end = start-1;
field--;
}
break;
}
} }
start--;
}
} else {
field = 1;
end = start = smp->data.u.str.str;
while (end - smp->data.u.str.str < smp->data.u.str.len) {
for (i = 0 ; i < arg_p[1].data.str.len ; i++) {
if (*end == arg_p[1].data.str.str[i]) {
if (field == arg_p[0].data.sint) {
if (count == 1)
goto found;
else if (count > 1)
count--;
} else {
start = end+1;
field++;
}
break;
}
}
end++;
} }
end++;
} }
/* Field not found */ /* Field not found */
@ -2048,37 +2075,74 @@ static int sample_conv_field(const struct arg *arg_p, struct sample *smp, void *
*/ */
static int sample_conv_word(const struct arg *arg_p, struct sample *smp, void *private) static int sample_conv_word(const struct arg *arg_p, struct sample *smp, void *private)
{ {
unsigned int word; int word;
char *start, *end; char *start, *end;
int i, issep, inword; int i, issep, inword;
int count = (arg_p[2].type == ARGT_SINT) ? arg_p[2].data.sint : 1;
if (!arg_p[0].data.sint) if (!arg_p[0].data.sint)
return 0; return 0;
word = 0; word = 0;
inword = 0; inword = 0;
end = start = smp->data.u.str.str; if (arg_p[0].data.sint < 0) {
while (end - smp->data.u.str.str < smp->data.u.str.len) { end = start = smp->data.u.str.str + smp->data.u.str.len;
issep = 0; while (start > smp->data.u.str.str) {
for (i = 0 ; i < arg_p[1].data.str.len ; i++) { issep = 0;
if (*end == arg_p[1].data.str.str[i]) { for (i = 0 ; i < arg_p[1].data.str.len ; i++) {
issep = 1; if (*(start-1) == arg_p[1].data.str.str[i]) {
break; issep = 1;
break;
}
} }
} if (!inword) {
if (!inword) { if (!issep) {
if (!issep) { if (word != arg_p[0].data.sint) {
word++; word--;
start = end; end = start;
inword = 1; }
inword = 1;
}
} }
else if (issep) {
if (word == arg_p[0].data.sint)
if (count == 1)
goto found;
else if (count > 1)
count--;
inword = 0;
}
start--;
} }
else if (issep) { } else {
if (word == arg_p[0].data.sint) end = start = smp->data.u.str.str;
goto found; while (end - smp->data.u.str.str < smp->data.u.str.len) {
inword = 0; issep = 0;
for (i = 0 ; i < arg_p[1].data.str.len ; i++) {
if (*end == arg_p[1].data.str.str[i]) {
issep = 1;
break;
}
}
if (!inword) {
if (!issep) {
if (word != arg_p[0].data.sint) {
word++;
start = end;
}
inword = 1;
}
}
else if (issep) {
if (word == arg_p[0].data.sint)
if (count == 1)
goto found;
else if (count > 1)
count--;
inword = 0;
}
end++;
} }
end++;
} }
/* Field not found */ /* Field not found */
@ -2928,8 +2992,8 @@ static struct sample_conv_kw_list sample_conv_kws = {ILH, {
{ "xxh64", sample_conv_xxh64, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT }, { "xxh64", sample_conv_xxh64, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT },
{ "json", sample_conv_json, ARG1(1,STR), sample_conv_json_check, SMP_T_STR, SMP_T_STR }, { "json", sample_conv_json, ARG1(1,STR), sample_conv_json_check, SMP_T_STR, SMP_T_STR },
{ "bytes", sample_conv_bytes, ARG2(1,SINT,SINT), NULL, SMP_T_BIN, SMP_T_BIN }, { "bytes", sample_conv_bytes, ARG2(1,SINT,SINT), NULL, SMP_T_BIN, SMP_T_BIN },
{ "field", sample_conv_field, ARG2(2,SINT,STR), sample_conv_field_check, SMP_T_STR, SMP_T_STR }, { "field", sample_conv_field, ARG3(2,SINT,STR,SINT), sample_conv_field_check, SMP_T_STR, SMP_T_STR },
{ "word", sample_conv_word, ARG2(2,SINT,STR), sample_conv_field_check, SMP_T_STR, SMP_T_STR }, { "word", sample_conv_word, ARG3(2,SINT,STR,SINT), sample_conv_field_check, SMP_T_STR, SMP_T_STR },
{ "regsub", sample_conv_regsub, ARG3(2,REG,STR,STR), sample_conv_regsub_check, SMP_T_STR, SMP_T_STR }, { "regsub", sample_conv_regsub, ARG3(2,REG,STR,STR), sample_conv_regsub_check, SMP_T_STR, SMP_T_STR },
{ "sha1", sample_conv_sha1, 0, NULL, SMP_T_BIN, SMP_T_BIN }, { "sha1", sample_conv_sha1, 0, NULL, SMP_T_BIN, SMP_T_BIN },
{ "concat", sample_conv_concat, ARG3(1,STR,STR,STR), smp_check_concat, SMP_T_STR, SMP_T_STR }, { "concat", sample_conv_concat, ARG3(1,STR,STR,STR), smp_check_concat, SMP_T_STR, SMP_T_STR },