diff --git a/doc/configuration.txt b/doc/configuration.txt index 5f1b1cd14..9b833af5f 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -9959,6 +9959,17 @@ base64 transfer binary content in a way that can be reliably transferred (eg: an SSL ID can be copied in a header). +djb2([]) + Hashes a binary input sample into an unsigned 32-bit quantity using the DJB2 + hash function. Optionally, it is possible to apply a full avalanche hash + function to the output if the optional argument equals 1. This + converter uses the same functions as used by the various hash-based load + balancing algorithms, so it will provide exactly the same results. It is + mostly intended for debugging, but can be used as a stick-table entry to + collect rough statistics. It must not be used for security purposes as a + 32-bit hash is trivial to break. See also "sdbm", "wt6" and the "hash-type" + directive. + hex Converts a binary input sample to an hex string containing two hex digits per input byte. It is used to log or transfer hex dumps of some binary input data @@ -10089,6 +10100,17 @@ map__([,]) | `---------------------------- key `------------------------------------ leading spaces ignored +sdbm([]) + Hashes a binary input sample into an unsigned 32-bit quantity using the SDBM + hash function. Optionally, it is possible to apply a full avalanche hash + function to the output if the optional argument equals 1. This + converter uses the same functions as used by the various hash-based load + balancing algorithms, so it will provide exactly the same results. It is + mostly intended for debugging, but can be used as a stick-table entry to + collect rough statistics. It must not be used for security purposes as a + 32-bit hash is trivial to break. See also "djb2", "wt6" and the "hash-type" + directive. + table_bytes_in_rate() Uses the string representation of the input sample to perform a look up in the specified table. If the key is not found in the table, integer value zero @@ -10247,6 +10269,17 @@ utime([,]) # Eg: 20140710162350 127.0.0.1:57325 log-format %[date,utime(%Y%m%d%H%M%S)]\ %ci:%cp +wt6([]) + Hashes a binary input sample into an unsigned 32-bit quantity using the WT6 + hash function. Optionally, it is possible to apply a full avalanche hash + function to the output if the optional argument equals 1. This + converter uses the same functions as used by the various hash-based load + balancing algorithms, so it will provide exactly the same results. It is + mostly intended for debugging, but can be used as a stick-table entry to + collect rough statistics. It must not be used for security purposes as a + 32-bit hash is trivial to break. See also "djb2", "sdbm", and the "hash-type" + directive. + 7.3.2. Fetching samples from internal states -------------------------------------------- @@ -11408,7 +11441,9 @@ base32 : integer This returns a 32-bit hash of the value returned by the "base" fetch method above. This is useful to track per-URL activity on high traffic sites without having to store all URLs. Instead a shorter hash is stored, saving a lot of - memory. The output type is an unsigned integer. + memory. The output type is an unsigned integer. The hash function used is + SDBM with full avalanche on the output. Technically, base32 is exactly equal + to "base,sdbm(1)". base32+src : binary This returns the concatenation of the base32 fetch above and the src fetch diff --git a/src/sample.c b/src/sample.c index 34f4c1d00..6128649c2 100644 --- a/src/sample.c +++ b/src/sample.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -1240,6 +1241,16 @@ static int sample_conv_bin2hex(const struct arg *arg_p, struct sample *smp) return 1; } +/* hashes the binary input into a 32-bit unsigned int */ +static int sample_conv_djb2(const struct arg *arg_p, struct sample *smp) +{ + smp->data.uint = hash_djb2(smp->data.str.str, smp->data.str.len); + if (arg_p && arg_p->data.uint) + smp->data.uint = full_hash(smp->data.uint); + smp->type = SMP_T_UINT; + return 1; +} + static int sample_conv_str2lower(const struct arg *arg_p, struct sample *smp) { int i; @@ -1302,6 +1313,16 @@ static int sample_conv_ltime(const struct arg *args, struct sample *smp) return 1; } +/* hashes the binary input into a 32-bit unsigned int */ +static int sample_conv_sdbm(const struct arg *arg_p, struct sample *smp) +{ + smp->data.uint = hash_sdbm(smp->data.str.str, smp->data.str.len); + if (arg_p && arg_p->data.uint) + smp->data.uint = full_hash(smp->data.uint); + smp->type = SMP_T_UINT; + return 1; +} + /* takes an UINT value on input supposed to represent the time since EPOCH, * adds an optional offset found in args[1] and emits a string representing * the UTC date in the format specified in args[1] using strftime(). @@ -1322,6 +1343,16 @@ static int sample_conv_utime(const struct arg *args, struct sample *smp) return 1; } +/* hashes the binary input into a 32-bit unsigned int */ +static int sample_conv_wt6(const struct arg *arg_p, struct sample *smp) +{ + smp->data.uint = hash_wt6(smp->data.str.str, smp->data.str.len); + if (arg_p && arg_p->data.uint) + smp->data.uint = full_hash(smp->data.uint); + smp->type = SMP_T_UINT; + return 1; +} + /************************************************************************/ /* All supported sample fetch functions must be declared here */ /************************************************************************/ @@ -1426,6 +1457,9 @@ static struct sample_conv_kw_list sample_conv_kws = {ILH, { { "ipmask", sample_conv_ipmask, ARG1(1,MSK4), NULL, SMP_T_IPV4, SMP_T_IPV4 }, { "ltime", sample_conv_ltime, ARG2(1,STR,SINT), NULL, SMP_T_UINT, SMP_T_STR }, { "utime", sample_conv_utime, ARG2(1,STR,SINT), NULL, SMP_T_UINT, SMP_T_STR }, + { "djb2", sample_conv_djb2, ARG1(0,UINT), NULL, SMP_T_BIN, SMP_T_UINT }, + { "sdbm", sample_conv_sdbm, ARG1(0,UINT), NULL, SMP_T_BIN, SMP_T_UINT }, + { "wt6", sample_conv_wt6, ARG1(0,UINT), NULL, SMP_T_BIN, SMP_T_UINT }, { NULL, NULL, 0, 0, 0 }, }};