From e33c4b3c11f80a1252fbb17e0a3147d74888b61d Mon Sep 17 00:00:00 2001
From: Willy Tarreau
Date: Fri, 12 Mar 2021 18:59:31 +0100
Subject: [PATCH] MINOR: tools: add the ability to update a word fingerprint

Instead of making a new one from scratch, let's support not wiping the
existing fingerprint and updating it, and to do the same char by char.
The word-by-word one will still result in multiple beginnings and ends,
but that will accurately translate word boundaries. The char-based one
has more flexibility and requires that the caller maintains the previous
char to indicate the transition, which also allows to insert delimiters
for example.
---
 include/haproxy/tools.h | 29 +++++++++++++++++++++++++++++
 src/tools.c             | 16 +++++++++++++---
 2 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/include/haproxy/tools.h b/include/haproxy/tools.h
index 56948ea6c..901dca0bb 100644
--- a/include/haproxy/tools.h
+++ b/include/haproxy/tools.h
@@ -865,6 +865,7 @@ int my_unsetenv(const char *name);
 char *env_expand(char *in);
 uint32_t parse_line(char *in, char *out, size_t *outlen, char **args, int *nbargs, uint32_t opts, char **errptr);
 size_t sanitize_for_printing(char *line, size_t pos, size_t width);
+void update_word_fingerprint(uint8_t *fp, const char *word);
 void make_word_fingerprint(uint8_t *fp, const char *word);
 int word_fingerprint_distance(const uint8_t *fp1, const uint8_t *fp2);
 
@@ -1072,5 +1073,33 @@ static inline unsigned int statistical_prng()
 	return statistical_prng_state = x;
 }
 
+/* Update array <fp> with the character transition <prev> to <curr>. If <prev>
+ * is zero, it's assumed that <curr> is the first character. If <curr> is zero
+ * it's assumed to mark the end. Both may be zero. <fp> is a 1024-entries array
+ * indexed as 32*from+to. Positions for 'from' and 'to' are:
+ * 0..25=letter, 26=digit, 27=other, 28=begin, 29=end, others unused.
+ */
+static inline void update_char_fingerprint(uint8_t *fp, char prev, char curr)
+{
+	int from, to;
+
+	switch (prev) {
+	case 0:         from = 28; break; // begin (same slot as update_word_fingerprint)
+	case 'a'...'z': from = prev - 'a'; break;
+	case 'A'...'Z': from = tolower(prev) - 'a'; break;
+	case '0'...'9': from = 26; break;
+	default:        from = 27; break;
+	}
+
+	switch (curr) {
+	case 0:         to = 28; break; // end (NOTE(review): comment says 29=end, confirm)
+	case 'a'...'z': to = curr - 'a'; break;
+	case 'A'...'Z': to = tolower(curr) - 'a'; break;
+	case '0'...'9': to = 26; break;
+	default:        to = 27; break;
+	}
+
+	fp[32 * from + to]++;
+}
 
 #endif /* _HAPROXY_TOOLS_H */
diff --git a/src/tools.c b/src/tools.c
index 0fd3edead..1255e748b 100644
--- a/src/tools.c
+++ b/src/tools.c
@@ -5369,18 +5369,17 @@ size_t sanitize_for_printing(char *line, size_t pos, size_t width)
 	return pos - shift;
 }
 
-/* Initialize array <fp> with the fingerprint of word <word> by counting the
+/* Update array <fp> with the fingerprint of word <word> by counting the
  * transitions between characters. <fp> is a 1024-entries array indexed as
  * 32*from+to. Positions for 'from' and 'to' are:
  * 0..25=letter, 26=digit, 27=other, 28=begin, 29=end, others unused.
  */
-void make_word_fingerprint(uint8_t *fp, const char *word)
+void update_word_fingerprint(uint8_t *fp, const char *word)
 {
 	const char *p;
 	int from, to;
 	int c;
 
-	memset(fp, 0, 1024);
 	from = 28; // begin
 	for (p = word; *p; p++) {
 		c = tolower(*p);
@@ -5397,6 +5396,17 @@ void make_word_fingerprint(uint8_t *fp, const char *word)
 	fp[32 * from + to]++;
 }
 
+/* Initialize array <fp> with the fingerprint of word <word> by counting the
+ * transitions between characters. <fp> is a 1024-entries array indexed as
+ * 32*from+to. Positions for 'from' and 'to' are:
+ * 0..25=letter, 26=digit, 27=other, 28=begin, 29=end, others unused.
+ */
+void make_word_fingerprint(uint8_t *fp, const char *word)
+{
+	memset(fp, 0, 1024);
+	update_word_fingerprint(fp, word);
+}
+
 /* Return the distance between two word fingerprints created by function
  * make_word_fingerprint(). 
It's a positive integer calculated as the sum of * the squares of the differences between each location.