mirror of
https://git.haproxy.org/git/haproxy.git/
synced 2025-09-21 22:01:31 +02:00
MINOR: tools: add the ability to update a word fingerprint
Instead of always building a new fingerprint from scratch, let's support updating an existing one without wiping it first, and doing the same character by character. The word-by-word variant will still produce multiple beginnings and ends, but those accurately reflect word boundaries. The char-based variant is more flexible: it requires the caller to keep the previous character in order to indicate the transition, which also makes it possible to insert delimiters, for example.
This commit is contained in:
parent
b736458bfa
commit
e33c4b3c11
@ -865,6 +865,7 @@ int my_unsetenv(const char *name);
|
||||
char *env_expand(char *in);
|
||||
uint32_t parse_line(char *in, char *out, size_t *outlen, char **args, int *nbargs, uint32_t opts, char **errptr);
|
||||
size_t sanitize_for_printing(char *line, size_t pos, size_t width);
|
||||
void update_word_fingerprint(uint8_t *fp, const char *word);
|
||||
void make_word_fingerprint(uint8_t *fp, const char *word);
|
||||
int word_fingerprint_distance(const uint8_t *fp1, const uint8_t *fp2);
|
||||
|
||||
@ -1072,5 +1073,33 @@ static inline unsigned int statistical_prng()
|
||||
return statistical_prng_state = x;
|
||||
}
|
||||
|
||||
/* Update array <fp> with the character transition <prev> to <curr>. If <prev>
 * is zero, it's assumed that <curr> is the first character. If <curr> is zero
 * it's assumed to mark the end. Both may be zero. <fp> is a 1024-entries array
 * indexed as 32*from+to. Positions for 'from' and 'to' are:
 *   0..25=letter, 26=digit, 27=other, 28=begin, 29=end, others unused.
 * Letters are folded to lower case so that 'A' and 'a' share the same slot.
 * Exactly one counter of <fp> is incremented per call.
 */
static inline void update_char_fingerprint(uint8_t *fp, char prev, char curr)
{
	int from, to;

	switch (prev) {
	case 0:         from = 28; break; // begin (must not collide with digit=26)
	case 'a'...'z': from = prev - 'a'; break;
	case 'A'...'Z': from = tolower((unsigned char)prev) - 'a'; break;
	case '0'...'9': from = 26; break;
	default:        from = 27; break;
	}

	switch (curr) {
	case 0:         to = 29; break; // end (must not collide with begin=28)
	case 'a'...'z': to = curr - 'a'; break;
	case 'A'...'Z': to = tolower((unsigned char)curr) - 'a'; break;
	case '0'...'9': to = 26; break;
	default:        to = 27; break;
	}

	fp[32 * from + to]++;
}
|
||||
|
||||
#endif /* _HAPROXY_TOOLS_H */
|
||||
|
16
src/tools.c
16
src/tools.c
@ -5369,18 +5369,17 @@ size_t sanitize_for_printing(char *line, size_t pos, size_t width)
|
||||
return pos - shift;
|
||||
}
|
||||
|
||||
/* Initialize array <fp> with the fingerprint of word <word> by counting the
|
||||
/* Update array <fp> with the fingerprint of word <word> by counting the
|
||||
* transitions between characters. <fp> is a 1024-entries array indexed as
|
||||
* 32*from+to. Positions for 'from' and 'to' are:
|
||||
* 0..25=letter, 26=digit, 27=other, 28=begin, 29=end, others unused.
|
||||
*/
|
||||
void make_word_fingerprint(uint8_t *fp, const char *word)
|
||||
void update_word_fingerprint(uint8_t *fp, const char *word)
|
||||
{
|
||||
const char *p;
|
||||
int from, to;
|
||||
int c;
|
||||
|
||||
memset(fp, 0, 1024);
|
||||
from = 28; // begin
|
||||
for (p = word; *p; p++) {
|
||||
c = tolower(*p);
|
||||
@ -5397,6 +5396,17 @@ void make_word_fingerprint(uint8_t *fp, const char *word)
|
||||
fp[32 * from + to]++;
|
||||
}
|
||||
|
||||
/* Build the fingerprint of word <word> into array <fp> from scratch: the
 * 1024 entries of <fp> are first reset to zero, then the character
 * transitions of <word> are accounted for by update_word_fingerprint().
 * <fp> is indexed as 32*from+to. Positions for 'from' and 'to' are:
 *   0..25=letter, 26=digit, 27=other, 28=begin, 29=end, others unused.
 */
void make_word_fingerprint(uint8_t *fp, const char *word)
{
	/* start from an empty fingerprint, any previous contents are lost */
	memset(fp, 0, 1024 * sizeof(*fp));

	/* then count the transitions of this single word */
	update_word_fingerprint(fp, word);
}
|
||||
|
||||
/* Return the distance between two word fingerprints created by function
|
||||
* make_word_fingerprint(). It's a positive integer calculated as the sum of
|
||||
* the squares of the differences between each location.
|
||||
|
Loading…
x
Reference in New Issue
Block a user