mirror of
https://git.haproxy.org/git/haproxy.git/
synced 2025-12-09 19:51:36 +01:00
MINOR: tools: add support for ist to the word fingerprinting functions
The word fingerprinting functions are used to compare similar words to suggest a correctly spelled one that looks like what the user proposed. Currently the functions only support const char*, but there's no reason for this, and it would be convenient to support substrings extracted from random pieces of configurations. Here we're adding new "_with_len" variants that take an ist instead. They are only a slight change of the original functions, and the old ones now rely on them.
This commit is contained in:
parent
d37be30c88
commit
4adc1ef7ad
@ -1028,7 +1028,9 @@ int is_dir_present(const char *path_fmt, ...);
|
|||||||
uint32_t parse_line(char *in, char *out, size_t *outlen, char **args, int *nbargs, uint32_t opts, const char **errptr);
|
uint32_t parse_line(char *in, char *out, size_t *outlen, char **args, int *nbargs, uint32_t opts, const char **errptr);
|
||||||
ssize_t read_line_to_trash(const char *path_fmt, ...);
|
ssize_t read_line_to_trash(const char *path_fmt, ...);
|
||||||
size_t sanitize_for_printing(char *line, size_t pos, size_t width);
|
size_t sanitize_for_printing(char *line, size_t pos, size_t width);
|
||||||
|
void update_word_fingerprint_with_len(uint8_t *fp, struct ist word);
|
||||||
void update_word_fingerprint(uint8_t *fp, const char *word);
|
void update_word_fingerprint(uint8_t *fp, const char *word);
|
||||||
|
void make_word_fingerprint_with_len(uint8_t *fp, struct ist word);
|
||||||
void make_word_fingerprint(uint8_t *fp, const char *word);
|
void make_word_fingerprint(uint8_t *fp, const char *word);
|
||||||
int word_fingerprint_distance(const uint8_t *fp1, const uint8_t *fp2);
|
int word_fingerprint_distance(const uint8_t *fp1, const uint8_t *fp2);
|
||||||
|
|
||||||
|
|||||||
32
src/tools.c
32
src/tools.c
@ -6606,20 +6606,20 @@ size_t sanitize_for_printing(char *line, size_t pos, size_t width)
|
|||||||
return pos - shift;
|
return pos - shift;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Update array <fp> with the fingerprint of word <word> by counting the
|
/* Update array <fp> with the fingerprint of the <word.len> chars of ist <word>
|
||||||
* transitions between characters. <fp> is a 1024-entries array indexed as
|
* by counting the transitions between characters. <fp> is a 1024-entries array
|
||||||
* 32*from+to. Positions for 'from' and 'to' are:
|
* indexed as 32*from+to. Positions for 'from' and 'to' are:
|
||||||
* 1..26=letter, 27=digit, 28=other/begin/end.
|
* 1..26=letter, 27=digit, 28=other/begin/end.
|
||||||
* Row "from=0" is used to mark the character's presence. Others unused.
|
* Row "from=0" is used to mark the character's presence. Others unused.
|
||||||
*/
|
*/
|
||||||
void update_word_fingerprint(uint8_t *fp, const char *word)
|
void update_word_fingerprint_with_len(uint8_t *fp, struct ist word)
|
||||||
{
|
{
|
||||||
const char *p;
|
const char *p;
|
||||||
int from, to;
|
int from, to;
|
||||||
int c;
|
int c;
|
||||||
|
|
||||||
from = 28; // begin
|
from = 28; // begin
|
||||||
for (p = word; *p; p++) {
|
for (p = word.ptr; p < word.ptr + word.len; p++) {
|
||||||
c = tolower((unsigned char)*p);
|
c = tolower((unsigned char)*p);
|
||||||
switch(c) {
|
switch(c) {
|
||||||
case 'a'...'z': to = c - 'a' + 1; break;
|
case 'a'...'z': to = c - 'a' + 1; break;
|
||||||
@ -6635,6 +6635,17 @@ void update_word_fingerprint(uint8_t *fp, const char *word)
|
|||||||
fp[32 * from + to]++;
|
fp[32 * from + to]++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Update array <fp> with the fingerprint of word <word> by counting the
|
||||||
|
* transitions between characters. <fp> is a 1024-entries array indexed as
|
||||||
|
* 32*from+to. Positions for 'from' and 'to' are:
|
||||||
|
* 1..26=letter, 27=digit, 28=other/begin/end.
|
||||||
|
* Row "from=0" is used to mark the character's presence. Others unused.
|
||||||
|
*/
|
||||||
|
void update_word_fingerprint(uint8_t *fp, const char *word)
|
||||||
|
{
|
||||||
|
return update_word_fingerprint_with_len(fp, ist(word));
|
||||||
|
}
|
||||||
|
|
||||||
/* This function hashes a word, scramble is the anonymizing key, returns
|
/* This function hashes a word, scramble is the anonymizing key, returns
|
||||||
* the hashed word when the key (scramble) != 0, else returns the word.
|
* the hashed word when the key (scramble) != 0, else returns the word.
|
||||||
* This function can be called NB_L_HASH_WORD times in a row, don't call
|
* This function can be called NB_L_HASH_WORD times in a row, don't call
|
||||||
@ -6764,6 +6775,17 @@ void make_word_fingerprint(uint8_t *fp, const char *word)
|
|||||||
update_word_fingerprint(fp, word);
|
update_word_fingerprint(fp, word);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Initialize array <fp> with the fingerprint of word <word> by counting the
|
||||||
|
* transitions between characters. <fp> is a 1024-entries array indexed as
|
||||||
|
* 32*from+to. Positions for 'from' and 'to' are:
|
||||||
|
* 1..26=letter, 27=digit, 28=other/begin/end. Others unused.
|
||||||
|
*/
|
||||||
|
void make_word_fingerprint_with_len(uint8_t *fp, struct ist word)
|
||||||
|
{
|
||||||
|
memset(fp, 0, 1024);
|
||||||
|
update_word_fingerprint_with_len(fp, word);
|
||||||
|
}
|
||||||
|
|
||||||
/* Return the distance between two word fingerprints created by function
|
/* Return the distance between two word fingerprints created by function
|
||||||
* make_word_fingerprint(). It's a positive integer calculated as the sum of
|
* make_word_fingerprint(). It's a positive integer calculated as the sum of
|
||||||
* the differences between each location.
|
* the differences between each location.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user