diff --git a/doc/configuration.txt b/doc/configuration.txt index b6b3f088f..f72339a04 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -493,6 +493,7 @@ The following keywords are supported in the "global" section : - tune.maxaccept - tune.maxpollevents - tune.maxrewrite + - tune.pattern.cache-size - tune.pipesize - tune.rcvbuf.client - tune.rcvbuf.server @@ -1050,6 +1051,25 @@ tune.maxrewrite larger than that. This means you don't have to worry about it when changing bufsize. +tune.pattern.cache-size <number> + Sets the size of the pattern lookup cache to <number> entries. This is an LRU + cache which remembers previous lookups and their results. It is used by ACLs + and maps on slow pattern lookups, namely the ones using the "sub", "reg", + "dir", "dom", "end", "bin" match methods as well as the case-insensitive + strings. It applies to pattern expressions which means that it will be able + to memorize the result of a lookup among all the patterns specified on a + configuration line (including all those loaded from files). It automatically + invalidates entries which are updated using HTTP actions or on the CLI. The + default cache size is set to 10000 entries, which limits its footprint to + about 5 MB on 32-bit systems and 8 MB on 64-bit systems. There is a very low + risk of collision in this cache, which is in the order of the size of the + cache divided by 2^64. Typically, at 10000 requests per second with the + default cache size of 10000 entries, there's a 1% chance that a brute force + attack could cause a single collision after 60 years, or 0.1% after 6 years. + This is considered much lower than the risk of a memory corruption caused by + aging components. If this is not acceptable, the cache can be disabled by + setting this parameter to 0. + tune.pipesize Sets the kernel pipe buffer size to this size (in bytes). By default, pipes are the default size for the system. 
But sometimes when using TCP splicing, diff --git a/include/common/defaults.h b/include/common/defaults.h index 63b2b89ea..6193bdc73 100644 --- a/include/common/defaults.h +++ b/include/common/defaults.h @@ -295,4 +295,15 @@ #ifndef TLS_TICKETS_NO #define TLS_TICKETS_NO 3 #endif + +/* pattern lookup default cache size, in number of entries : + * 10k entries at 10k req/s mean 1% risk of a collision after 60 years, that's + * already much less than the memory's reliability in most machines and more + * durable than most admins' life expectancy. A collision will cause a + * valid result to be returned for a different entry from the same list. + */ +#ifndef DEFAULT_PAT_LRU_SIZE +#define DEFAULT_PAT_LRU_SIZE 10000 +#endif + #endif /* _COMMON_DEFAULTS_H */ diff --git a/include/types/global.h b/include/types/global.h index afd7aef11..7533bb026 100644 --- a/include/types/global.h +++ b/include/types/global.h @@ -142,6 +142,7 @@ struct global { int pipesize; /* pipe size in bytes, system defaults if zero */ int max_http_hdr; /* max number of HTTP headers, use MAX_HTTP_HDR if zero */ int cookie_len; /* max length of cookie captures */ + int pattern_cache; /* max number of entries in the pattern cache. 
*/ int sslcachesize; /* SSL cache size in session, defaults to 20000 */ #ifdef USE_OPENSSL int sslprivatecache; /* Force to use a private session cache even if nbproc > 1 */ diff --git a/src/cfgparse.c b/src/cfgparse.c index edaee56a0..4f75256de 100644 --- a/src/cfgparse.c +++ b/src/cfgparse.c @@ -907,6 +907,22 @@ int cfg_parse_global(const char *file, int linenum, char **args, int kwm) goto out; } } + else if (!strcmp(args[0], "tune.pattern.cache-size")) { + if (*args[1]) { + global.tune.pattern_cache = atoi(args[1]); + if (global.tune.pattern_cache < 0) { + Alert("parsing [%s:%d] : '%s' expects a positive numeric value\n", + file, linenum, args[0]); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + } else { + Alert("parsing [%s:%d] : '%s' expects a positive numeric value\n", + file, linenum, args[0]); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + } else if (!strcmp(args[0], "uid")) { if (global.uid != 0) { Alert("parsing [%s:%d] : user/uid already specified. Continuing.\n", file, linenum); diff --git a/src/haproxy.c b/src/haproxy.c index 474179cf1..5822a8bfc 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -146,6 +146,7 @@ struct global global = { .maxrewrite = MAXREWRITE, .chksize = BUFSIZE, .reserved_bufs = RESERVED_BUFS, + .pattern_cache = DEFAULT_PAT_LRU_SIZE, #ifdef USE_OPENSSL .sslcachesize = SSLCACHESIZE, .ssl_default_dh_param = SSL_DEFAULT_DH_PARAM, diff --git a/src/pattern.c b/src/pattern.c index ebae85db0..cbfa20db1 100644 --- a/src/pattern.c +++ b/src/pattern.c @@ -24,6 +24,8 @@ #include #include +#include +#include char *pat_match_names[PAT_MATCH_NUM] = { [PAT_MATCH_FOUND] = "found", @@ -144,6 +146,9 @@ static struct pattern static_pattern; /* This is the root of the list of all pattern_ref avalaibles. 
*/ struct list pattern_reference = LIST_HEAD_INIT(pattern_reference); +static struct lru64_head *pat_lru_tree; +static unsigned long long pat_lru_seed; + /* * * The following functions are not exported and are used by internals process @@ -443,6 +448,8 @@ struct pattern *pat_match_str(struct sample *smp, struct pattern_expr *expr, int struct pattern_tree *elt; struct pattern_list *lst; struct pattern *pattern; + struct pattern *ret = NULL; + struct lru64 *lru = NULL; /* Lookup a string in the expression's pattern tree. */ if (!eb_is_empty(&expr->pattern_tree)) { @@ -468,6 +475,15 @@ struct pattern *pat_match_str(struct sample *smp, struct pattern_expr *expr, int } /* look in the list */ + if (pat_lru_tree) { + unsigned long long seed = pat_lru_seed ^ (unsigned long long)expr; + + lru = lru64_get(XXH64(smp->data.str.str, smp->data.str.len, seed), + pat_lru_tree, expr, expr->revision); + if (lru && lru->domain) + return lru->data; + } + list_for_each_entry(lst, &expr->patterns, list) { pattern = &lst->pat; @@ -476,11 +492,16 @@ struct pattern *pat_match_str(struct sample *smp, struct pattern_expr *expr, int icase = expr->mflags & PAT_MF_IGNORE_CASE; if ((icase && strncasecmp(pattern->ptr.str, smp->data.str.str, smp->data.str.len) == 0) || - (!icase && strncmp(pattern->ptr.str, smp->data.str.str, smp->data.str.len) == 0)) - return pattern; + (!icase && strncmp(pattern->ptr.str, smp->data.str.str, smp->data.str.len) == 0)) { + ret = pattern; + break; + } } - return NULL; + if (lru) + lru64_commit(lru, ret, expr, expr->revision); + + return ret; } /* NB: For two binaries buf to be identical, it is required that their lengths match */ @@ -488,19 +509,34 @@ struct pattern *pat_match_bin(struct sample *smp, struct pattern_expr *expr, int { struct pattern_list *lst; struct pattern *pattern; + struct pattern *ret = NULL; + struct lru64 *lru = NULL; + + if (pat_lru_tree) { + unsigned long long seed = pat_lru_seed ^ (unsigned long long)expr; + + lru = 
lru64_get(XXH64(smp->data.str.str, smp->data.str.len, seed), + pat_lru_tree, expr, expr->revision); + if (lru && lru->domain) + return lru->data; + } - /* Look in the list. */ list_for_each_entry(lst, &expr->patterns, list) { pattern = &lst->pat; if (pattern->len != smp->data.str.len) continue; - if (memcmp(pattern->ptr.str, smp->data.str.str, smp->data.str.len) == 0) - return pattern; + if (memcmp(pattern->ptr.str, smp->data.str.str, smp->data.str.len) == 0) { + ret = pattern; + break; + } } - return NULL; + if (lru) + lru64_commit(lru, ret, expr, expr->revision); + + return ret; } /* Executes a regex. It temporarily changes the data to add a trailing zero, @@ -510,15 +546,31 @@ struct pattern *pat_match_reg(struct sample *smp, struct pattern_expr *expr, int { struct pattern_list *lst; struct pattern *pattern; + struct pattern *ret = NULL; + struct lru64 *lru = NULL; + + if (pat_lru_tree) { + unsigned long long seed = pat_lru_seed ^ (unsigned long long)expr; + + lru = lru64_get(XXH64(smp->data.str.str, smp->data.str.len, seed), + pat_lru_tree, expr, expr->revision); + if (lru && lru->domain) + return lru->data; + } - /* look in the list */ list_for_each_entry(lst, &expr->patterns, list) { pattern = &lst->pat; - if (regex_exec2(pattern->ptr.reg, smp->data.str.str, smp->data.str.len)) - return pattern; + if (regex_exec2(pattern->ptr.reg, smp->data.str.str, smp->data.str.len)) { + ret = pattern; + break; + } } - return NULL; + + if (lru) + lru64_commit(lru, ret, expr, expr->revision); + + return ret; } /* Checks that the pattern matches the beginning of the tested string. */ @@ -530,6 +582,8 @@ struct pattern *pat_match_beg(struct sample *smp, struct pattern_expr *expr, int struct pattern_tree *elt; struct pattern_list *lst; struct pattern *pattern; + struct pattern *ret = NULL; + struct lru64 *lru = NULL; /* Lookup a string in the expression's pattern tree. 
*/ if (!eb_is_empty(&expr->pattern_tree)) { @@ -555,6 +609,15 @@ struct pattern *pat_match_beg(struct sample *smp, struct pattern_expr *expr, int } /* look in the list */ + if (pat_lru_tree) { + unsigned long long seed = pat_lru_seed ^ (unsigned long long)expr; + + lru = lru64_get(XXH64(smp->data.str.str, smp->data.str.len, seed), + pat_lru_tree, expr, expr->revision); + if (lru && lru->domain) + return lru->data; + } + list_for_each_entry(lst, &expr->patterns, list) { pattern = &lst->pat; @@ -566,9 +629,14 @@ struct pattern *pat_match_beg(struct sample *smp, struct pattern_expr *expr, int (!icase && strncmp(pattern->ptr.str, smp->data.str.str, pattern->len) != 0)) continue; - return pattern; + ret = pattern; + break; } - return NULL; + + if (lru) + lru64_commit(lru, ret, expr, expr->revision); + + return ret; } /* Checks that the pattern matches the end of the tested string. */ @@ -577,6 +645,17 @@ struct pattern *pat_match_end(struct sample *smp, struct pattern_expr *expr, int int icase; struct pattern_list *lst; struct pattern *pattern; + struct pattern *ret = NULL; + struct lru64 *lru = NULL; + + if (pat_lru_tree) { + unsigned long long seed = pat_lru_seed ^ (unsigned long long)expr; + + lru = lru64_get(XXH64(smp->data.str.str, smp->data.str.len, seed), + pat_lru_tree, expr, expr->revision); + if (lru && lru->domain) + return lru->data; + } list_for_each_entry(lst, &expr->patterns, list) { pattern = &lst->pat; @@ -589,9 +668,14 @@ struct pattern *pat_match_end(struct sample *smp, struct pattern_expr *expr, int (!icase && strncmp(pattern->ptr.str, smp->data.str.str + smp->data.str.len - pattern->len, pattern->len) != 0)) continue; - return pattern; + ret = pattern; + break; } - return NULL; + + if (lru) + lru64_commit(lru, ret, expr, expr->revision); + + return ret; } /* Checks that the pattern is included inside the tested string. 
@@ -604,6 +688,17 @@ struct pattern *pat_match_sub(struct sample *smp, struct pattern_expr *expr, int char *c; struct pattern_list *lst; struct pattern *pattern; + struct pattern *ret = NULL; + struct lru64 *lru = NULL; + + if (pat_lru_tree) { + unsigned long long seed = pat_lru_seed ^ (unsigned long long)expr; + + lru = lru64_get(XXH64(smp->data.str.str, smp->data.str.len, seed), + pat_lru_tree, expr, expr->revision); + if (lru && lru->domain) + return lru->data; + } list_for_each_entry(lst, &expr->patterns, list) { pattern = &lst->pat; @@ -617,19 +712,27 @@ struct pattern *pat_match_sub(struct sample *smp, struct pattern_expr *expr, int for (c = smp->data.str.str; c <= end; c++) { if (tolower(*c) != tolower(*pattern->ptr.str)) continue; - if (strncasecmp(pattern->ptr.str, c, pattern->len) == 0) - return pattern; + if (strncasecmp(pattern->ptr.str, c, pattern->len) == 0) { + ret = pattern; + goto leave; + } } } else { for (c = smp->data.str.str; c <= end; c++) { if (*c != *pattern->ptr.str) continue; - if (strncmp(pattern->ptr.str, c, pattern->len) == 0) - return pattern; + if (strncmp(pattern->ptr.str, c, pattern->len) == 0) { + ret = pattern; + goto leave; + } } } } - return NULL; + leave: + if (lru) + lru64_commit(lru, ret, expr, expr->revision); + + return ret; } /* This one is used by other real functions. It checks that the pattern is @@ -2321,6 +2424,10 @@ void pattern_finalize_config(void) struct pat_ref *ref, *ref2, *ref3; struct list pr = LIST_HEAD_INIT(pr); + pat_lru_seed = random(); + if (global.tune.pattern_cache) + pat_lru_tree = lru64_new(global.tune.pattern_cache); + list_for_each_entry(ref, &pattern_reference, list) { if (ref->unique_id == -1) { /* Look for the first free id. */