mirror of
				https://git.haproxy.org/git/haproxy.git/
				synced 2025-10-31 16:41:01 +01:00 
			
		
		
		
	This function (and its sister regex_exec_match2()) abstract the regex execution but make it impossible to pass flags to the regex engine. Currently we don't use them but we'll need to support REG_NOTBOL soon (to indicate that we're not at the beginning of a line). So let's add support for this flag and update the API accordingly.
		
			
				
	
	
		
			335 lines
		
	
	
		
			7.9 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			335 lines
		
	
	
		
			7.9 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * Regex and string management functions.
 | |
|  *
 | |
|  * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
 | |
|  *
 | |
|  * This program is free software; you can redistribute it and/or
 | |
|  * modify it under the terms of the GNU General Public License
 | |
|  * as published by the Free Software Foundation; either version
 | |
|  * 2 of the License, or (at your option) any later version.
 | |
|  *
 | |
|  */
 | |
| 
 | |
| #include <ctype.h>
 | |
| #include <stdlib.h>
 | |
| #include <string.h>
 | |
| 
 | |
| #include <common/config.h>
 | |
| #include <common/defaults.h>
 | |
| #include <common/regex.h>
 | |
| #include <common/standard.h>
 | |
| #include <proto/log.h>
 | |
| 
 | |
| /* global scratch array of MAX_MATCH match slots, used by various regex tests */
 | |
| regmatch_t pmatch[MAX_MATCH];  /* each slot holds the rm_so, rm_eo offsets of one match */
 | |
| 
 | |
/* Expands the replacement template <str> into <dst>, which can hold at most
 * <dst_size> bytes including the trailing zero. The following sequences are
 * recognised in <str> :
 *   - "\N" where N is a single digit is replaced with the part of <src>
 *     delimited by matches[N]. Nothing is emitted when this group is unset
 *     (rm_so or rm_eo equal to -1).
 *   - "\xHH" emits the byte whose value is given by the two characters HH,
 *     read as hexadecimal digits. They are not validated here, so the
 *     template should first be checked with check_replace_string().
 *   - "\c" for any other character <c> emits <c> itself.
 * Any other character is copied as-is. Since every digit from 0 to 9 may be
 * referenced, <matches> must hold at least 10 entries.
 * Returns the number of bytes written, not counting the trailing zero, or -1
 * if <str> ends in the middle of an escape sequence or if <dst> is too small.
 */
int exp_replace(char *dst, unsigned int dst_size, char *src, const char *str, const regmatch_t *matches)
{
	char *old_dst = dst;
	char *dst_end = dst + dst_size;

	while (*str) {
		if (*str == '\\') {
			str++;
			if (!*str)
				return -1;

			if (isdigit((unsigned char)*str)) {
				int len, num;

				num = *str - '0';
				str++;

				if (matches[num].rm_eo > -1 && matches[num].rm_so > -1) {
					len = matches[num].rm_eo - matches[num].rm_so;

					/* Compare against the remaining room rather than
					 * computing dst + len, which could point outside
					 * of the buffer. One byte always remains reserved
					 * for the trailing zero. A negative length can
					 * only come from an inconsistent match and must
					 * never reach memcpy().
					 */
					if (len < 0 || len >= dst_end - dst)
						return -1;

					memcpy(dst, src + matches[num].rm_so, len);
					dst += len;
				}

			} else if (*str == 'x') {
				unsigned char hex1, hex2;
				str++;

				if (!*str)
					return -1;

				/* <ctype.h> functions require a value representable
				 * as an unsigned char, hence the casts.
				 */
				hex1 = toupper((unsigned char)*str++) - '0';

				if (!*str)
					return -1;

				hex2 = toupper((unsigned char)*str++) - '0';

				/* map 'A'..'F' (17..22 once '0' is subtracted) to 10..15 */
				if (hex1 > 9) hex1 -= 'A' - '9' - 1;
				if (hex2 > 9) hex2 -= 'A' - '9' - 1;

				if (dst >= dst_end)
					return -1;

				*dst++ = (hex1<<4) + hex2;
			} else {
				/* any other escaped character is emitted unchanged */
				if (dst >= dst_end)
					return -1;

				*dst++ = *str++;
			}
		} else {
			if (dst >= dst_end)
				return -1;

			*dst++ = *str++;
		}
	}

	/* room is still needed for the trailing zero */
	if (dst >= dst_end)
		return -1;

	*dst = '\0';
	return dst - old_dst;
}
 | |
| 
 | |
/* Validates the replacement string <str>. Each backslash must be followed by
 * a digit, by 'x' and two hexadecimal characters, or by any other character
 * (the latter is accepted but triggers a deprecation warning). Returns NULL
 * when the whole string is valid, otherwise a pointer to the backslash which
 * starts the first invalid or truncated sequence.
 */
const char *check_replace_string(const char *str)
{
	const char *bad = NULL;

	for (; *str; str++) {
		if (*str != '\\')
			continue;

		/* remember where the sequence starts in case it is broken */
		bad = str;
		str++;

		if (*str == '\0')
			return bad;

		if (isdigit((unsigned char)*str)) {
			/* back-reference to a capture group */
			bad = NULL;
			continue;
		}

		if (*str != 'x') {
			Warning("'\\%c' : deprecated use of a backslash before something not '\\','x' or a digit.\n", *str);
			bad = NULL;
			continue;
		}

		/* "\x" must be followed by exactly two hex characters */
		str++;
		if (!ishex(*str))
			return bad;

		str++;
		if (!ishex(*str))
			return bad;

		bad = NULL;
	}
	return bad;
}
 | |
| 
 | |
| 
 | |
| /* returns the pointer to an error in the replacement string, or NULL if OK */
 | |
| const char *chain_regex(struct hdr_exp **head, struct my_regex *preg,
 | |
| 			int action, const char *replace, void *cond)
 | |
| {
 | |
| 	struct hdr_exp *exp;
 | |
| 
 | |
| 	if (replace != NULL) {
 | |
| 		const char *err;
 | |
| 		err = check_replace_string(replace);
 | |
| 		if (err)
 | |
| 			return err;
 | |
| 	}
 | |
| 
 | |
| 	while (*head != NULL)
 | |
| 		head = &(*head)->next;
 | |
| 
 | |
| 	exp = calloc(1, sizeof(struct hdr_exp));
 | |
| 
 | |
| 	exp->preg = preg;
 | |
| 	exp->replace = replace;
 | |
| 	exp->action = action;
 | |
| 	exp->cond = cond;
 | |
| 	*head = exp;
 | |
| 
 | |
| 	return NULL;
 | |
| }
 | |
| 
 | |
| /* This function apply regex. It take const null terminated char as input.
 | |
|  * If the function doesn't match, it returns false, else it returns true.
 | |
|  * When it is compiled with JIT, this function execute strlen on the subject.
 | |
|  * Currently the only supported flag is REG_NOTBOL.
 | |
|  */
 | |
| int regex_exec_match(const struct my_regex *preg, const char *subject,
 | |
|                      size_t nmatch, regmatch_t pmatch[], int flags) {
 | |
| #if defined(USE_PCRE) || defined(USE_PCRE_JIT)
 | |
| 	int ret;
 | |
| 	int matches[MAX_MATCH * 3];
 | |
| 	int enmatch;
 | |
| 	int i;
 | |
| 	int options;
 | |
| 
 | |
| 	/* Silently limit the number of allowed matches. max
 | |
| 	 * match i the maximum value for match, in fact this
 | |
| 	 * limit is not applyied.
 | |
| 	 */
 | |
| 	enmatch = nmatch;
 | |
| 	if (enmatch > MAX_MATCH)
 | |
| 		enmatch = MAX_MATCH;
 | |
| 
 | |
| 	options = 0;
 | |
| 	if (flags & REG_NOTBOL)
 | |
| 		options |= PCRE_NOTBOL;
 | |
| 
 | |
| 	/* The value returned by pcre_exec() is one more than the highest numbered
 | |
| 	 * pair that has been set. For example, if two substrings have been captured,
 | |
| 	 * the returned value is 3. If there are no capturing subpatterns, the return
 | |
| 	 * value from a successful match is 1, indicating that just the first pair of
 | |
| 	 * offsets has been set.
 | |
| 	 *
 | |
| 	 * It seems that this function returns 0 if it detect more matches than avalaible
 | |
| 	 * space in the matches array.
 | |
| 	 */
 | |
| 	ret = pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, options, matches, enmatch * 3);
 | |
| 	if (ret < 0)
 | |
| 		return 0;
 | |
| 
 | |
| 	if (ret == 0)
 | |
| 		ret = enmatch;
 | |
| 
 | |
| 	for (i=0; i<nmatch; i++) {
 | |
| 		/* Copy offset. */
 | |
| 		if (i < ret) {
 | |
| 			pmatch[i].rm_so = matches[(i*2)];
 | |
| 			pmatch[i].rm_eo = matches[(i*2)+1];
 | |
| 			continue;
 | |
| 		}
 | |
| 		/* Set the unmatvh flag (-1). */
 | |
| 		pmatch[i].rm_so = -1;
 | |
| 		pmatch[i].rm_eo = -1;
 | |
| 	}
 | |
| 	return 1;
 | |
| #else
 | |
| 	int match;
 | |
| 
 | |
| 	flags &= REG_NOTBOL;
 | |
| 	match = regexec(&preg->regex, subject, nmatch, pmatch, flags);
 | |
| 	if (match == REG_NOMATCH)
 | |
| 		return 0;
 | |
| 	return 1;
 | |
| #endif
 | |
| }
 | |
| 
 | |
| /* This function apply regex. It take a "char *" ans length as input. The
 | |
|  * <subject> can be modified during the processing. If the function doesn't
 | |
|  * match, it returns false, else it returns true.
 | |
|  * When it is compiled with standard POSIX regex or PCRE, this function add
 | |
|  * a temporary null chracters at the end of the <subject>. The <subject> must
 | |
|  * have a real length of <length> + 1. Currently the only supported flag is
 | |
|  * REG_NOTBOL.
 | |
|  */
 | |
| int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
 | |
|                       size_t nmatch, regmatch_t pmatch[], int flags) {
 | |
| #if defined(USE_PCRE) || defined(USE_PCRE_JIT)
 | |
| 	int ret;
 | |
| 	int matches[MAX_MATCH * 3];
 | |
| 	int enmatch;
 | |
| 	int i;
 | |
| 	int options;
 | |
| 
 | |
| 	/* Silently limit the number of allowed matches. max
 | |
| 	 * match i the maximum value for match, in fact this
 | |
| 	 * limit is not applyied.
 | |
| 	 */
 | |
| 	enmatch = nmatch;
 | |
| 	if (enmatch > MAX_MATCH)
 | |
| 		enmatch = MAX_MATCH;
 | |
| 
 | |
| 	options = 0;
 | |
| 	if (flags & REG_NOTBOL)
 | |
| 		options |= PCRE_NOTBOL;
 | |
| 
 | |
| 	/* The value returned by pcre_exec() is one more than the highest numbered
 | |
| 	 * pair that has been set. For example, if two substrings have been captured,
 | |
| 	 * the returned value is 3. If there are no capturing subpatterns, the return
 | |
| 	 * value from a successful match is 1, indicating that just the first pair of
 | |
| 	 * offsets has been set.
 | |
| 	 *
 | |
| 	 * It seems that this function returns 0 if it detect more matches than avalaible
 | |
| 	 * space in the matches array.
 | |
| 	 */
 | |
| 	ret = pcre_exec(preg->reg, preg->extra, subject, length, 0, options, matches, enmatch * 3);
 | |
| 	if (ret < 0)
 | |
| 		return 0;
 | |
| 
 | |
| 	if (ret == 0)
 | |
| 		ret = enmatch;
 | |
| 
 | |
| 	for (i=0; i<nmatch; i++) {
 | |
| 		/* Copy offset. */
 | |
| 		if (i < ret) {
 | |
| 			pmatch[i].rm_so = matches[(i*2)];
 | |
| 			pmatch[i].rm_eo = matches[(i*2)+1];
 | |
| 			continue;
 | |
| 		}
 | |
| 		/* Set the unmatvh flag (-1). */
 | |
| 		pmatch[i].rm_so = -1;
 | |
| 		pmatch[i].rm_eo = -1;
 | |
| 	}
 | |
| 	return 1;
 | |
| #else
 | |
| 	char old_char = subject[length];
 | |
| 	int match;
 | |
| 
 | |
| 	flags &= REG_NOTBOL;
 | |
| 	subject[length] = 0;
 | |
| 	match = regexec(&preg->regex, subject, nmatch, pmatch, flags);
 | |
| 	subject[length] = old_char;
 | |
| 	if (match == REG_NOMATCH)
 | |
| 		return 0;
 | |
| 	return 1;
 | |
| #endif
 | |
| }
 | |
| 
 | |
| int regex_comp(const char *str, struct my_regex *regex, int cs, int cap, char **err)
 | |
| {
 | |
| #if defined(USE_PCRE) || defined(USE_PCRE_JIT)
 | |
| 	int flags = 0;
 | |
| 	const char *error;
 | |
| 	int erroffset;
 | |
| 
 | |
| 	if (!cs)
 | |
| 		flags |= PCRE_CASELESS;
 | |
| 	if (!cap)
 | |
| 		flags |= PCRE_NO_AUTO_CAPTURE;
 | |
| 
 | |
| 	regex->reg = pcre_compile(str, flags, &error, &erroffset, NULL);
 | |
| 	if (!regex->reg) {
 | |
| 		memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%d)", str, error, erroffset);
 | |
| 		return 0;
 | |
| 	}
 | |
| 
 | |
| 	regex->extra = pcre_study(regex->reg, PCRE_STUDY_JIT_COMPILE, &error);
 | |
| 	if (!regex->extra && error != NULL) {
 | |
| 		pcre_free(regex->reg);
 | |
| 		memprintf(err, "failed to compile regex '%s' (error=%s)", str, error);
 | |
| 		return 0;
 | |
| 	}
 | |
| #else
 | |
| 	int flags = REG_EXTENDED;
 | |
| 
 | |
| 	if (!cs)
 | |
| 		flags |= REG_ICASE;
 | |
| 	if (!cap)
 | |
| 		flags |= REG_NOSUB;
 | |
| 
 | |
| 	if (regcomp(®ex->regex, str, flags) != 0) {
 | |
| 		memprintf(err, "regex '%s' is invalid", str);
 | |
| 		return 0;
 | |
| 	}
 | |
| #endif
 | |
| 	return 1;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Local variables:
 | |
|  *  c-indent-level: 8
 | |
|  *  c-basic-offset: 8
 | |
|  * End:
 | |
|  */
 |