MEDIUM: log-format: make the format parser more robust and more extensible

The log-format parser reached a limit making it hard to add new features.
It also suffers from a weak handling of certain incorrect corner cases,
for example "%{foo}" is emitted as a litteral while syntactically it's an
argument to no variable. Also the argument parser had to redo some of the
job with some cases causing minor memory leaks (eg: ignored args).

This work aims at improving the situation so that slightly better reporting
is possible and that it becomes possible to extend the log format. The code
has a few more states but looks significantly simpler. The parser is now
capable of reporting ignored arguments and truncated lines.
This commit is contained in:
Willy Tarreau 2012-12-20 21:23:42 +01:00
parent a357166889
commit 8a3f52fc2e
3 changed files with 130 additions and 137 deletions

View File

@ -55,10 +55,10 @@ void sess_log(struct session *s);
int parse_logformat_var_args(char *args, struct logformat_node *node); int parse_logformat_var_args(char *args, struct logformat_node *node);
/* /*
* Parse a variable '%varname' or '%{args}varname' in logformat * Parse a variable '%varname' or '%{args}varname' in log-format
* *
*/ */
int parse_logformat_var(char *str, size_t len, struct proxy *curproxy, struct list *list_format, int *defoptions); int parse_logformat_var(char *arg, int arg_len, char *var, int var_len, struct proxy *curproxy, struct list *list_format, int *defoptions);
/* /*
* add to the logformat linked list * add to the logformat linked list

View File

@ -97,15 +97,16 @@ enum {
LOG_FMT_SSL_VERSION, LOG_FMT_SSL_VERSION,
}; };
/* enum for parse_logformat */ /* enum for parse_logformat_string */
enum { enum {
LF_TEXT = 0, LF_INIT = 0, // before first character
LF_SEPARATOR, LF_TEXT, // normal text
LF_VAR, // after % LF_SEPARATOR, // a single separator
LF_VAR, // variable name, after '%' or '%{..}'
LF_STARTVAR, // % LF_STARTVAR, // % in text
LF_STARG, // { and within { } LF_STARG, // after '%{' and berore '}'
LF_EDARG, // end arg } LF_EDARG, // '}' after '%{'
LF_END, // \0 found
}; };

246
src/log.c
View File

@ -10,6 +10,7 @@
* *
*/ */
#include <ctype.h>
#include <fcntl.h> #include <fcntl.h>
#include <stdarg.h> #include <stdarg.h>
#include <stdio.h> #include <stdio.h>
@ -221,69 +222,54 @@ int parse_logformat_var_args(char *args, struct logformat_node *node)
} }
/* /*
* Parse a variable '%varname' or '%{args}varname' in logformat * Parse a variable '%varname' or '%{args}varname' in log-format. The caller
* * must pass the args part in the <arg> pointer with its length in <arg_len>,
* and varname with its length in <var> and <var_len> respectively. <arg> is
* ignored when arg_len is 0. Neither <var> nor <var_len> may be null.
*/ */
int parse_logformat_var(char *str, size_t len, struct proxy *curproxy, struct list *list_format, int *defoptions) int parse_logformat_var(char *arg, int arg_len, char *var, int var_len, struct proxy *curproxy, struct list *list_format, int *defoptions)
{ {
int i, j; int j;
char *arg = NULL; // arguments struct logformat_node *node;
int fparam = 0;
char *name = NULL;
struct logformat_node *node = NULL;
char varname[255] = { 0 }; // variable name
for (i = 1; i < len; i++) { // escape first char % for (j = 0; logformat_keywords[j].name; j++) { // search a log type
if (!arg && str[i] == '{') { if (strlen(logformat_keywords[j].name) == var_len &&
arg = str + i; strncmp(var, logformat_keywords[j].name, var_len) == 0) {
fparam = 1; if (logformat_keywords[j].mode != PR_MODE_HTTP || curproxy->mode == PR_MODE_HTTP) {
} else if (arg && str[i] == '}') { node = calloc(1, sizeof(struct logformat_node));
char *tmp = arg; node->type = logformat_keywords[j].type;
arg = calloc(str + i - tmp, 1); // without {} node->options = *defoptions;
strncpy(arg, tmp + 1, str + i - tmp - 1); // copy without { and } if (arg_len) {
arg[str + i - tmp - 1] = '\0'; node->arg = my_strndup(arg, arg_len);
fparam = 0; parse_logformat_var_args(node->arg, node);
} else if (!name && !fparam) { }
strncpy(varname, str + i, len - i + 1); if (node->type == LOG_FMT_GLOBAL) {
varname[len - i] = '\0'; *defoptions = node->options;
for (j = 0; logformat_keywords[j].name; j++) { // search a log type free(node->arg);
if (strcmp(varname, logformat_keywords[j].name) == 0) { free(node);
if (!((logformat_keywords[j].mode == PR_MODE_HTTP) && (curproxy->mode == PR_MODE_TCP))) { } else {
node = calloc(1, sizeof(struct logformat_node)); if (logformat_keywords[j].config_callback &&
node->type = logformat_keywords[j].type; logformat_keywords[j].config_callback(node, curproxy) != 0) {
node->options = *defoptions;
node->arg = arg;
parse_logformat_var_args(node->arg, node);
if (node->type == LOG_FMT_GLOBAL) {
*defoptions = node->options;
free(node);
} else {
if (logformat_keywords[j].config_callback != NULL) {
if (logformat_keywords[j].config_callback(node, curproxy) != 0) {
return -1;
}
}
curproxy->to_log |= logformat_keywords[j].lw;
LIST_ADDQ(list_format, &node->list);
}
if (logformat_keywords[j].replace_by)
Warning("Warning: deprecated variable '%s' in log-format, please replace it with '%s'\n",
varname, logformat_keywords[j].replace_by);
return 0;
} else {
Warning("Warning: No such variable name '%s' in this log mode\n", varname);
if (arg)
free(arg);
return -1; return -1;
} }
curproxy->to_log |= logformat_keywords[j].lw;
LIST_ADDQ(list_format, &node->list);
} }
if (logformat_keywords[j].replace_by)
Warning("Warning: deprecated variable '%s' in log-format, please replace it with '%s'\n",
logformat_keywords[j].name, logformat_keywords[j].replace_by);
return 0;
} else {
Warning("Warning: log-format variable name '%s' is not suited to HTTP mode\n", logformat_keywords[j].name);
return -1;
} }
Warning("Warning: No such variable name '%s' in log-format\n", varname);
if (arg)
free(arg);
return -1;
} }
} }
j = var[var_len];
var[var_len] = 0;
Warning("Warning: no such variable name '%s' in log-format\n", var);
var[var_len] = j;
return -1; return -1;
} }
@ -329,9 +315,13 @@ void add_to_logformat_list(char *start, char *end, int type, struct list *list_f
*/ */
void parse_logformat_string(char *str, struct proxy *curproxy, struct list *list_format, int capabilities) void parse_logformat_string(char *str, struct proxy *curproxy, struct list *list_format, int capabilities)
{ {
char *sp = str; /* start pointer */ char *sp = str; /* start pointer for text parts */
int cformat = -1; /* current token format : LOG_TEXT, LOG_SEPARATOR, LOG_VARIABLE */ char *arg = NULL; /* start pointer for args */
int pformat = -1; /* previous token format */ char *var = NULL; /* start pointer for vars */
int arg_len = 0;
int var_len = 0;
int cformat; /* current token format */
int pformat; /* previous token format */
struct logformat_node *tmplf, *back; struct logformat_node *tmplf, *back;
int options = 0; int options = 0;
@ -343,87 +333,89 @@ void parse_logformat_string(char *str, struct proxy *curproxy, struct list *list
free(tmplf); free(tmplf);
} }
while (1) { for (cformat = LF_INIT; cformat != LF_END; str++) {
// push the variable only if formats are different, not
// within a variable, and not the first iteration
if ((cformat != pformat && cformat != -1 && pformat != -1) || *str == '\0') {
if (((pformat != LF_STARTVAR && cformat != LF_VAR) &&
(pformat != LF_STARTVAR && cformat != LF_STARG) &&
(pformat != LF_STARG && cformat != LF_VAR)) || *str == '\0') {
if (pformat > LF_VAR) // unfinished string
pformat = LF_TEXT;
if (pformat == LF_VAR)
parse_logformat_var(sp, str - sp, curproxy, list_format, &options);
else
add_to_logformat_list(sp, str, pformat, list_format);
sp = str;
if (*str == '\0')
break;
}
}
if (cformat != -1)
str++; // consume the string, except on the first tour
pformat = cformat; pformat = cformat;
if (*str == '\0') { if (!*str)
cformat = LF_STARTVAR; // for breaking in all cases cformat = LF_END; // preset it to save all states from doing this
continue;
}
if (pformat == LF_STARTVAR) { // after a % /* The prinicple of the two-step state machine below is to first detect a change, and
if ( (*str >= 'a' && *str <= 'z') || // parse varname * second have all common paths processed at one place. The common paths are the ones
(*str >= 'A' && *str <= 'Z') || * encountered in text areas (LF_INIT, LF_TEXT, LF_SEPARATOR) and at the end (LF_END).
(*str >= '0' && *str <= '9')) { * We use the common LF_INIT state to dispatch to the different final states.
cformat = LF_VAR; // varname */
continue; switch (pformat) {
} else if (*str == '{') { case LF_STARTVAR: // text immediately following a '%'
cformat = LF_STARG; // variable arguments arg = NULL;
continue; arg_len = var_len = 0;
} else { // another unexpected token if (*str == '{') { // optional argument
pformat = LF_TEXT; // redefine the format of the previous token to TEXT
cformat = LF_TEXT;
continue;
}
} else if (pformat == LF_VAR) { // after a varname
if ( (*str >= 'a' && *str <= 'z') || // parse varname
(*str >= 'A' && *str <= 'Z') ||
(*str >= '0' && *str <= '9')) {
cformat = LF_VAR;
continue;
}
} else if (pformat == LF_STARG) { // inside variable arguments
if (*str == '}') { // end of varname
cformat = LF_EDARG;
continue;
} else { // all tokens are acceptable within { }
cformat = LF_STARG; cformat = LF_STARG;
continue; arg = str + 1;
} }
} else if (pformat == LF_EDARG) { // after arguments else if (isalnum((int)*str)) { // variable name
if ( (*str >= 'a' && *str <= 'z') || // parse a varname
(*str >= 'A' && *str <= 'Z') ||
(*str >= '0' && *str <= '9')) {
cformat = LF_VAR; cformat = LF_VAR;
continue; var = str;
} else { // if no varname after arguments, transform in TEXT }
pformat = LF_TEXT; else if (*str == '%')
cformat = LF_TEXT; cformat = LF_TEXT; // convert this character to a litteral (useful for '%')
else
cformat = LF_INIT; // handle other cases of litterals
break;
case LF_STARG: // text immediately following '%{'
if (*str == '}') { // end of arg
cformat = LF_EDARG;
arg_len = str - arg;
*str = 0; // used for reporting errors
}
break;
case LF_EDARG: // text immediately following '%{arg}'
if (isalnum((int)*str)) { // variable name
cformat = LF_VAR;
var = str;
break;
}
Warning("Skipping isolated argument in log-format line : '%%{%s}'\n", arg);
cformat = LF_INIT;
break;
case LF_VAR: // text part of a variable name
var_len = str - var;
if (!isalnum((int)*str))
cformat = LF_INIT; // not variable name anymore
break;
default: // LF_INIT, LF_TEXT, LF_SEPARATOR, LF_END
cformat = LF_INIT;
}
if (cformat == LF_INIT) { /* resynchronize state to text/sep/startvar */
switch (*str) {
case '%': cformat = LF_STARTVAR; break;
case ' ': cformat = LF_SEPARATOR; break;
case 0 : cformat = LF_END; break;
default : cformat = LF_TEXT; break;
} }
} }
// others tokens that don't match previous conditions if (cformat != pformat || pformat == LF_SEPARATOR) {
if (*str == '%') { switch (pformat) {
cformat = LF_STARTVAR; case LF_VAR:
} else if (*str == ' ') { parse_logformat_var(arg, arg_len, var, var_len, curproxy, list_format, &options);
cformat = LF_SEPARATOR; break;
} else { case LF_TEXT:
cformat = LF_TEXT; case LF_SEPARATOR:
add_to_logformat_list(sp, str, pformat, list_format);
break;
}
sp = str; /* new start of text at every state switch and at every separator */
} }
} }
if (pformat == LF_STARTVAR || pformat == LF_STARG)
Warning("Ignoring end of truncated log-format line after '%s'\n", arg ? arg : "%");
} }
/* /*