mirror of
https://git.haproxy.org/git/haproxy.git/
synced 2025-09-21 13:51:26 +02:00
[CONTRIB] halog: faster fgets() and add support for percentile reporting
A new fgets implementation saves about 25-50% of the time spent parsing the logs. Percentile calculation has been added for timers using -pct.
This commit is contained in:
parent
72c285345a
commit
214c203c00
@ -1,10 +1,12 @@
|
||||
INCLUDE = -I../../include
|
||||
OPTIMIZE = -O3 -mtune=pentium-m
|
||||
CC = gcc
|
||||
|
||||
OBJS = halog
|
||||
OBJS = halog halog64
|
||||
|
||||
halog: halog.c
|
||||
halog: halog.c fgets2.c
|
||||
$(CC) $(OPTIMIZE) -o $@ $(INCLUDE) ../../src/ebtree.c ../../src/eb32tree.c $^
|
||||
|
||||
halog64: halog.c fgets2-64.c
|
||||
$(CC) $(OPTIMIZE) -o $@ $(INCLUDE) ../../src/ebtree.c ../../src/eb32tree.c $^
|
||||
|
||||
clean:
|
||||
|
147
contrib/halog/fgets2-64.c
Normal file
147
contrib/halog/fgets2-64.c
Normal file
@ -0,0 +1,147 @@
|
||||
/*
|
||||
* fast fgets() replacement for log parsing
|
||||
*
|
||||
* Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This function manages its own buffer and returns a pointer to that buffer
|
||||
* in order to avoid expensive memory copies. It also checks for line breaks
|
||||
* 32 bits at a time. It could be improved a lot using mmap() but we would
|
||||
* not be allowed to replace trailing \n with zeroes and we would be limited
|
||||
* to small log files on 32-bit machines.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
|
||||
// return 1 if the integer contains at least one zero byte
|
||||
static inline unsigned int has_zero(unsigned int x)
|
||||
{
|
||||
if (!(x & 0xFF000000U) ||
|
||||
!(x & 0xFF0000U) ||
|
||||
!(x & 0xFF00U) ||
|
||||
!(x & 0xFFU))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline unsigned int has_zero64(unsigned long long x)
|
||||
{
|
||||
unsigned long long x2;
|
||||
|
||||
x2 = x & (x >> 8);
|
||||
/* no need to split it further */
|
||||
if ((x2 & 0x00FF) && (x2 & 0x00FF0000) && (x2 & 0x00FF00000000ULL) && (x2 & 0x00FF000000000000ULL))
|
||||
return 0; // approx 11/12 return here
|
||||
|
||||
if (!(x & 0xff00000000000000ULL) ||
|
||||
!(x & 0xff000000000000ULL) ||
|
||||
!(x & 0xff0000000000ULL) ||
|
||||
!(x & 0xff00000000ULL) ||
|
||||
!(x & 0xff000000UL) ||
|
||||
!(x & 0xff0000UL) ||
|
||||
!(x & 0xff00UL) ||
|
||||
!(x & 0xffUL))
|
||||
return 1; // approx 1/3 of the remaining return here
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define FGETS2_BUFSIZE (256*1024)
|
||||
const char *fgets2(FILE *stream)
|
||||
{
|
||||
static char buffer[FGETS2_BUFSIZE + 5];
|
||||
static char *end = buffer;
|
||||
static char *line = buffer;
|
||||
|
||||
char *next;
|
||||
int ret;
|
||||
|
||||
next = line;
|
||||
|
||||
while (1) {
|
||||
/* this is a speed-up, we read 32 bits at once and check for an
|
||||
* LF character there. We stop if found then continue one at a
|
||||
* time.
|
||||
*/
|
||||
while (next < end && (((unsigned long)next) & 7) && *next != '\n')
|
||||
next++;
|
||||
|
||||
/* now next is multiple of 4 or equal to end */
|
||||
while (next <= (end-32)) {
|
||||
if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
|
||||
break;
|
||||
next += 8;
|
||||
if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
|
||||
break;
|
||||
next += 8;
|
||||
if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
|
||||
break;
|
||||
next += 8;
|
||||
if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
|
||||
break;
|
||||
next += 8;
|
||||
}
|
||||
|
||||
/* we finish if needed. Note that next might be slightly higher
|
||||
* than end here because we might have gone past it above.
|
||||
*/
|
||||
while (next < end) {
|
||||
if (*next == '\n') {
|
||||
const char *start = line;
|
||||
|
||||
*next = '\0';
|
||||
line = next + 1;
|
||||
return start;
|
||||
}
|
||||
next++;
|
||||
}
|
||||
|
||||
/* we found an incomplete line. First, let's move the
|
||||
* remaining part of the buffer to the beginning, then
|
||||
* try to complete the buffer with a new read.
|
||||
*/
|
||||
if (line > buffer) {
|
||||
if (end != line)
|
||||
memmove(buffer, line, end - line);
|
||||
end = buffer + (end - line);
|
||||
next = end;
|
||||
line = buffer;
|
||||
} else {
|
||||
if (end == buffer + FGETS2_BUFSIZE)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ret = read(fileno(stream), end, buffer + FGETS2_BUFSIZE - end);
|
||||
|
||||
if (ret <= 0) {
|
||||
if (end == line)
|
||||
return NULL;
|
||||
|
||||
*end = '\0';
|
||||
return line;
|
||||
}
|
||||
|
||||
end += ret;
|
||||
/* search for '\n' again */
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef BENCHMARK
|
||||
int main() {
|
||||
const char *p;
|
||||
unsigned int lines = 0;
|
||||
|
||||
while ((p=fgets2(stdin)))
|
||||
lines++;
|
||||
printf("lines=%d\n", lines);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
138
contrib/halog/fgets2.c
Normal file
138
contrib/halog/fgets2.c
Normal file
@ -0,0 +1,138 @@
|
||||
/*
|
||||
* fast fgets() replacement for log parsing
|
||||
*
|
||||
* Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This function manages its own buffer and returns a pointer to that buffer
|
||||
* in order to avoid expensive memory copies. It also checks for line breaks
|
||||
* 32 bits at a time. It could be improved a lot using mmap() but we would
|
||||
* not be allowed to replace trailing \n with zeroes and we would be limited
|
||||
* to small log files on 32-bit machines.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
|
||||
// return 1 if the integer contains at least one zero byte
|
||||
static inline unsigned int has_zero(unsigned int x)
|
||||
{
|
||||
if (!(x & 0xFF000000U) ||
|
||||
!(x & 0xFF0000U) ||
|
||||
!(x & 0xFF00U) ||
|
||||
!(x & 0xFFU))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#define FGETS2_BUFSIZE (256*1024)
|
||||
const char *fgets2(FILE *stream)
|
||||
{
|
||||
static char buffer[FGETS2_BUFSIZE + 5];
|
||||
static char *end = buffer;
|
||||
static char *line = buffer;
|
||||
|
||||
char *next;
|
||||
int ret;
|
||||
|
||||
next = line;
|
||||
|
||||
while (1) {
|
||||
/* this is a speed-up, we read 32 bits at once and check for an
|
||||
* LF character there. We stop if found then continue one at a
|
||||
* time.
|
||||
*/
|
||||
while (next < end && (((unsigned long)next) & 3) && *next != '\n')
|
||||
next++;
|
||||
|
||||
/* now next is multiple of 4 or equal to end */
|
||||
while (next <= (end-32)) {
|
||||
if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
|
||||
break;
|
||||
next += 4;
|
||||
if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
|
||||
break;
|
||||
next += 4;
|
||||
if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
|
||||
break;
|
||||
next += 4;
|
||||
if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
|
||||
break;
|
||||
next += 4;
|
||||
if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
|
||||
break;
|
||||
next += 4;
|
||||
if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
|
||||
break;
|
||||
next += 4;
|
||||
if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
|
||||
break;
|
||||
next += 4;
|
||||
if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
|
||||
break;
|
||||
next += 4;
|
||||
}
|
||||
|
||||
/* we finish if needed. Note that next might be slightly higher
|
||||
* than end here because we might have gone past it above.
|
||||
*/
|
||||
while (next < end) {
|
||||
if (*next == '\n') {
|
||||
const char *start = line;
|
||||
|
||||
*next = '\0';
|
||||
line = next + 1;
|
||||
return start;
|
||||
}
|
||||
next++;
|
||||
}
|
||||
|
||||
/* we found an incomplete line. First, let's move the
|
||||
* remaining part of the buffer to the beginning, then
|
||||
* try to complete the buffer with a new read.
|
||||
*/
|
||||
if (line > buffer) {
|
||||
if (end != line)
|
||||
memmove(buffer, line, end - line);
|
||||
end = buffer + (end - line);
|
||||
next = end;
|
||||
line = buffer;
|
||||
} else {
|
||||
if (end == buffer + FGETS2_BUFSIZE)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ret = read(fileno(stream), end, buffer + FGETS2_BUFSIZE - end);
|
||||
|
||||
if (ret <= 0) {
|
||||
if (end == line)
|
||||
return NULL;
|
||||
|
||||
*end = '\0';
|
||||
return line;
|
||||
}
|
||||
|
||||
end += ret;
|
||||
/* search for '\n' again */
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef BENCHMARK
|
||||
int main() {
|
||||
const char *p;
|
||||
unsigned int lines = 0;
|
||||
|
||||
while ((p=fgets2(stdin)))
|
||||
lines++;
|
||||
printf("lines=%d\n", lines);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
@ -11,7 +11,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
* gcc -O2 -o halog halog.c -Iinclude src/ebtree.c src/eb32tree.c
|
||||
* gcc -O2 -o halog2 halog2.c -Iinclude src/ebtree.c src/eb32tree.c fgets2.c
|
||||
*
|
||||
* Usage:
|
||||
* $0 [ min_delay [ min_count [ field_shift ]]] < haproxy.log
|
||||
@ -57,17 +57,19 @@ struct timer {
|
||||
#define FILT_ACC_DELAY 0x10
|
||||
#define FILT_ACC_COUNT 0x20
|
||||
#define FILT_GRAPH_TIMERS 0x40
|
||||
#define FILT_PERCENTILE 0x80
|
||||
|
||||
unsigned int filter = 0;
|
||||
unsigned int filter_invert = 0;
|
||||
char line[MAXLINE];
|
||||
const char *line;
|
||||
|
||||
const char *fgets2(FILE *stream);
|
||||
|
||||
void die(const char *msg)
|
||||
{
|
||||
fprintf(stderr,
|
||||
"%s"
|
||||
"Usage: halog [-c] [-v] [-gt] [-s <skip>] [-e] [-ad <delay>] [-ac <count>] < file.log\n"
|
||||
"Usage: halog [-c] [-v] [-gt] [-pct] [-s <skip>] [-e] [-ad <delay>] [-ac <count>] < file.log\n"
|
||||
"\n",
|
||||
msg ? msg : ""
|
||||
);
|
||||
@ -385,6 +387,8 @@ int main(int argc, char **argv)
|
||||
filter_invert = !filter_invert;
|
||||
else if (strcmp(argv[0], "-gt") == 0)
|
||||
filter |= FILT_GRAPH_TIMERS;
|
||||
else if (strcmp(argv[0], "-pct") == 0)
|
||||
filter |= FILT_PERCENTILE;
|
||||
else if (strcmp(argv[0], "-o") == 0) {
|
||||
if (output_file)
|
||||
die("Fatal: output file name already specified.\n");
|
||||
@ -409,7 +413,7 @@ int main(int argc, char **argv)
|
||||
tot = 0;
|
||||
parse_err = 0;
|
||||
|
||||
while (fgets(line, MAXLINE, stdin) != NULL) {
|
||||
while ((line = fgets2(stdin)) != NULL) {
|
||||
linenum++;
|
||||
|
||||
if (filter & FILT_ERRORS_ONLY) {
|
||||
@ -429,7 +433,7 @@ int main(int argc, char **argv)
|
||||
if (test) {
|
||||
tot++;
|
||||
if (!(filter & FILT_COUNT_ONLY))
|
||||
printf("%s", line);
|
||||
puts(line);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
@ -441,6 +445,7 @@ int main(int argc, char **argv)
|
||||
continue;
|
||||
}
|
||||
|
||||
tot++;
|
||||
val = convert_date(b);
|
||||
//printf("date=%s => %d\n", b, val);
|
||||
if (val < 0) {
|
||||
@ -453,7 +458,7 @@ int main(int argc, char **argv)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (filter & FILT_GRAPH_TIMERS) {
|
||||
if (filter & (FILT_GRAPH_TIMERS|FILT_PERCENTILE)) {
|
||||
int f;
|
||||
|
||||
b = field_start(line, TIME_FIELD + skip_fields);
|
||||
@ -489,6 +494,8 @@ int main(int argc, char **argv)
|
||||
* that the total time is negative, hence the reason to reset
|
||||
* it.
|
||||
*/
|
||||
|
||||
if (filter & FILT_GRAPH_TIMERS) {
|
||||
if (err) {
|
||||
if (array[4] < 0)
|
||||
array[4] = -1;
|
||||
@ -506,6 +513,28 @@ int main(int argc, char **argv)
|
||||
fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n",
|
||||
line, array[0], array[1], array[2], array[3], array[4], v);
|
||||
t2 = insert_timer(&timers[4], &t, v); t2->count++;
|
||||
tot++;
|
||||
}
|
||||
} else { /* percentile */
|
||||
if (err) {
|
||||
if (array[4] < 0)
|
||||
array[4] = -1;
|
||||
t2 = insert_value(&timers[0], &t, array[4]); // total time
|
||||
t2->count++;
|
||||
} else {
|
||||
int v;
|
||||
|
||||
t2 = insert_value(&timers[1], &t, array[0]); t2->count++; // req
|
||||
t2 = insert_value(&timers[2], &t, array[2]); t2->count++; // conn
|
||||
t2 = insert_value(&timers[3], &t, array[3]); t2->count++; // resp
|
||||
|
||||
v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time
|
||||
if (v < 0 && !(filter & FILT_QUIET))
|
||||
fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n",
|
||||
line, array[0], array[1], array[2], array[3], array[4], v);
|
||||
t2 = insert_value(&timers[4], &t, v); t2->count++;
|
||||
tot++;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
@ -584,6 +613,47 @@ int main(int argc, char **argv)
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (filter & FILT_PERCENTILE) {
|
||||
/* report timers by percentile :
|
||||
* <percent> <total> <max_req_time> <max_conn_time> <max_resp_time> <max_data_time>
|
||||
* We don't count errs.
|
||||
*/
|
||||
struct eb32_node *n[5];
|
||||
unsigned long cum[5];
|
||||
double step;
|
||||
|
||||
for (f = 1; f < 5; f++) {
|
||||
n[f] = eb32_first(&timers[f]);
|
||||
cum[f] = container_of(n[f], struct timer, node)->count;
|
||||
}
|
||||
|
||||
for (step = 1; step <= 1000;) {
|
||||
unsigned int thres = tot * (step / 1000.0);
|
||||
|
||||
printf("%3.1f %d ", step/10.0, thres);
|
||||
for (f = 1; f < 5; f++) {
|
||||
struct eb32_node *next;
|
||||
while (cum[f] < thres) {
|
||||
/* need to find other keys */
|
||||
next = eb32_next(n[f]);
|
||||
if (!next)
|
||||
break;
|
||||
n[f] = next;
|
||||
cum[f] += container_of(next, struct timer, node)->count;
|
||||
}
|
||||
|
||||
/* value still within $step % of total */
|
||||
printf("%d ", n[f]->key);
|
||||
}
|
||||
putchar('\n');
|
||||
if (step >= 100 && step < 900)
|
||||
step += 50; // jump 5% by 5% between those steps.
|
||||
else if (step >= 20 && step < 980)
|
||||
step += 10;
|
||||
else
|
||||
step += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!(filter & FILT_QUIET))
|
||||
fprintf(stderr, "%d lines in, %d lines out, %d parsing errors\n",
|
||||
|
Loading…
x
Reference in New Issue
Block a user