From e9f4d67b15cbd98c6d67cc7cad285ea4e7761711 Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Mon, 8 Nov 2021 10:02:52 +0100
Subject: [PATCH] OPTIM: halog: skip fields 64 bits at a time when supported

Some architectures like x86_64 and aarch64 support efficient unaligned
64-bit reads. On such architectures, we already know that each string
passed to field_start() has some margin at the end because it's parsed
using fgets2() which looks for the trailing LF using the same method.
Thus let's skip spaces by packs of 8. This increases the parsing speed
by 35%.
---
 admin/halog/halog.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/admin/halog/halog.c b/admin/halog/halog.c
index 5c0218b81..8012c48b5 100644
--- a/admin/halog/halog.c
+++ b/admin/halog/halog.c
@@ -20,6 +20,8 @@
 #include <ctype.h>
 #include <time.h>
 
+#include <haproxy/compiler.h>
+
 #include <import/eb32tree.h>
 #include <import/eb64tree.h>
 #include <import/ebistree.h>
@@ -253,6 +255,16 @@ const char *field_stop(const char *p)
 }
 #endif
 
+/* return non-zero if the argument contains at least one zero byte. See principle above. */
+static inline __attribute__((unused)) unsigned long long has_zero64(unsigned long long x)
+{
+	unsigned long long y;
+
+	y = x - 0x0101010101010101ULL; /* generate a carry */
+	y &= ~x;                       /* clear the bits that were already set */
+	return y & 0x8080808080808080ULL;
+}
+
 /* return field <field> (starting from 1) in string <p>. Only consider
  * contiguous spaces (or tabs) as one delimiter. May return pointer to
  * last char if field is not found. Equivalent to awk '{print $field}'.
@@ -280,6 +292,26 @@ const char *field_start(const char *p, int field)
 
 		/* skip this field */
 		while (1) {
+#if defined(HA_UNALIGNED_LE64)
+			unsigned long long l = *(unsigned long long *)p;
+			if (!has_zero64(l)) {
+				l ^= 0x2020202020202020;
+				l = has_zero64(l);
+				if (!l) {
+					p += 8;
+					continue;
+				}
+				/* there is at least one space, find it and
+				 * skip it now. The lowest byte in <l> with
+				 * a 0x80 is the right one, but checking for
+				 * it remains slower than testing each byte,
+				 * probably due to the numerous short fields.
+				 */
+				while (*(p++) != ' ')
+					;
+				break;
+			}
+#endif
 			c = *(p++);
 			if (c == '\0')
 				return p - 1;