From 3f2d696d7261b9d88aba37ce4e45446b4083b9ae Mon Sep 17 00:00:00 2001
From: Willy Tarreau
Date: Fri, 7 Dec 2018 08:35:07 +0100
Subject: [PATCH] MINOR: ist: add functions to copy/uppercase/lowercase into a
 buffer or string

The ist functions were missing functions to copy an IST into a target
buffer, making some code have to resort to memcpy(), which tends to be
overkill for small strings, that the compiler cannot guess. In addition
sometimes there is a need to turn a string to lower or upper case so it
had to be overwritten after the operation.

This patch adds 6 functions to copy an ist to a buffer, as binary or as a
string (i.e. a zero is or is not appended), and optionally to apply a
lower case or upper case transformation on the fly.

A number of tests were performed to optimize the processing for small
strings. The loops are marked unlikely to dissuade the compilers from
over-optimizing them and switching to SIMD instructions. The lower case
or upper case transformations used to rely on external functions for
each character and to crappify the code due to clobbered registers, which
is not acceptable when we know that only a certain class of chars has to
be transformed, so the test was open-coded.
---
 include/common/ist.h | 139 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 139 insertions(+)

diff --git a/include/common/ist.h b/include/common/ist.h
index cfab1e32c..dd0c38903 100644
--- a/include/common/ist.h
+++ b/include/common/ist.h
@@ -372,6 +372,145 @@ static inline ssize_t istscat(struct ist *dst, const struct ist src, size_t coun
 	return -1;
 }
 
+/* copies the entire <src> over <dst>, which must be allocated large enough to
+ * hold the whole contents. No trailing zero is appended, this is mainly used
+ * for protocol processing where the frame length has already been checked. An
+ * ist made of the output and its length are returned. The destination is not
+ * touched if src.len is null.
+ */
+static inline struct ist ist2bin(char *dst, const struct ist src)
+{
+	size_t ofs = 0;
+
+	/* discourage the compiler from trying to optimize for large strings,
+	 * but tell it that most of our strings are not empty.
+	 */
+	if (__builtin_expect(ofs < src.len, 1)) {
+		do {
+			dst[ofs] = src.ptr[ofs];
+			ofs++;
+		} while (__builtin_expect(ofs < src.len, 0));
+	}
+	return ist2(dst, ofs);
+}
+
+/* copies the entire <src> over <dst>, which must be allocated large enough to
+ * hold the whole contents as well as a trailing zero which is always appended.
+ * This is mainly used for protocol conversions where the frame length has
+ * already been checked. An ist made of the output and its length (not counting
+ * the trailing zero) are returned. <count> is not used by this function.
+ */
+static inline struct ist ist2str(char *dst, const struct ist src, size_t count)
+{
+	size_t ofs = 0;
+
+	/* discourage the compiler from trying to optimize for large strings,
+	 * but tell it that most of our strings are not empty.
+	 */
+	if (__builtin_expect(ofs < src.len, 1)) {
+		do {
+			dst[ofs] = src.ptr[ofs];
+			ofs++;
+		} while (__builtin_expect(ofs < src.len, 0));
+	}
+	dst[ofs] = 0;
+	return ist2(dst, ofs);
+}
+
+/* makes a lower case copy of the entire <src> into <dst>, which must have been
+ * allocated large enough to hold the whole contents. No trailing zero is
+ * appended, this is mainly used for protocol processing where the frame length
+ * has already been checked. An ist made of the output and its length are
+ * returned. The destination is not touched if src.len is null.
+ */
+static inline struct ist ist2bin_lc(char *dst, const struct ist src)
+{
+	size_t ofs = 0;
+
+	/* discourage the compiler from trying to optimize for large strings,
+	 * but tell it that most of our strings are not empty.
+	 */
+	if (__builtin_expect(ofs < src.len, 1)) {
+		do {
+			char c = src.ptr[ofs];
+			dst[ofs] = ((unsigned char)(c - 'A') <= 'Z' - 'A') ? c + ('a' - 'A') : c;
+			ofs++;
+		} while (__builtin_expect(ofs < src.len, 0));
+	}
+	return ist2(dst, ofs);
+}
+
+/* makes a lower case copy of the entire <src> into <dst>, which must have been
+ * allocated large enough to hold the whole contents as well as a trailing zero
+ * which is always appended. This is mainly used for protocol conversions where
+ * the frame length has already been checked. An ist made of the output and its
+ * length (not counting the trailing zero) are returned. <count> is unused.
+ */
+static inline struct ist ist2str_lc(char *dst, const struct ist src, size_t count)
+{
+	size_t ofs = 0;
+
+	/* discourage the compiler from trying to optimize for large strings,
+	 * but tell it that most of our strings are not empty.
+	 */
+	if (__builtin_expect(ofs < src.len, 1)) {
+		do {
+			char c = src.ptr[ofs];
+			dst[ofs] = ((unsigned char)(c - 'A') <= 'Z' - 'A') ? c + ('a' - 'A') : c;
+			ofs++;
+		} while (__builtin_expect(ofs < src.len, 0));
+	}
+	dst[ofs] = 0;
+	return ist2(dst, ofs);
+}
+
+/* makes an upper case copy of the entire <src> into <dst>, which must have
+ * been allocated large enough to hold the whole contents. No trailing zero is
+ * appended, this is mainly used for protocol processing where the frame length
+ * has already been checked. An ist made of the output and its length are
+ * returned. The destination is not touched if src.len is null.
+ */
+static inline struct ist ist2bin_uc(char *dst, const struct ist src)
+{
+	size_t ofs = 0;
+
+	/* discourage the compiler from trying to optimize for large strings,
+	 * but tell it that most of our strings are not empty.
+	 */
+	if (__builtin_expect(ofs < src.len, 1)) {
+		do {
+			char c = src.ptr[ofs];
+			dst[ofs] = ((unsigned char)(c - 'a') <= 'z' - 'a') ? c + ('A' - 'a') : c;
+			ofs++;
+		} while (__builtin_expect(ofs < src.len, 0));
+	}
+	return ist2(dst, ofs);
+}
+
+/* makes an upper case copy of the entire <src> into <dst>, which must have been
+ * allocated large enough to hold the whole contents as well as a trailing zero
+ * which is always appended. This is mainly used for protocol conversions where
+ * the frame length has already been checked. An ist made of the output and its
+ * length (not counting the trailing zero) are returned. <count> is unused.
+ */
+static inline struct ist ist2str_uc(char *dst, const struct ist src, size_t count)
+{
+	size_t ofs = 0;
+
+	/* discourage the compiler from trying to optimize for large strings,
+	 * but tell it that most of our strings are not empty.
+	 */
+	if (__builtin_expect(ofs < src.len, 1)) {
+		do {
+			char c = src.ptr[ofs];
+			dst[ofs] = ((unsigned char)(c - 'a') <= 'z' - 'a') ? c + ('A' - 'a') : c;
+			ofs++;
+		} while (__builtin_expect(ofs < src.len, 0));
+	}
+	dst[ofs] = 0;
+	return ist2(dst, ofs);
+}
+
 /* looks for first occurrence of character in string . Returns the
  * pointer if found, or NULL if not found.
  */