MINOR: ist: add functions to copy/uppercase/lowercase into a buffer or string

The ist API lacked functions to copy an IST into a target buffer, forcing some code to resort to memcpy(), which tends to be overkill for small strings whose size the compiler cannot guess. In addition, there is sometimes a need to turn a string to lower or upper case, so the copy had to be overwritten after the operation. This patch adds 6 functions to copy an ist to a buffer, as binary or as a string (i.e. a trailing zero is not or is appended, respectively), and optionally to apply a lower case or upper case transformation on the fly. A number of tests were performed to optimize the processing for small strings. The loops are marked unlikely to dissuade the compilers from over-optimizing them and switching to SIMD instructions. The lower case or upper case transformations used to rely on external functions for each character, which degraded the generated code due to clobbered registers; this is not acceptable when we know that only a certain class of chars has to be transformed, so the test was open-coded.
2025-11-17 08:51:09 +01:00 · 2018-12-07 08:35:07 +01:00 · 2018-12-07 08:35:07 +01:00 · 3f2d696d72
commit 3f2d696d72
parent ce7fad5232
1 changed files with 139 additions and 0 deletions
--- a/include/common/ist.h
+++ b/include/common/ist.h
@ -372,6 +372,145 @@ static inline ssize_t istscat(struct ist *dst, const struct ist src, size_t coun
 	return -1;
 }
 /* Binary copy of <src> into <dst>: every byte of <src> is written to <dst> and
 * no trailing zero is added. <dst> must be large enough to receive src.len
 * bytes; this is meant for protocol processing where the frame length was
 * already validated. Returns an ist built from <dst> and the number of bytes
 * copied. <dst> is left untouched when src.len is zero.
 */
 static inline struct ist ist2bin(char *dst, const struct ist src)
 {
 	size_t pos = 0;
 	/* most strings are not empty, so the empty case is the unlikely one */
 	if (__builtin_expect(src.len == 0, 0))
 		return ist2(dst, 0);
 	/* the loop-back branch is hinted as unlikely to discourage the
 	 * compiler from optimizing this loop for large strings.
 	 */
 	do {
 		dst[pos] = src.ptr[pos];
 		pos++;
 	} while (__builtin_expect(pos < src.len, 0));
 	return ist2(dst, pos);
 }
 /* String copy of <src> into <dst>: every byte of <src> is written to <dst>,
 * then a trailing zero is always appended. <dst> must be large enough to
 * receive src.len bytes plus the trailing zero; this is meant for protocol
 * conversions where the frame length was already validated. Returns an ist
 * built from <dst> and the number of bytes copied (the trailing zero is not
 * counted).
 *
 * NOTE(review): <count> is accepted but never read by this implementation, so
 * it does not bound the copy. Confirm with callers whether it is expected to
 * limit the number of bytes written.
 */
 static inline struct ist ist2str(char *dst, const struct ist src, size_t count)
 {
 	size_t pos = 0;
 	/* most strings are not empty, so the empty case is the unlikely one;
 	 * it still has to produce a terminated (empty) string.
 	 */
 	if (__builtin_expect(src.len == 0, 0)) {
 		dst[0] = 0;
 		return ist2(dst, 0);
 	}
 	/* the loop-back branch is hinted as unlikely to discourage the
 	 * compiler from optimizing this loop for large strings.
 	 */
 	do {
 		dst[pos] = src.ptr[pos];
 		pos++;
 	} while (__builtin_expect(pos < src.len, 0));
 	dst[pos] = 0;
 	return ist2(dst, pos);
 }
 /* Lower-casing binary copy of <src> into <dst>: each byte in 'A'..'Z' is
 * replaced by its lower case counterpart, every other byte is copied as-is.
 * No trailing zero is added. <dst> must be large enough to receive src.len
 * bytes; this is meant for protocol processing where the frame length was
 * already validated. Returns an ist built from <dst> and the number of bytes
 * written. <dst> is left untouched when src.len is zero.
 */
 static inline struct ist ist2bin_lc(char *dst, const struct ist src)
 {
 	size_t pos = 0;
 	/* most strings are not empty, so the empty case is the unlikely one */
 	if (__builtin_expect(src.len == 0, 0))
 		return ist2(dst, 0);
 	/* the loop-back branch is hinted as unlikely to discourage the
 	 * compiler from optimizing this loop for large strings.
 	 */
 	do {
 		const char ch = src.ptr[pos];
 		/* offset from 'A' reduced to an unsigned char: only 'A'..'Z'
 		 * land in 0..('Z' - 'A'), so one comparison is enough.
 		 */
 		const unsigned char rank = (unsigned char)(ch - 'A');
 		if (rank <= 'Z' - 'A')
 			dst[pos] = ch + ('a' - 'A');
 		else
 			dst[pos] = ch;
 		pos++;
 	} while (__builtin_expect(pos < src.len, 0));
 	return ist2(dst, pos);
 }
 /* Lower-casing string copy of <src> into <dst>: each byte in 'A'..'Z' is
 * replaced by its lower case counterpart, every other byte is copied as-is,
 * then a trailing zero is always appended. <dst> must be large enough to
 * receive src.len bytes plus the trailing zero; this is meant for protocol
 * conversions where the frame length was already validated. Returns an ist
 * built from <dst> and the number of bytes written (the trailing zero is not
 * counted).
 *
 * NOTE(review): <count> is accepted but never read by this implementation, so
 * it does not bound the copy. Confirm with callers whether it is expected to
 * limit the number of bytes written.
 */
 static inline struct ist ist2str_lc(char *dst, const struct ist src, size_t count)
 {
 	size_t pos = 0;
 	/* most strings are not empty, so the empty case is the unlikely one;
 	 * it still has to produce a terminated (empty) string.
 	 */
 	if (__builtin_expect(src.len == 0, 0)) {
 		dst[0] = 0;
 		return ist2(dst, 0);
 	}
 	/* the loop-back branch is hinted as unlikely to discourage the
 	 * compiler from optimizing this loop for large strings.
 	 */
 	do {
 		const char ch = src.ptr[pos];
 		/* offset from 'A' reduced to an unsigned char: only 'A'..'Z'
 		 * land in 0..('Z' - 'A'), so one comparison is enough.
 		 */
 		const unsigned char rank = (unsigned char)(ch - 'A');
 		if (rank <= 'Z' - 'A')
 			dst[pos] = ch + ('a' - 'A');
 		else
 			dst[pos] = ch;
 		pos++;
 	} while (__builtin_expect(pos < src.len, 0));
 	dst[pos] = 0;
 	return ist2(dst, pos);
 }
 /* Upper-casing binary copy of <src> into <dst>: each byte in 'a'..'z' is
 * replaced by its upper case counterpart, every other byte is copied as-is.
 * No trailing zero is added. <dst> must be large enough to receive src.len
 * bytes; this is meant for protocol processing where the frame length was
 * already validated. Returns an ist built from <dst> and the number of bytes
 * written. <dst> is left untouched when src.len is zero.
 */
 static inline struct ist ist2bin_uc(char *dst, const struct ist src)
 {
 	size_t pos = 0;
 	/* most strings are not empty, so the empty case is the unlikely one */
 	if (__builtin_expect(src.len == 0, 0))
 		return ist2(dst, 0);
 	/* the loop-back branch is hinted as unlikely to discourage the
 	 * compiler from optimizing this loop for large strings.
 	 */
 	do {
 		const char ch = src.ptr[pos];
 		/* offset from 'a' reduced to an unsigned char: only 'a'..'z'
 		 * land in 0..('z' - 'a'), so one comparison is enough.
 		 */
 		const unsigned char rank = (unsigned char)(ch - 'a');
 		if (rank <= 'z' - 'a')
 			dst[pos] = ch + ('A' - 'a');
 		else
 			dst[pos] = ch;
 		pos++;
 	} while (__builtin_expect(pos < src.len, 0));
 	return ist2(dst, pos);
 }
 /* Upper-casing string copy of <src> into <dst>: each byte in 'a'..'z' is
 * replaced by its upper case counterpart, every other byte is copied as-is,
 * then a trailing zero is always appended. <dst> must be large enough to
 * receive src.len bytes plus the trailing zero; this is meant for protocol
 * conversions where the frame length was already validated. Returns an ist
 * built from <dst> and the number of bytes written (the trailing zero is not
 * counted).
 *
 * NOTE(review): <count> is accepted but never read by this implementation, so
 * it does not bound the copy. Confirm with callers whether it is expected to
 * limit the number of bytes written.
 */
 static inline struct ist ist2str_uc(char *dst, const struct ist src, size_t count)
 {
 	size_t pos = 0;
 	/* most strings are not empty, so the empty case is the unlikely one;
 	 * it still has to produce a terminated (empty) string.
 	 */
 	if (__builtin_expect(src.len == 0, 0)) {
 		dst[0] = 0;
 		return ist2(dst, 0);
 	}
 	/* the loop-back branch is hinted as unlikely to discourage the
 	 * compiler from optimizing this loop for large strings.
 	 */
 	do {
 		const char ch = src.ptr[pos];
 		/* offset from 'a' reduced to an unsigned char: only 'a'..'z'
 		 * land in 0..('z' - 'a'), so one comparison is enough.
 		 */
 		const unsigned char rank = (unsigned char)(ch - 'a');
 		if (rank <= 'z' - 'a')
 			dst[pos] = ch + ('A' - 'a');
 		else
 			dst[pos] = ch;
 		pos++;
 	} while (__builtin_expect(pos < src.len, 0));
 	dst[pos] = 0;
 	return ist2(dst, pos);
 }
 /* looks for first occurrence of character <chr> in string <ist>. Returns the
 * pointer if found, or NULL if not found.
 */