From 13515d9fbe9abe7525f00edfc1612146d1fca2ec Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Wed, 24 Apr 2024 15:23:28 +0200 Subject: [PATCH] MINOR: intops: add a pair of functions to check multi-byte ranges These new functions is_char4_outside() and is_char8_outside() are meant to be used to verify if any of the 4 or 8 chars represented respectively by a uint32_t or a uint64_t is outside of the min,max byte range passed in argument. This is the simplified, fast version of the function so it is restricted to less than 0x80 distance between min and max (sufficient to validate chars). Extra functions are also provided to check for min or max alone as well, with the same restriction. The use case typically is to check that the output of read_u32() or read_u64() contains exclusively certain bytes. --- include/haproxy/intops.h | 126 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) diff --git a/include/haproxy/intops.h b/include/haproxy/intops.h index 34010ccd1..589f90e55 100644 --- a/include/haproxy/intops.h +++ b/include/haproxy/intops.h @@ -96,6 +96,132 @@ static inline uint64_t rotr64(uint64_t v, uint8_t bits) return v; } +/* Returns non-zero if any of the 4 bytes composing the u32 x is below the + * value min8 or above min8+127. Please note that the result will be made + * of a 0x80 at positions corresponding to the offending bytes, and that as + * such the result is a u32 as well. It is designed like this so that the + * operation can be cascaded by ORing the results of multiple blocks. It is + * crucial for performance that min8 is passed as a build-time constant so + * as to avoid an expensive multiply. A zero on output confirms that all four + * bytes are greater than or equal to min8 and not greater than min8+127. + * This is essentially used to skip long sequences of text matching the rule + * when the cost of stopping on a false positive is low (i.e. parse multiple + * bytes at a time and continue one byte at a time at the end of the series).
+ */ +static inline __attribute__((always_inline)) +uint32_t is_char4_below_opt(uint32_t x, uint8_t min8) +{ + uint32_t min32 = min8 * 0x01010101U; + + return (x - min32) & 0x80808080U; +} + +/* Returns non-zero if any of the 4 bytes composing the u32 x is above the + * value max8 or below max8-127. Please note that the result will be made + * of a 0x80 at positions corresponding to the offending bytes, and that as + * such the result is a u32 as well. It is designed like this so that the + * operation can be cascaded by ORing the results of multiple blocks. It is + * crucial for performance that max8 is passed as a build-time constant so + * as to avoid an expensive multiply. A zero on output confirms that all four + * bytes are lower than or equal to max8 and not lower than max8-127. + * This is essentially used to skip long sequences of text matching the rule + * when the cost of stopping on a false positive is low (i.e. parse multiple + * bytes at a time and continue one byte at a time at the end of the series). + */ +static inline __attribute__((always_inline)) +uint32_t is_char4_above_opt(uint32_t x, uint8_t max8) +{ + uint32_t max32 = max8 * 0x01010101U; + + return (max32 - x) & 0x80808080U; +} + +/* Returns non-zero if any of the 4 bytes composing the u32 x is outside of + * the range defined by min8 to max8 included. Please note that the result + * will be made of a 0x80 at positions corresponding to the offending bytes, + * and that as such the result is a u32 as well. It is designed like this so + * that the operation can be cascaded by ORing the results of multiple blocks. + * There is one restriction in this simplified version, the distance between + * min8 and max8 must be lower than 0x80. It is crucial for performance that + * the bounds (min8 and max8) are passed as build-time constants so as to avoid + * an expensive multiply. A zero on output confirms that all four bytes are + * included in the defined range.
+ */ +static inline __attribute__((always_inline)) +uint32_t is_char4_outside(uint32_t x, uint8_t min8, uint8_t max8) +{ + uint32_t min32 = min8 * 0x01010101U; + uint32_t max32 = max8 * 0x01010101U; + + return (((x - min32) | (max32 - x)) & 0x80808080U); +} + +/* Returns non-zero if any of the 8 bytes composing the u64 x is below the + * value min8 or above min8+127. Please note that the result will be made + * of a 0x80 at positions corresponding to the offending bytes, and that as + * such the result is a u64 as well. It is designed like this so that the + * operation can be cascaded by ORing the results of multiple blocks. It is + * crucial for performance that min8 is passed as a build-time constant so + * as to avoid an expensive multiply. A zero on output confirms that all eight + * bytes are greater than or equal to min8 and not greater than min8+127. + * This is essentially used to skip long sequences of text matching the rule + * when the cost of stopping on a false positive is low (i.e. parse multiple + * bytes at a time and continue one byte at a time at the end of the series). + */ +static inline __attribute__((always_inline)) +uint64_t is_char8_below_opt(uint64_t x, uint8_t min8) +{ + uint64_t min64 = min8 * 0x0101010101010101ULL; + + return (x - min64) & 0x8080808080808080ULL; +} + +/* Returns non-zero if any of the 8 bytes composing the u64 x is above the + * value max8 or below max8-127. Please note that the result will be made + * of a 0x80 at positions corresponding to the offending bytes, and that as + * such the result is a u64 as well. It is designed like this so that the + * operation can be cascaded by ORing the results of multiple blocks. It is + * crucial for performance that max8 is passed as a build-time constant so + * as to avoid an expensive multiply. A zero on output confirms that all eight + * bytes are lower than or equal to max8 and not lower than max8-127.
+ * This is essentially used to skip long sequences of text matching the rule + * when the cost of stopping on a false positive is low (i.e. parse multiple + * bytes at a time and continue one byte at a time at the end of the series). + */ +static inline __attribute__((always_inline)) +uint64_t is_char8_above_opt(uint64_t x, uint8_t max8) +{ + uint64_t max64 = max8 * 0x0101010101010101ULL; + + return (max64 - x) & 0x8080808080808080ULL; +} + +/* Returns non-zero if any of the 8 bytes composing the u64 x is outside of + * the range defined by min8 to max8 included. Please note that the result + * will be made of a 0x80 at positions corresponding to some of the offending + * bytes, and that as such the result is a u64 as well. On 32-bit machines, the + * operation will be made of two adjacent 32-bit checks. It is designed like + * this so that the operation can be cascaded by ORing the results of multiple + * blocks. There is one restriction in this simplified version, the distance + * between min8 and max8 must be lower than 0x80. It is crucial for performance + * that the bounds (min8 and max8) are passed as build-time constants so as to + * avoid an expensive multiply. A zero on output confirms that all eight bytes + * are included in the defined range. + */ +static inline __attribute__((always_inline)) +uint64_t is_char8_outside(uint64_t x, uint8_t min8, uint8_t max8) +{ + if (sizeof(long) >= 8) { + uint64_t min64 = min8 * 0x0101010101010101ULL; + uint64_t max64 = max8 * 0x0101010101010101ULL; + + return (((x - min64) | (max64 - x)) & 0x8080808080808080ULL); + } + else + return is_char4_outside(x >> 0, min8, max8) | + is_char4_outside(x >> 32, min8, max8); +} + /* Simple popcountl implementation. It returns the number of ones in a word. * Described here : https://graphics.stanford.edu/~seander/bithacks.html */