IMPORT: slz: avoid multiple shifts on 64-bits

On 64-bit platforms, disassembling the code shows that send_huff() performs
a left shift followed by a right one, which are the result of integer
truncation and zero-extension caused solely by using different types at
different levels in the call chain. By making enqueue24() take a 64-bit
int on input and send_huff() take an unsigned long (which is 64 bits wide
on platforms where long is 64-bit), we can remove one shift in the hot
path and gain 1% performance without affecting other platforms.

This is slz upstream commit fd165b36c4621579c5305cf3bb3a7f5410d3720b.
This commit is contained in:
Willy Tarreau 2023-04-09 10:23:18 +02:00
parent 0a91c6dcae
commit ea1b70900f

View File

@ -166,9 +166,9 @@ union ref {
* 32-bit words into output buffer. X must not contain non-zero bits above
* xbits.
*/
static inline void enqueue24(struct slz_stream *strm, uint32_t x, uint32_t xbits)
static inline void enqueue24(struct slz_stream *strm, uint64_t x, uint32_t xbits)
{
uint64_t queue = strm->queue + ((uint64_t)x << strm->qbits);
uint64_t queue = strm->queue + (x << strm->qbits);
uint32_t qbits = strm->qbits + xbits;
if (__builtin_expect(qbits >= 32, 1)) {
@ -293,7 +293,8 @@ static inline void copy_32b(struct slz_stream *strm, uint32_t x)
strm->outbuf += 4;
}
static inline void send_huff(struct slz_stream *strm, uint32_t code)
/* Using long because faster on 64-bit (can save one shift) */
static inline void send_huff(struct slz_stream *strm, unsigned long code)
{
uint32_t bits;