From 6653d17b8d6f9fdf347e8e28b37e7eee3af28700 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sun, 13 May 2007 01:52:05 +0200 Subject: [PATCH 1/2] [BUG] fix ev_sepoll again, this time with a new state machine It was possible in ev_sepoll() to ignore certain events if all speculative events had been processed at once, because the epoll_wait() timeout was not cleared, thus delaying the events delivery. The state machine was complicated, it has been rewritten. It seems faster and more correct right now. --- src/ev_sepoll.c | 212 +++++++++++++++++++++++++----------------------- 1 file changed, 110 insertions(+), 102 deletions(-) diff --git a/src/ev_sepoll.c b/src/ev_sepoll.c index c3b777647..de92c5d95 100644 --- a/src/ev_sepoll.c +++ b/src/ev_sepoll.c @@ -105,6 +105,10 @@ static _syscall4 (int, epoll_wait, int, epfd, struct epoll_event *, events, int, #define FD_EV_MASK (FD_EV_MASK_W | FD_EV_MASK_R) +/* This is the minimum number of events successfully processed in speculative + * mode above which we agree to return without checking epoll() (1/2 times). + */ +#define MIN_RETURN_EVENTS 25 /* descriptor of one FD. * FIXME: should be a bit field */ @@ -215,6 +219,7 @@ REGPRM2 static int __fd_clr(const int fd, int dir) return 0; } +/* normally unused */ REGPRM1 static void __fd_rem(int fd) { __fd_clr(fd, DIR_RD); @@ -232,148 +237,151 @@ REGPRM1 static void __fd_clo(int fd) fd_list[fd].e &= ~(FD_EV_MASK); } -/* - * operations to perform when reaching _do_poll() for some FDs in the spec - * queue depending on their state. This is mainly used to cleanup some FDs - * which are in STOP state. It is also used to compute EPOLL* flags when - * switching from SPEC to WAIT. - */ -static struct ev_to_epoll { - char op; // epoll opcode to switch from spec to wait, 0 if none - char m; // inverted mask for existing events - char ev; // remaining epoll events after change - char pad; -} ev_to_epoll[16] = { - [FD_EV_IDLE_W | FD_EV_STOP_R] = { .op=EPOLL_CTL_DEL, .m=FD_EV_MASK_R }, - [FD_EV_SPEC_W | FD_EV_STOP_R] = { .op=EPOLL_CTL_DEL, .m=FD_EV_MASK_R }, - [FD_EV_STOP_W | FD_EV_IDLE_R] = { .op=EPOLL_CTL_DEL, .m=FD_EV_MASK_W }, - [FD_EV_STOP_W | FD_EV_SPEC_R] = { .op=EPOLL_CTL_DEL, .m=FD_EV_MASK_W }, - [FD_EV_WAIT_W | FD_EV_STOP_R] = { .op=EPOLL_CTL_MOD, .m=FD_EV_MASK_R, .ev=EPOLLOUT }, - [FD_EV_STOP_W | FD_EV_WAIT_R] = { .op=EPOLL_CTL_MOD, .m=FD_EV_MASK_W, .ev=EPOLLIN }, - [FD_EV_STOP_W | FD_EV_STOP_R] = { .op=EPOLL_CTL_DEL, .m=FD_EV_MASK_R|FD_EV_MASK_W }, - [FD_EV_SPEC_W | FD_EV_WAIT_R] = { .ev=EPOLLIN }, - [FD_EV_WAIT_W | FD_EV_SPEC_R] = { .ev=EPOLLOUT }, - [FD_EV_WAIT_W | FD_EV_WAIT_R] = { .ev=EPOLLIN|EPOLLOUT }, -}; - /* * speculative epoll() poller */ REGPRM2 static void _do_poll(struct poller *p, int wait_time) { static unsigned int last_skipped; - int status; + int status, eo; int fd, opcode; int count; int spec_idx; /* Here we have two options : - * - either walk the list forwards and hope to atch more events + * - either walk the list forwards and hope to match more events * - or walk it backwards to minimize the number of changes and * to make better use of the cache. * Tests have shown that walking backwards improves perf by 0.2%. */ + status = 0; spec_idx = nbspec; while (likely(spec_idx > 0)) { spec_idx--; fd = spec_list[spec_idx]; + eo = fd_list[fd].e; /* save old events */ + + /* + * Process the speculative events. + * + * Principle: events which are marked FD_EV_SPEC are processed + * with their assigned function. If the function returns 0, it + * means there is nothing doable without polling first. We will + * then convert the event to a pollable one by assigning them + * the WAIT status. + */ + + fdtab[fd].ev = 0; + if ((eo & FD_EV_MASK_R) == FD_EV_SPEC_R) { + /* The owner is interested in reading from this FD */ + if (fdtab[fd].state != FD_STCLOSE && fdtab[fd].state != FD_STERROR) { + /* Pretend there is something to read */ + fdtab[fd].ev |= FD_POLL_IN; + if (!fdtab[fd].cb[DIR_RD].f(fd)) + fd_list[fd].e ^= (FD_EV_WAIT_R ^ FD_EV_SPEC_R); + else + status++; + } + } + else if ((eo & FD_EV_MASK_R) == FD_EV_STOP_R) { + /* This FD was being polled and is now being removed. */ + fd_list[fd].e &= ~FD_EV_MASK_R; + } + + if ((eo & FD_EV_MASK_W) == FD_EV_SPEC_W) { + /* The owner is interested in writing to this FD */ + if (fdtab[fd].state != FD_STCLOSE && fdtab[fd].state != FD_STERROR) { + /* Pretend there is something to write */ + fdtab[fd].ev |= FD_POLL_OUT; + if (!fdtab[fd].cb[DIR_WR].f(fd)) + fd_list[fd].e ^= (FD_EV_WAIT_W ^ FD_EV_SPEC_W); + else + status++; + } + } + else if ((eo & FD_EV_MASK_W) == FD_EV_STOP_W) { + /* This FD was being polled and is now being removed. */ + fd_list[fd].e &= ~FD_EV_MASK_W; + } + + /* Now, we will adjust the event in the poll list. Indeed, it + * is possible that an event which was previously in the poll + * list now goes out, and the opposite is possible too. We can + * have opposite changes for READ and WRITE too. + */ + + if ((eo ^ fd_list[fd].e) & FD_EV_RW_PL) { + /* poll status changed*/ + if ((fd_list[fd].e & FD_EV_RW_PL) == 0) { + /* fd removed from poll list */ + opcode = EPOLL_CTL_DEL; + } + else if ((eo & FD_EV_RW_PL) == 0) { + /* new fd in the poll list */ + opcode = EPOLL_CTL_ADD; + } + else { + /* fd status changed */ + opcode = EPOLL_CTL_MOD; + } + + /* construct the epoll events based on new state */ + ev.events = 0; + if (fd_list[fd].e & FD_EV_WAIT_R) + ev.events |= EPOLLIN; + + if (fd_list[fd].e & FD_EV_WAIT_W) + ev.events |= EPOLLOUT; - opcode = ev_to_epoll[fd_list[fd].e].op; - if (opcode) { ev.data.fd = fd; - ev.events = ev_to_epoll[fd_list[fd].e].ev; - fd_list[fd].e &= ~(unsigned int)ev_to_epoll[fd_list[fd].e].m; epoll_ctl(epoll_fd, opcode, fd, &ev); } + if (!(fd_list[fd].e & FD_EV_RW_SL)) { - // This one must be removed. Let's clear it now. + /* This fd switched to combinations of either WAIT or + * IDLE. It must be removed from the spec list. + */ delete_spec_entry(spec_idx); continue; } - - /* OK so now we do not have any event marked STOP anymore in - * the list. We can simply try to execute functions for the - * events we have found, and requeue them in case of EAGAIN. - */ - - status = 0; - fdtab[fd].ev = 0; - - if ((fd_list[fd].e & FD_EV_MASK_R) == FD_EV_SPEC_R) { - if (fdtab[fd].state != FD_STCLOSE && fdtab[fd].state != FD_STERROR) { - fdtab[fd].ev |= FD_POLL_IN; - if (fdtab[fd].cb[DIR_RD].f(fd) == 0) - status |= EPOLLIN; - } - } - - if ((fd_list[fd].e & FD_EV_MASK_W) == FD_EV_SPEC_W) { - if (fdtab[fd].state != FD_STCLOSE && fdtab[fd].state != FD_STERROR) { - fdtab[fd].ev |= FD_POLL_OUT; - if (fdtab[fd].cb[DIR_WR].f(fd) == 0) - status |= EPOLLOUT; - } - } - - if (status) { - /* Some speculative accesses have failed, we must - * switch to the WAIT state. - */ - ev.events = status; - ev.data.fd = fd; - if (fd_list[fd].e & FD_EV_RW_PL) { - // Event already in poll list - ev.events |= ev_to_epoll[fd_list[fd].e].ev; - opcode = EPOLL_CTL_MOD; - } else { - // Event not in poll list yet - opcode = EPOLL_CTL_ADD; - } - epoll_ctl(epoll_fd, opcode, fd, &ev); - - if (status & EPOLLIN) { - fd_list[fd].e &= ~FD_EV_MASK_R; - fd_list[fd].e |= FD_EV_WAIT_R; - } - if (status & EPOLLOUT) { - fd_list[fd].e &= ~FD_EV_MASK_W; - fd_list[fd].e |= FD_EV_WAIT_W; - } - - if ((fd_list[fd].e & FD_EV_MASK_R) != FD_EV_SPEC_R && - (fd_list[fd].e & FD_EV_MASK_W) != FD_EV_SPEC_W) { - delete_spec_entry(spec_idx); - continue; - } - } } - /* If some speculative events remain, we must not set the timeout in - * epoll_wait(). Also, if some speculative events remain, it means - * that some have been immediately processed, otherwise they would - * have been disabled. + /* It may make sense to immediately return here if there are enough + * processed events, without passing through epoll_wait() because we + * have exactly done a poll. + * Measures have shown a great performance increase if we call the + * epoll_wait() only the second time after speculative accesses have + * succeeded. This reduces the number of unsucessful calls to + * epoll_wait() by a factor of about 3, and the total number of calls + * by about 2. */ - if (nbspec) { - if (!last_skipped++) { - /* Measures have shown a great performance increase if - * we call the epoll_wait() only the second time after - * speculative accesses have succeeded. This reduces - * the number of unsucessful calls to epoll_wait() by - * a factor of about 3, and the total number of calls - * by about 2. - */ + + if (status >= MIN_RETURN_EVENTS) { + /* We have processed at least MIN_RETURN_EVENTS, it's worth + * returning now without checking epoll_wait(). + */ + if (++last_skipped <= 1) { tv_now(&now); return; } - wait_time = 0; } last_skipped = 0; - /* now let's wait for events */ + if (nbspec || status) { + /* Maybe we have processed some events that we must report, or + * maybe we still have events in the spec list, so we must not + * wait in epoll() otherwise we will delay their delivery by + * the next timeout. + */ + wait_time = 0; + } + + /* now let's wait for real events */ status = epoll_wait(epoll_fd, epoll_events, maxfd, wait_time); + tv_now(&now); for (count = 0; count < status; count++) { From ae5f7dadefa09881a95755540cb4f62756c598b9 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sun, 13 May 2007 11:40:04 +0200 Subject: [PATCH 2/2] [TESTS] updates to hash experimentations Aleksandar Lazic has collected many hashing algorithms and put them in one file to ease benchmarking. Some algos look promising, some of them have been checked further with uri_hash. Some results on various systems/hardware are stored in hash_results.txt. --- tests/hash_results.txt | 218 +++++++++++++++++ tests/test_hashes.c | 545 +++++++++++++++++++++++++++++++++++++++++ tests/uri_hash.c | 165 ++++++++++++- 3 files changed, 920 insertions(+), 8 deletions(-) create mode 100644 tests/hash_results.txt create mode 100644 tests/test_hashes.c diff --git a/tests/hash_results.txt b/tests/hash_results.txt new file mode 100644 index 000000000..0f14ec9ce --- /dev/null +++ b/tests/hash_results.txt @@ -0,0 +1,218 @@ +Test: ./test_hashes | sort -k 3 -r + +Note: haproxy_server_hash should be avoided as it's just a 32 bit XOR. + +Athlon @ 1533 MHz, gcc-3.4 -march=i686 : + haproxy_server_hash : 18477000 run/sec + SuperFastHash : 6983511 run/sec + hash_djbx33 : 4164334 run/sec + bernstein : 3371838 run/sec + kr_hash : 3257684 run/sec + sax_hash : 3027567 run/sec + fnv_hash : 2818374 run/sec + haproxy_uri_hash : 2108346 run/sec + oat_hash : 2106181 run/sec + hashword : 1936973 run/sec + hashpjw : 1803475 run/sec + fnv_32a_str : 1499198 run/sec + +Pentium-M @1700 MHz, gcc-3.4 -march=i686 : + haproxy_server_hash : 15471737 run/sec + SuperFastHash : 8155706 run/sec + hash_djbx33 : 4520191 run/sec + bernstein : 3956142 run/sec + kr_hash : 3725125 run/sec + fnv_hash : 3155413 run/sec + sax_hash : 2688323 run/sec + oat_hash : 2452789 run/sec + haproxy_uri_hash : 2010853 run/sec + hashword : 1831441 run/sec + hashpjw : 1737000 run/sec + fnv_32a_str : 1643737 run/sec + +Athlon @ 1533 MHz, gcc-4.1 -march=i686 : + haproxy_server_hash : 13592089 run/sec + SuperFastHash2 : 8687957 run/sec + SuperFastHash : 7361242 run/sec + hash_djbx33 : 5741546 run/sec + bernstein : 3368909 run/sec + sax_hash : 3339880 run/sec + kr_hash : 3277230 run/sec + fnv_hash : 2832402 run/sec + hashword : 2500317 run/sec + haproxy_uri_hash : 2433241 run/sec + oat_hash : 2403118 run/sec + hashpjw : 1881229 run/sec + fnv_32a_str : 1815709 run/sec + +Pentium-M @1700 MHz, gcc-4.1 -march=i686 : + haproxy_server_hash : 14128788 run/sec + SuperFastHash2 : 8157119 run/sec + SuperFastHash : 7481027 run/sec + hash_djbx33 : 5660711 run/sec + bernstein : 3961493 run/sec + fnv_hash : 3590727 run/sec + kr_hash : 3389393 run/sec + sax_hash : 2667227 run/sec + oat_hash : 2348211 run/sec + hashword : 2278856 run/sec + haproxy_uri_hash : 2098022 run/sec + hashpjw : 1846583 run/sec + fnv_32a_str : 1661219 run/sec + +Pentium-M @600 MHz, gcc-4.1 -march=i686 : + haproxy_server_hash : 5318468 run/sec + SuperFastHash2 : 3126165 run/sec + SuperFastHash : 2729981 run/sec + hash_djbx33 : 2042181 run/sec + bernstein : 1422927 run/sec + fnv_hash : 1287736 run/sec + kr_hash : 1217924 run/sec + sax_hash : 949694 run/sec + oat_hash : 837279 run/sec + hashword : 812868 run/sec + haproxy_uri_hash : 747611 run/sec + hashpjw : 659890 run/sec + fnv_32a_str : 590895 run/sec + +athlon @ 1.5 GHz, gcc-2.95 -march=i686 : + haproxy_server_hash : 13592864 run/sec + SuperFastHash : 6931251 run/sec + bernstein : 4105179 run/sec + hash_djbx33 : 3920059 run/sec + kr_hash : 2985794 run/sec + fnv_hash : 2815457 run/sec + sax_hash : 2791358 run/sec + haproxy_uri_hash : 2786663 run/sec + oat_hash : 2237859 run/sec + hashword : 1985740 run/sec + hashpjw : 1757733 run/sec + fnv_32a_str : 1697299 run/sec + +Pentium-M @ 600 MHz, gcc-2.95 -march=i686 : + SuperFastHash : 2934387 run/sec + haproxy_server_hash : 2864668 run/sec + hash_djbx33 : 1498043 run/sec + bernstein : 1414993 run/sec + kr_hash : 1297907 run/sec + fnv_hash : 1260343 run/sec + sax_hash : 924764 run/sec + oat_hash : 854545 run/sec + haproxy_uri_hash : 790040 run/sec + hashword : 693501 run/sec + hashpjw : 647346 run/sec + fnv_32a_str : 579691 run/sec + +Pentium-M @ 1700 MHz, gcc-2.95 -march=i686 : + SuperFastHash : 8006127 run/sec + haproxy_server_hash : 7834162 run/sec + hash_djbx33 : 4186025 run/sec + bernstein : 3941492 run/sec + kr_hash : 3630713 run/sec + fnv_hash : 3507488 run/sec + sax_hash : 2528128 run/sec + oat_hash : 2395188 run/sec + haproxy_uri_hash : 2158924 run/sec + hashword : 1910992 run/sec + hashpjw : 1819894 run/sec + fnv_32a_str : 1629844 run/sec + +UltraSparc @ 400 MHz, gcc-3.4.3 : + haproxy_server_hash : 5573220 run/sec + SuperFastHash : 1372714 run/sec + bernstein : 1361733 run/sec + hash_djbx33 : 1090373 run/sec + sax_hash : 872499 run/sec + oat_hash : 730354 run/sec + kr_hash : 645431 run/sec + haproxy_uri_hash : 541157 run/sec + fnv_32a_str : 442608 run/sec + hashpjw : 434858 run/sec + fnv_hash : 401945 run/sec + hashword : 340594 run/sec + +UltraSparc @ 400 MHz, gcc-3.4.3 -mcpu=v9 : + haproxy_server_hash : 5671183 run/sec + bernstein : 1437122 run/sec + hash_djbx33 : 1376294 run/sec + SuperFastHash : 1306634 run/sec + sax_hash : 873650 run/sec + kr_hash : 801439 run/sec + oat_hash : 729920 run/sec + haproxy_uri_hash : 545341 run/sec + hashpjw : 472190 run/sec + fnv_32a_str : 443668 run/sec + hashword : 357295 run/sec + fnv_hash : 208823 run/sec + + +Alpha EV6 @ 466 MHz, gcc-3.3 : + haproxy_server_hash : 2495928 run/sec + SuperFastHash : 2037208 run/sec + hash_djbx33 : 1625092 run/sec + kr_hash : 1532206 run/sec + bernstein : 1256746 run/sec + haproxy_uri_hash : 999106 run/sec + oat_hash : 841943 run/sec + sax_hash : 737447 run/sec + hashpjw : 676170 run/sec + fnv_hash : 644054 run/sec + fnv_32a_str : 638526 run/sec + hashword : 421777 run/sec + +VIA EPIA @ 533 MHz, gcc-2.95 -march=i586 : + haproxy_server_hash : 1391374 run/sec + SuperFastHash : 912397 run/sec + hash_djbx33 : 589868 run/sec + kr_hash : 453706 run/sec + bernstein : 437318 run/sec + sax_hash : 334456 run/sec + hashpjw : 316670 run/sec + hashword : 315476 run/sec + haproxy_uri_hash : 311112 run/sec + oat_hash : 259127 run/sec + fnv_32a_str : 229485 run/sec + fnv_hash : 151620 run/sec + +VIA EPIA @ 533 MHz, gcc-3.4 -march=i586 : + haproxy_server_hash : 1660407 run/sec + SuperFastHash : 791981 run/sec + hash_djbx33 : 680498 run/sec + kr_hash : 384076 run/sec + bernstein : 377247 run/sec + sax_hash : 355183 run/sec + hashpjw : 298879 run/sec + haproxy_uri_hash : 296748 run/sec + oat_hash : 283932 run/sec + hashword : 269429 run/sec + fnv_32a_str : 204776 run/sec + fnv_hash : 155301 run/sec + +Pentium @ 133 MHz, gcc-3.4 -march=i586 : + haproxy_server_hash : 930788 run/sec + SuperFastHash : 344988 run/sec + hash_djbx33 : 278996 run/sec + bernstein : 211545 run/sec + sax_hash : 185225 run/sec + kr_hash : 156603 run/sec + oat_hash : 135163 run/sec + hashword : 128518 run/sec + fnv_hash : 107024 run/sec + haproxy_uri_hash : 105523 run/sec + fnv_32a_str : 99913 run/sec + hashpjw : 97860 run/sec + +VAX VLC4000 @30 MHz, gcc-2.95 : + haproxy_server_hash : 13208 run/sec + hash_djbx33 : 12963 run/sec + fnv_hash : 12150 run/sec + SuperFastHash : 12037 run/sec + bernstein : 11765 run/sec + kr_hash : 11111 run/sec + sax_hash : 7273 run/sec + hashword : 7143 run/sec + oat_hash : 6931 run/sec + hashpjw : 6667 run/sec + haproxy_uri_hash : 5714 run/sec + fnv_32a_str : 4800 run/sec + diff --git a/tests/test_hashes.c b/tests/test_hashes.c new file mode 100644 index 000000000..baac40479 --- /dev/null +++ b/tests/test_hashes.c @@ -0,0 +1,545 @@ +/* + This file only show how many operations a hash is able to handle. + It don't show the distribution nor collisions. + + gcc -Wall -O3 -o test_hashes test_hashes.c + ./test_hashes |sort -k 3 -r + */ +#include +#include +#include +#include +#include +#include +//#include + + +static struct timeval timeval_current(void) +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return tv; +} + +static double timeval_elapsed(struct timeval *tv) +{ + struct timeval tv2 = timeval_current(); + return (tv2.tv_sec - tv->tv_sec) + + (tv2.tv_usec - tv->tv_usec)*1.0e-6; +} + +#define HAPROXY_BACKENDS 4 + +unsigned long haproxy_uri_hash(char *uri, int uri_len){ + + unsigned long hash = 0; + int c; + + while (uri_len--) { + c = *uri++; + if (c == '?') + break; + hash = c + (hash << 6) + (hash << 16) - hash; + } + + return hash%HAPROXY_BACKENDS; /* I assume 4 active backends */ +} /* end haproxy_hash() */ + +/* + * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx + */ +unsigned sax_hash ( void *key, int len ) +{ + unsigned char *p = key; + unsigned h = 0; + int i; + + for ( i = 0; i < len; i++ ) + h ^= ( h << 5 ) + ( h >> 2 ) + p[i]; + + return h; +} + +#include +/* len 4 for ipv4 and 16 for ipv6 */ +unsigned int haproxy_server_hash(const char *addr, int len){ + unsigned int h, l; + l = h = 0; + + while ((l + sizeof (int)) <= len) { + h ^= ntohl(*(unsigned int *)(&addr[l])); + l += sizeof (int); + } + return h %= HAPROXY_BACKENDS; +}/* end haproxy_server_hash() */ + + +int hashpjw(const void *key) { + + const char *ptr; + unsigned int val; + /********************************************************************* + * * + * Hash the key by performing a number of bit operations on it. * + * * + *********************************************************************/ + + val = 0; + ptr = key; + + while (*ptr != '\0') { + + int tmp; + + val = (val << 4) + (*ptr); + + if((tmp = (val & 0xf0000000))) { + val = val ^ (tmp >> 24); + val = val ^ tmp; + } + ptr++; + }/* end while */ + + return val; +}/* end hashpjw */ + +static unsigned long +hash_djbx33( + register unsigned char *key, + register size_t len) +{ + register unsigned long hash = 5381; + + /* the hash unrolled eight times */ + for (; len >= 8; len -= 8) { + hash = ((hash << 5) + hash) + *key++; + hash = ((hash << 5) + hash) + *key++; + hash = ((hash << 5) + hash) + *key++; + hash = ((hash << 5) + hash) + *key++; + hash = ((hash << 5) + hash) + *key++; + hash = ((hash << 5) + hash) + *key++; + hash = ((hash << 5) + hash) + *key++; + hash = ((hash << 5) + hash) + *key++; + } + switch (len) { + case 7: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */ + case 6: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */ + case 5: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */ + case 4: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */ + case 3: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */ + case 2: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */ + case 1: hash = ((hash << 5) + hash) + *key++; break; + default: /* case 0: */ break; + } + return hash; +} + +typedef unsigned long int ub4; /* unsigned 4-byte quantities */ +typedef unsigned char ub1; /* unsigned 1-byte quantities */ + +ub4 bernstein(ub1 *key, ub4 len, ub4 level){ + ub4 hash = level; + ub4 i; + for (i=0; i>= 2; + + /* Main loop */ + for (;len > 0; len--) { + hash += get16bits (data); + tmp = (get16bits (data+2) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + data += 2*sizeof (uint16_t); + hash += hash >> 11; + } + + /* Handle end cases */ + switch (rem) { + case 3: hash += get16bits (data); + hash ^= hash << 16; + hash ^= data[sizeof (uint16_t)] << 18; + hash += hash >> 11; + break; + case 2: hash += get16bits (data); + hash ^= hash << 11; + hash += hash >> 17; + break; + case 1: hash += *data; + hash ^= hash << 10; + hash += hash >> 1; + } + + /* Force "avalanching" of final 127 bits */ + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; + + return hash; +} + +/* + * This variant uses all bits from the input block, and is about 15% faster. + */ +uint32_t SuperFastHash2 (const char * data, int len) { +uint32_t hash = len, tmp; +int rem; + + if (len <= 0 || data == NULL) return 0; + + rem = len & 3; + len >>= 2; + + /* Main loop */ + for (;len > 0; len--) { + register uint32_t next; + next = get16bits(data+2); + hash += get16bits(data); + tmp = ((next << 11) | (next >> 21)) ^ hash; + hash = (hash << 16) ^ tmp; + data += 2*sizeof (uint16_t); + hash += hash >> 11; + } + + /* Handle end cases */ + switch (rem) { + case 3: hash += get16bits (data); + hash ^= hash << 16; + hash ^= data[sizeof (uint16_t)] << 18; + hash += hash >> 11; + break; + case 2: hash += get16bits (data); + hash ^= hash << 11; + hash += hash >> 17; + break; + case 1: hash += *data; + hash ^= hash << 10; + hash += hash >> 1; + } + + /* Force "avalanching" of final 127 bits */ + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; + + return hash; +} + +/* + * 32 bit FNV-0 hash type + */ +typedef unsigned long Fnv32_t; + +/* + * fnv_32a_str - perform a 32 bit Fowler/Noll/Vo FNV-1a hash on a string + * + * input: + * str - string to hash + * hval - previous hash value or 0 if first call + * + * returns: + * 32 bit hash as a static hash type + * + * NOTE: To use the recommended 32 bit FNV-1a hash, use FNV1_32A_INIT as the + * hval arg on the first call to either fnv_32a_buf() or fnv_32a_str(). + */ +Fnv32_t +fnv_32a_str(char *str, Fnv32_t hval) +{ + unsigned char *s = (unsigned char *)str; /* unsigned string */ + + /* + * FNV-1a hash each octet in the buffer + */ + while (*s) { + + /* xor the bottom with the current octet */ + hval ^= (Fnv32_t)*s++; + +/* #define NO_FNV_GCC_OPTIMIZATION */ + /* multiply by the 32 bit FNV magic prime mod 2^32 */ +#if defined(NO_FNV_GCC_OPTIMIZATION) + /* + * 32 bit magic FNV-1a prime + */ +#define FNV_32_PRIME ((Fnv32_t)0x01000193) + hval *= FNV_32_PRIME; +#else + hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24); +#endif + } + + /* return our new hash value */ + return hval; +} + +/* + * from lookup3.c, by Bob Jenkins, May 2006, Public Domain. + */ + +#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) + +/* +------------------------------------------------------------------------------- +mix -- mix 3 32-bit values reversibly. + +This is reversible, so any information in (a,b,c) before mix() is +still in (a,b,c) after mix(). + +If four pairs of (a,b,c) inputs are run through mix(), or through +mix() in reverse, there are at least 32 bits of the output that +are sometimes the same for one pair and different for another pair. +This was tested for: +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that +satisfy this are + 4 6 8 16 19 4 + 9 15 3 18 27 15 + 14 9 3 7 17 3 +Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing +for "differ" defined as + with a one-bit base and a two-bit delta. I +used http://burtleburtle.net/bob/hash/avalanche.html to choose +the operations, constants, and arrangements of the variables. + +This does not achieve avalanche. There are input bits of (a,b,c) +that fail to affect some output bits of (a,b,c), especially of a. The +most thoroughly mixed value is c, but it doesn't really even achieve +avalanche in c. + +This allows some parallelism. Read-after-writes are good at doubling +the number of bits affected, so the goal of mixing pulls in the opposite +direction as the goal of parallelism. I did what I could. Rotates +seem to cost as much as shifts on every machine I could lay my hands +on, and rotates are much kinder to the top and bottom bits, so I used +rotates. +------------------------------------------------------------------------------- +*/ +#define mix(a,b,c) \ +{ \ + a -= c; a ^= rot(c, 4); c += b; \ + b -= a; b ^= rot(a, 6); a += c; \ + c -= b; c ^= rot(b, 8); b += a; \ + a -= c; a ^= rot(c,16); c += b; \ + b -= a; b ^= rot(a,19); a += c; \ + c -= b; c ^= rot(b, 4); b += a; \ +} + +/* +------------------------------------------------------------------------------- +final -- final mixing of 3 32-bit values (a,b,c) into c + +Pairs of (a,b,c) values differing in only a few bits will usually +produce values of c that look totally different. This was tested for +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +These constants passed: + 14 11 25 16 4 14 24 + 12 14 25 16 4 14 24 +and these came close: + 4 8 15 26 3 22 24 + 10 8 15 26 3 22 24 + 11 8 15 26 3 22 24 +------------------------------------------------------------------------------- +*/ +#define final(a,b,c) \ +{ \ + c ^= b; c -= rot(b,14); \ + a ^= c; a -= rot(c,11); \ + b ^= a; b -= rot(a,25); \ + c ^= b; c -= rot(b,16); \ + a ^= c; a -= rot(c,4); \ + b ^= a; b -= rot(a,14); \ + c ^= b; c -= rot(b,24); \ +} + +/* +-------------------------------------------------------------------- + This works on all machines. To be useful, it requires + -- that the key be an array of uint32_t's, and + -- that the length be the number of uint32_t's in the key + + The function hashword() is identical to hashlittle() on little-endian + machines, and identical to hashbig() on big-endian machines, + except that the length has to be measured in uint32_ts rather than in + bytes. hashlittle() is more complicated than hashword() only because + hashlittle() has to dance around fitting the key bytes into registers. +-------------------------------------------------------------------- +*/ +uint32_t hashword( +const uint32_t *k, /* the key, an array of uint32_t values */ +size_t length, /* the length of the key, in uint32_ts */ +uint32_t initval) /* the previous hash, or an arbitrary value */ +{ + uint32_t a,b,c; + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval; + + /*------------------------------------------------- handle most of the key */ + while (length > 3) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 3; + k += 3; + } + + /*------------------------------------------- handle the last 3 uint32_t's */ + switch(length) /* all the case statements fall through */ + { + case 3 : c+=k[2]; + case 2 : b+=k[1]; + case 1 : a+=k[0]; + final(a,b,c); + case 0: /* case 0: nothing left to add */ + break; + } + /*------------------------------------------------------ report the result */ + return c; +} + +/* from K&R book site 139 */ +#define HASHSIZE 101 + +unsigned kr_hash(char *s){ + unsigned hashval; + + for(hashval = 0; *s != '\0';s++) + hashval = *s + 31 * hashval; + + return hashval % HASHSIZE; + +} /* end kr_hash() */ + +unsigned fnv_hash ( void *key, int len ) +{ + unsigned char *p = key; + unsigned h = 2166136261; + int i; + + for ( i = 0; i < len; i++ ) + h = ( h * 16777619 ) ^ p[i]; + + return h; +} + +unsigned oat_hash ( void *key, int len ) +{ + unsigned char *p = key; + unsigned h = 0; + int i; + + for ( i = 0; i < len; i++ ) { + h += p[i]; + h += ( h << 10 ); + h ^= ( h >> 6 ); + } + + h += ( h << 3 ); + h ^= ( h >> 11 ); + h += ( h << 15 ); + + return h; +} + + +#define run_test(fct, args) { \ + unsigned long loop, count; \ + volatile unsigned long result; \ + double delta; \ + struct timeval tv; \ + fprintf(stderr, "Starting %s\n", #fct); \ + tv = timeval_current(); \ + count = 0; \ + do { \ + delta = timeval_elapsed(&tv); \ + for (loop = 0; loop < 1000; loop++) { \ + result = fct args; \ + count++; \ + } \ + } while (delta < 1.0); \ + fprintf(stdout, "%-20s : %10.0f run/sec\n", #fct, count/delta); \ + fflush(stdout); \ +} + +int main(){ + + char **start; + int len; + + char *urls[] = { + "http://www.microsoft.com/shared/core/1/webservice/navigation.asmx/DisplayDownlevelNavHtml", + NULL + }; + + start = urls; + len = strlen(*urls); + + run_test(SuperFastHash2, (*urls, len)); + run_test(SuperFastHash, (*urls, len)); + run_test(haproxy_uri_hash, (*urls, len)); + run_test(haproxy_server_hash, (*urls, len)); + run_test(hashpjw, (*urls)); + run_test(hash_djbx33, ((unsigned char *)*urls, len)); + run_test(bernstein, ((unsigned char *)*urls, len, 4)); + run_test(fnv_32a_str, (*urls, 0)); + run_test(hashword, ((const uint32_t *)*urls,strlen(*urls),0)); + run_test(kr_hash, (*urls)); + run_test(sax_hash, (*urls, len)); + run_test(fnv_hash, (*urls, len)); + run_test(oat_hash, (*urls, len)); + + return 0; + +}/* end main() */ diff --git a/tests/uri_hash.c b/tests/uri_hash.c index 4fc1f9f58..0b5d755a3 100644 --- a/tests/uri_hash.c +++ b/tests/uri_hash.c @@ -1,4 +1,6 @@ #include +#include +#include #define NSERV 10 #define MAXLINE 1000 @@ -11,7 +13,7 @@ static unsigned long hash_gd1(char *uri) unsigned long hash = 0; int c; - while (c = *uri++) + while ((c = *uri++)) hash = c + (hash << 6) + (hash << 16) - hash; return hash; @@ -23,7 +25,7 @@ static unsigned long hash_gd2(char *uri) unsigned long hash = 0; int c; - while (c = *uri++) { + while ((c = *uri++)) { if (c == '?' || c == '\n') break; hash = c + (hash << 6) + (hash << 16) - hash; @@ -39,7 +41,7 @@ static unsigned long hash_gd3(char *uri) unsigned long hash = 0; int c; - while (c = *uri++) { + while ((c = *uri++)) { if (c == '?' || c == '\n') break; hash = c - (hash << 3) + (hash << 15) - hash; @@ -55,7 +57,7 @@ static unsigned long hash_gd4(char *uri) unsigned long hash = 0; int c; - while (c = *uri++) { + while ((c = *uri++)) { if (c == '?' || c == '\n') break; hash = hash + (hash << 6) - (hash << 15) - c; @@ -71,7 +73,7 @@ static unsigned long hash_gd5(char *uri) unsigned long hash = 0; int c; - while (c = *uri++) { + while ((c = *uri++)) { if (c == '?' || c == '\n') break; hash = hash + (hash << 2) - (hash << 19) - c; @@ -87,7 +89,7 @@ static unsigned long hash_gd6(char *uri) unsigned long hash = 0; int c; - while (c = *uri++) { + while ((c = *uri++)) { if (c == '?' || c == '\n') break; hash = hash + (hash << 2) - (hash << 22) - c; @@ -152,7 +154,7 @@ static unsigned int rev32(unsigned int c) { int hash_wt2(const char *src, int len) { unsigned int i = 0x3C964BA5; /* as many ones as zeroes */ - unsigned int j, k; + unsigned int j; unsigned int ih, il; int bit; @@ -171,6 +173,142 @@ int hash_wt2(const char *src, int len) { return i; } + +//typedef unsigned int uint32_t; +//typedef unsigned short uint8_t; +//typedef unsigned char uint16_t; + +/* + * http://www.azillionmonkeys.com/qed/hash.html + */ +#undef get16bits +#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ + || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) +#define get16bits(d) (*((const uint16_t *) (d))) +#endif + +#if !defined (get16bits) +#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\ + +(uint32_t)(((const uint8_t *)(d))[0]) ) +#endif + +/* + * This function has a hole of 11 unused bits in bytes 2 and 3 of each block of + * 32 bits. + */ +int counts_SuperFastHash[NSERV][NSERV]; + +uint32_t SuperFastHash (const char * data, int len) { +uint32_t hash = len, tmp; +int rem; + + if (len <= 0 || data == NULL) return 0; + + rem = len & 3; + len >>= 2; + + /* Main loop */ + for (;len > 0; len--) { + hash += get16bits (data); + tmp = (get16bits (data+2) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + data += 2*sizeof (uint16_t); + hash += hash >> 11; + } + + /* Handle end cases */ + switch (rem) { + case 3: hash += get16bits (data); + hash ^= hash << 16; + hash ^= data[sizeof (uint16_t)] << 18; + hash += hash >> 11; + break; + case 2: hash += get16bits (data); + hash ^= hash << 11; + hash += hash >> 17; + break; + case 1: hash += *data; + hash ^= hash << 10; + hash += hash >> 1; + } + + /* Force "avalanching" of final 127 bits */ + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; + + return hash; +} + +/* + * This variant uses all bits from the input block, and is about 15% faster. + */ +int counts_SuperFastHash2[NSERV][NSERV]; +uint32_t SuperFastHash2 (const char * data, int len) { +uint32_t hash = len, tmp; +int rem; + + if (len <= 0 || data == NULL) return 0; + + rem = len & 3; + len >>= 2; + + /* Main loop */ + for (;len > 0; len--) { + register uint32_t next; + next = get16bits(data+2); + hash += get16bits(data); + tmp = ((next << 11) | (next >> 21)) ^ hash; + hash = (hash << 16) ^ tmp; + data += 2*sizeof (uint16_t); + hash += hash >> 11; + } + + /* Handle end cases */ + switch (rem) { + case 3: hash += get16bits (data); + hash ^= hash << 16; + hash ^= data[sizeof (uint16_t)] << 18; + hash += hash >> 11; + break; + case 2: hash += get16bits (data); + hash ^= hash << 11; + hash += hash >> 17; + break; + case 1: hash += *data; + hash ^= hash << 10; + hash += hash >> 1; + } + + /* Force "avalanching" of final 127 bits */ + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; + + return hash; +} + +/* len 4 for ipv4 and 16 for ipv6 */ +int counts_srv[NSERV][NSERV]; +unsigned int haproxy_server_hash(const char *addr, int len){ + unsigned int h, l; + l = h = 0; + + while ((l + sizeof (int)) <= len) { + h ^= ntohl(*(unsigned int *)(&addr[l])); + l += sizeof (int); + } + return h; +}/* end haproxy_server_hash() */ + + + void count_hash_results(unsigned long hash, int counts[NSERV][NSERV]) { int srv, nsrv; @@ -196,7 +334,7 @@ void dump_hash_results(char *name, int counts[NSERV][NSERV]) { printf("\n"); } -main() { +int main() { memset(counts_gd1, 0, sizeof(counts_gd1)); memset(counts_gd2, 0, sizeof(counts_gd2)); memset(counts_gd3, 0, sizeof(counts_gd3)); @@ -205,6 +343,9 @@ main() { memset(counts_gd6, 0, sizeof(counts_gd6)); memset(counts_wt1, 0, sizeof(counts_wt1)); memset(counts_wt2, 0, sizeof(counts_wt2)); + memset(counts_srv, 0, sizeof(counts_srv)); + memset(counts_SuperFastHash, 0, sizeof(counts_SuperFastHash)); + memset(counts_SuperFastHash2, 0, sizeof(counts_SuperFastHash2)); while (fgets(line, MAXLINE, stdin) != NULL) { count_hash_results(hash_gd1(line), counts_gd1); @@ -215,6 +356,9 @@ main() { count_hash_results(hash_gd6(line), counts_gd6); count_hash_results(hash_wt1(31, line), counts_wt1); count_hash_results(hash_wt2(line, strlen(line)), counts_wt2); + count_hash_results(haproxy_server_hash(line, strlen(line)), counts_srv); + count_hash_results(SuperFastHash(line, strlen(line)), counts_SuperFastHash); + count_hash_results(SuperFastHash2(line, strlen(line)), counts_SuperFastHash2); } dump_hash_results("hash_gd1", counts_gd1); @@ -225,4 +369,9 @@ main() { dump_hash_results("hash_gd6", counts_gd6); dump_hash_results("hash_wt1", counts_wt1); dump_hash_results("hash_wt2", counts_wt2); + dump_hash_results("haproxy_server_hash", counts_srv); + dump_hash_results("SuperFastHash", counts_SuperFastHash); + dump_hash_results("SuperFastHash2", counts_SuperFastHash2); + + return 0; }