Date: Wed, 6 Dec 2006 04:51:51 +0100 From: Max Laier <max@love2party.net> To: Luigi Rizzo <rizzo@icir.org> Cc: freebsd-ipfw@freebsd.org Subject: Re: Better "hash_packet6" Message-ID: <200612060451.58473.max@love2party.net> In-Reply-To: <20061205161744.A48319@xorpc.icir.org> References: <200612052010.36789.max@love2party.net> <20061205161744.A48319@xorpc.icir.org>
next in thread | previous in thread | raw e-mail | index | archive | help
--nextPart1742497.VWN3z6gnJ1 Content-Type: multipart/mixed; boundary="Boundary-01=_Z5jdFG6xnwahQje" Content-Transfer-Encoding: 7bit Content-Disposition: inline --Boundary-01=_Z5jdFG6xnwahQje Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: quoted-printable Content-Disposition: inline On Wednesday 06 December 2006 01:17, Luigi Rizzo wrote: > On Tue, Dec 05, 2006 at 08:10:30PM +0100, Max Laier wrote: > > Hi, > > > > with a lot of help from David Malone and JINMEI Tatuya we came up > > with the following hash function for IPv6 connections using universal > > hashing. > > I followed the discussion on the topic a few days (weeks ?) > ago and investigated around a bit, also following some of the pointers > that passed on the list. So I have the following comments: > > First, this proposal, with 36 multiplies and one division, the > function seems rather expensive for e.g. a low end cpu (arm or > soekris) as you might find on network-appliance boxes. > Any chance to get performance numbers on that hardware ? I tried the reference machines (see hacked up attachment): 78x ia64 40x amd64 60x p3 16x p4 I don't have my Soekris set up, so if somebody could give it a try. > I wonder if you have considered alternatives such as the one at > > http://www.azillionmonkeys.com/qed/hash.html > > which seems reasonable in terms of theoretical background, performance > and also in terms of license (see the reference about being used > in Apple products). It needs a seed at very least. > Second, and related to this specific hash function: if we end up ^ not? In which case I'd agree. > using it, i think it would be good to add a bit of documentation > on algorithm used and on constraints that there are (e.g. wrt operand > sizes and values of the hash keys) so that people don't break it > in the future trying to optimze things that they should not touch. > > In particular, i have the following questions: > - is the algorithm defined on a byte-by-byte basis, or it is just > your decision to implement it this way ? (having it work on 16-bit > entries would e.g. halve the number of multiplies). > > - i guess that you use addr6_cmp() to sort the components of the > address insuring the algorithm hashes forward and reverse traffic > to the same value. symmetric. But for this, one of the suggestions > was to xor SRC and DST to achieve the same thing, and i don't > understand why you use this different approach (which doubles the > input size and the number of multiplications). If an attacker can set src and dst it remains trivial to create (many)=20 collisions. In order to degrade the hash table it is enough to spoof=20 SYNs, isn't it? On that note, if I'm correct it might be a good idea to=20 use a seeded hash for IPv4 as well. > - what are the requirements on ipfw_hash_key[] values ? > E.g. it seems that 0 should not be allowed or the corresponding > byte would have no contribution in the hash. Any other invalid > value, recommended range etc ? I think the answer is "good random data" in [0, HASH_PRIME). That=20 includes zero, but I agree that using zero is a bit pointless. I'll let=20 others speak to the details of universal hashing. > In any case i am glad that people are looking at improving some code > that i am certainly guilty of :) =2D-=20 /"\ Best regards, | mlaier@freebsd.org \ / Max Laier | ICQ #67774661 X http://pf4freebsd.love2party.net/ | mlaier@EFnet / \ ASCII Ribbon Campaign | Against HTML Mail and News --Boundary-01=_Z5jdFG6xnwahQje Content-Type: text/plain; charset="iso-8859-1"; name="hash_cost.c" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="hash_cost.c" #include <sys/param.h> #include <sys/mbuf.h> #include <sys/socket.h> #include <sys/sockio.h> #include <sys/sysctl.h> #include <sys/time.h> #include <sys/wait.h> #include <sys/queue.h> #include <ctype.h> #include <err.h> #include <errno.h> #include <grp.h> #include <limits.h> #include <netdb.h> #include <pwd.h> #include <signal.h> #include <stdio.h> #include <stdlib.h> #include <stdarg.h> #include <string.h> #include <timeconv.h> /* XXX do we need this ? */ #include <unistd.h> #include <sysexits.h> #include <unistd.h> #include <fcntl.h> #include <net/if.h> #include <net/pfvar.h> #include <net/route.h> /* def. of struct route */ #include <netinet/in.h> #include <netinet/in_systm.h> #include <netinet/ip.h> #include <netinet/ip_icmp.h> #include <netinet/ip_fw.h> #include <netinet/icmp6.h> #include <netinet/ip_dummynet.h> #include <netinet/tcp.h> #include <arpa/inet.h> #define HASHKEYLEN 36 /* sizeof(in6_addr) * 2 + sizeof(in_port_t) * 2 */ #define HASHPRIME 65537 static u_int32_t ipfw_hash_key[HASHKEYLEN]; #define s6_addr8 __u6_addr.__u6_addr8 #define s6_addr16 __u6_addr.__u6_addr16 #define s6_addr32 __u6_addr.__u6_addr32 static __inline int addr6_cmp(struct ipfw_flow_id *id) { int i; if (id->src_port < id->dst_port) return 1; if (id->src_port > id->dst_port) return -1; for (i = 7; i >= 0; i--) if (id->dst_ip6.s6_addr16[i] != id->src_ip6.s6_addr16[i]) return ((int)id->dst_ip6.s6_addr16[i] - id->src_ip6.s6_addr16[i]); return (0); } static __inline int hash_packet5(struct ipfw_flow_id *id) { u_int32_t i; i = (id->dst_ip6.__u6_addr.__u6_addr32[2]) ^ (id->dst_ip6.__u6_addr.__u6_addr32[3]) ^ (id->src_ip6.__u6_addr.__u6_addr32[2]) ^ (id->src_ip6.__u6_addr.__u6_addr32[3]) ^ (id->dst_port) ^ (id->src_port); return i; } static __inline int hash_packet6(struct ipfw_flow_id *id) { u_int32_t a; int i, j; struct in6_addr *a1, *a2; u_int8_t *p1, *p2; if (addr6_cmp(id) >= 0) { a1 = &id->dst_ip6; a2 = &id->src_ip6; p1 = (u_int8_t *)&id->dst_port; p2 = (u_int8_t *)&id->src_port; } else { a1 = &id->src_ip6; a2 = &id->dst_ip6; p1 = (u_int8_t *)&id->src_port; p2 = (u_int8_t *)&id->dst_port; } a = 0; j = 0; for (i = 0; i < sizeof(a1->s6_addr); i++) a += a1->s6_addr[i] * ipfw_hash_key[j++]; for (i = 0; i < sizeof(a2->s6_addr); i++) a += a2->s6_addr[i] * ipfw_hash_key[j++]; a += p1[0] * ipfw_hash_key[j++]; a += p1[1] * ipfw_hash_key[j++]; a += p2[0] * ipfw_hash_key[j++]; a += p2[1] * ipfw_hash_key[j++]; return (a % HASHPRIME); } #define COUNT 100000 #define COUNT2 4096 int main(int argc, char **argv) { int i, j; struct ipfw_flow_id *ids; struct timeval start, stop; uint64_t dur1, dur2; for (j = 0; j < HASHKEYLEN; j++) ipfw_hash_key[j] = arc4random() % HASHPRIME; ids = calloc(sizeof(struct ipfw_flow_id), COUNT2); for (i = 0; i < COUNT2; i++) { for (j = 0; j < 8; j++) { ids[0].src_ip6.s6_addr32[j] = arc4random(); ids[0].dst_ip6.s6_addr32[j] = arc4random(); } ids[0].src_port = (uint16_t)arc4random(); ids[0].dst_port = (uint16_t)arc4random(); } gettimeofday(&start, NULL); for (i = 0; i < COUNT; i++) for (j = 0; j < COUNT2; j++) (volatile)hash_packet6(&ids[j]); gettimeofday(&stop, NULL); dur1 = stop.tv_sec - start.tv_sec; dur1 *= 1000000; dur1 += (stop.tv_usec - start.tv_usec); printf("%ld\n", dur1); printf("%Lf\n", (long double)dur1 / (COUNT * COUNT2)); gettimeofday(&start, NULL); for (i = 0; i < COUNT; i++) for (j = 0; j < COUNT2; j++) (volatile)hash_packet5(&ids[j]); gettimeofday(&stop, NULL); dur2 = stop.tv_sec - start.tv_sec; dur2 *= 1000000; dur2 += (stop.tv_usec - start.tv_usec); printf("%ld\n", dur2); printf("%Lf\n", (long double)dur2 / (COUNT * COUNT2)); printf("%Lf\n", (long double)dur1 / dur2); return (0); } --Boundary-01=_Z5jdFG6xnwahQje-- --nextPart1742497.VWN3z6gnJ1 Content-Type: application/pgp-signature -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.5 (FreeBSD) iD8DBQBFdj5eXyyEoT62BG0RAotFAJ9Rfk0KkPplXHX8zKb+R2jUu3xruwCcDe9w Lc2uPSu4Hd9XhJUhD/skeqI= =C23x -----END PGP SIGNATURE----- --nextPart1742497.VWN3z6gnJ1--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200612060451.58473.max>