Date: Thu, 29 Aug 2013 14:51:41 -0700 From: "T.C. Gubatayao" <tgubatayao@barracuda.com> To: Alan Somers <asomers@freebsd.org> Cc: Jack F Vogel <jfv@freebsd.org>, "Justin T. Gibbs" <gibbs@freebsd.org>, Andre Oppermann <andre@freebsd.org>, "net@freebsd.org" <net@freebsd.org> Subject: Re: Flow ID, LACP, and igb Message-ID: <49170157-EFC7-44A3-B881-12B4F2644F59@barracuda.com> In-Reply-To: <C209B12F-A404-47EC-8225-3F5E4123E05E@barracuda.com> References: <D01A0CB2-B1E3-4F4B-97FA-4C821C0E3FD2@FreeBSD.org> <521BBD21.4070304@freebsd.org> <CAOtMX2jvKGY==t9i-a_8RtMAPH2p1VDj950nMHHouryoz3nbsA@mail.gmail.com> <521EE8DA.3060107@freebsd.org> <BCC2C62D4FE171479E2F1C2593FE508B0BE24383@BN-SCL-MBX03.Cudanet.local> <CAOtMX2h5SGh5eYV50y%2BQB_s367V9iattGU862wwXcONDV%2BTG8g@mail.gmail.com> <0771FC4F-BCDD-4985-A33F-09951806AD99@barracuda.com> <CAOtMX2i5BXqm4_gP67MEmN8szCabp8_QRKfZM0tqFtbEKS31SA@mail.gmail.com> <C209B12F-A404-47EC-8225-3F5E4123E05E@barracuda.com>
next in thread | previous in thread | raw e-mail | index | archive | help
On Aug 29, 2013, at 5:40 PM, T.C. Gubatayao <tgubatayao@barracuda.com> wrote:
> On Aug 29, 2013, at 4:21 PM, Alan Somers <asomers@freebsd.org> wrote:
>
>> They're faster, but even with this change, jenkins_hash is still 6 times
>> slower than FNV hash.
>
> Actually, I think your test isn't accurately simulating memory access, which
> might be skewing the results.
>
> For example, from net/if_lagg.c:
>
>     p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
>     p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
>
> These two calls can't both be aligned, since ETHER_ADDR_LEN is 6 octets. The
> same is true for the other hashed fields in the IP and TCP/UDP headers.
> Assuming the mbuf data pointer is aligned, the IP addresses and ports are both
> on 2-byte alignments (without VLAN or IP options). In your test, they're all
> aligned and in the same cache line.
>
> When I modify the test to simulate an mbuf, lookup3 beats FNV and hash32, and
> SipHash is only 2-3 times slower.
>
>> Also, your technique of copying the hashable fields into a separate buffer
>> would need modification to work with different types of packet and different
>> LAGG_F_HASH[234] flags. Because different packets have different hashable
>> fields, struct key would need to be expanded to include the vlan tag, IPV6
>> addresses, and IPv6 flowid. lagg_hashmbuf would then have to zero the unused
>> fields.
>
> Agreed, but this is relatively simple with a buffer on the stack, and does not
> require zeroes or padding. See my modified test, attached.
>
> T.C.

Attachment was stripped.
--- a/lagg_hash.c 2013-08-29 14:21:17.255307349 -0400 +++ b/lagg_hash.c 2013-08-29 17:26:14.055404918 -0400 @@ -7,35 +7,63 @@ #include <sys/hash.h> #include <sys/fnv_hash.h> #include <sys/time.h> - -uint32_t jenkins_hash32(const uint32_t *, size_t, uint32_t); +#include <string.h> +#include <net/ethernet.h> +#include <netinet/ip.h> +#include <netinet/tcp.h> =20 #define ITERATIONS 100000000 =20 -typedef uint32_t do_hash_t(void); +typedef uint32_t do_hash_t(uint32_t); + +/* + * Simulate mbuf data for a packet. + * No VLAN tagging and no IP options. + */ +struct _mbuf { + struct ether_header eh; + struct ip ip; + struct tcphdr th; +} __attribute__((packed)) m =3D { + { + .ether_dhost =3D { 181, 16, 73, 9, 219, 22 }, + .ether_shost =3D { 69, 170, 210, 11, 24, 120 }, + .ether_type =3D 0x008 + }, + { + .ip_src.s_addr =3D 1329258245, + .ip_dst.s_addr =3D 1319097119, + .ip_p =3D 0x06 + }, + { + .th_sport =3D 12506, + .th_dport =3D 47804 + } +}; =20 -// Pad the MACs with 0s because jenkins_hash operates on 32-bit inputs -const uint8_t ether_shost[] =3D {181, 16, 73, 9, 219, 22, 0, 0}; -const uint8_t ether_dhost[] =3D {69, 170, 210, 111, 24, 120, 0, 0}; -const struct in_addr ip_src =3D {.s_addr =3D 1329258245}; -const struct in_addr ip_dst =3D {.s_addr =3D 1319097119}; -const uint32_t ports =3D 3132895450; const uint8_t sipkey[16] =3D {7, 239, 255, 43, 68, 53, 56, 225, 98, 81, 177, 80, 92, 235, 242, 39}; =20 +#define LAGG_F_HASHL2 0x1 +#define LAGG_F_HASHL3 0x2 +#define LAGG_F_HASHL4 0x4 +#define LAGG_F_HASHALL (LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4) + /* * Simulate how lagg_hashmbuf uses FNV hash for a TCP/IP packet * No VLAN tagging */ -uint32_t do_fnv(void) +uint32_t do_fnv(uint32_t flags) { uint32_t p =3D FNV1_32_INIT; =20 - p =3D fnv_32_buf(ether_shost, 6, p); - p =3D fnv_32_buf(ether_dhost, 6, p); - p =3D fnv_32_buf(&ip_src, sizeof(struct in_addr), p); - p =3D fnv_32_buf(&ip_dst, sizeof(struct in_addr), p); - p =3D fnv_32_buf(&ports, sizeof(ports), p); + if (flags & 
LAGG_F_HASHL2) + p =3D fnv_32_buf(&m.eh.ether_dhost, 12, p); + if (flags & LAGG_F_HASHL3) + p =3D fnv_32_buf(&m.ip.ip_src, 8, p); + if (flags & LAGG_F_HASHL4) + p =3D fnv_32_buf(&m.th.th_sport, 4, p); + return (p); } =20 @@ -43,59 +71,74 @@ * Simulate how lagg_hashmbuf uses hash32 for a TCP/IP packet * No VLAN tagging */ -uint32_t do_hash32(void) +uint32_t do_hash32(uint32_t flags) { // Actually, if_lagg used a pseudorandom number determined at inter= face // creation time. But this should have the same timing // characteristics. uint32_t p =3D HASHINIT; =20 - p =3D hash32_buf(ether_shost, 6, p); - p =3D hash32_buf(ether_dhost, 6, p); - p =3D hash32_buf(&ip_src, sizeof(struct in_addr), p); - p =3D hash32_buf(&ip_dst, sizeof(struct in_addr), p); - p =3D hash32_buf(&ports, sizeof(ports), p); + if (flags & LAGG_F_HASHL2) + p =3D hash32_buf(&m.eh.ether_dhost, 12, p); + if (flags & LAGG_F_HASHL3) + p =3D hash32_buf(&m.ip.ip_src, 8, p); + if (flags & LAGG_F_HASHL4) + p =3D hash32_buf(&m.th.th_sport, 4, p); + return (p); } =20 +/* Simulate copying the info out of the mbuf. 
*/ +static __inline size_t init_key(char *key, uint32_t flags) +{ + uint16_t etype; + size_t len =3D 0; + + if (flags & LAGG_F_HASHL2) { + memcpy(key + len, &m.eh.ether_dhost, 12); + len +=3D 12; + } + + if (flags & LAGG_F_HASHL3) { + memcpy(key + len, &m.ip.ip_src, 8); + len +=3D 8; + } + + if (flags & LAGG_F_HASHL4) { + memcpy(key + len, &m.th.th_sport, 4); + len +=3D 4; + } + + return (len); +} + /* * Simulate how lagg_hashmbuf would use siphash24 for a TCP/IP packet * No VLAN tagging */ -uint32_t do_siphash24(void) +uint32_t do_siphash24(uint32_t flags) { SIPHASH_CTX ctx; + char key[26]; + size_t len; =20 - SipHash24_Init(&ctx); - SipHash_SetKey(&ctx, sipkey); + len =3D init_key(key, flags); =20 - SipHash_Update(&ctx, ether_shost, 6); - SipHash_Update(&ctx, ether_dhost, 6); - SipHash_Update(&ctx, &ip_src, sizeof(struct in_addr)); - SipHash_Update(&ctx, &ip_dst, sizeof(struct in_addr)); - SipHash_Update(&ctx, &ports, sizeof(ports)); - return (SipHash_End(&ctx) & 0xFFFFFFFF); + return (SipHash24(&ctx, sipkey, key, len) & 0xFFFFFFFF); } =20 /* * Simulate how lagg_hashmbuf would use lookup3 aka jenkins_hash * No VLAN tagging */ -uint32_t do_jenkins(void) +uint32_t do_jenkins(uint32_t flags) { - /* Jenkins hash does not recommend any specific initializer */ - uint32_t p =3D FNV1_32_INIT; + char key[26]; + size_t len; =20 - /*=20 - * jenkins_hash uses 32-bit inputs, so we need to present the MACs = as - * arrays of 2 32-bit values - */ - p =3D jenkins_hash32((uint32_t*)ether_shost, 2, p); - p =3D jenkins_hash32((uint32_t*)ether_dhost, 2, p); - p =3D jenkins_hash32((uint32_t*)&ip_src, sizeof(struct in_addr) / 4= , p); - p =3D jenkins_hash32((uint32_t*)&ip_dst, sizeof(struct in_addr) / 4= , p); - p =3D jenkins_hash32(&ports, sizeof(ports) / 4, p); - return (p); + len =3D init_key(key, flags); + + return (jenkins_hash(key, len, FNV1_32_INIT)); } =20 =20 @@ -120,7 +163,7 @@ =20 gettimeofday(&tv_old, NULL); for (j=3D0; j<ITERATIONS; j++) - funcs[i].f(); + 
funcs[i].f(LAGG_F_HASHALL); gettimeofday(&tv_new, NULL); timersub(&tv_new, &tv_old, &tv_diff); t =3D tv_diff.tv_sec + tv_diff.tv_usec / 1000000.;
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?49170157-EFC7-44A3-B881-12B4F2644F59>