Date: Sat, 18 Oct 2014 22:15:12 +0000 (UTC) From: Dag-Erling Smørgrav <des@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r273268 - in head: share/man/man9 sys/libkern sys/netpfil/pf sys/sys Message-ID: <201410182215.s9IMFC4F093524@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: des Date: Sat Oct 18 22:15:11 2014 New Revision: 273268 URL: https://svnweb.freebsd.org/changeset/base/273268 Log: Add a complete implementation of MurmurHash3. Tweak both implementations so they match the established idiom. Document them in hash(9). MFC after: 1 month MFC with: r272906 Modified: head/share/man/man9/hash.9 head/sys/libkern/murmur3_32.c head/sys/netpfil/pf/pf.c head/sys/sys/hash.h Modified: head/share/man/man9/hash.9 ============================================================================== --- head/share/man/man9/hash.9 Sat Oct 18 22:11:10 2014 (r273267) +++ head/share/man/man9/hash.9 Sat Oct 18 22:15:11 2014 (r273268) @@ -26,7 +26,7 @@ .\" $OpenBSD: hash.9,v 1.5 2003/04/17 05:08:39 jmc Exp $ .\" $FreeBSD$ .\" -.Dd September 4, 2012 +.Dd October 18, 2014 .Dt HASH 9 .Os .Sh NAME @@ -37,8 +37,10 @@ .Nm hash32_strn , .Nm hash32_stre , .Nm hash32_strne , +.Nm jenkins_hash , .Nm jenkins_hash32 , -.Nm jenkins_hash +.Nm murmur3_32_hash , +.Nm murmur3_32_hash32 .Nd general kernel hashing functions .Sh SYNOPSIS .In sys/hash.h @@ -56,6 +58,10 @@ .Fn jenkins_hash "const void *buf" "size_t len" "uint32_t hash" .Ft uint32_t .Fn jenkins_hash32 "const uint32_t *buf" "size_t count" "uint32_t hash" +.Ft uint32_t +.Fn murmur3_32_hash "const void *buf" "size_t len" "uint32_t hash" +.Ft uint32_t +.Fn murmur3_32_hash32 "const uint32_t *buf" "size_t count" "uint32_t hash" .Sh DESCRIPTION The .Fn hash32 @@ -130,6 +136,16 @@ sized arrays, thus is simplier and faste It accepts an array of .Ft uint32_t values in its first argument and size of this array in the second argument. +.Pp +The +.Fn murmur3_32_hash +and +.Fn murmur3_32_hash32 +functions are similar to +.Fn jenkins_hash +and +.Fn jenkins_hash32 , +but implement the 32-bit version of MurmurHash3. .Sh RETURN VALUES The .Fn hash32 @@ -185,6 +201,10 @@ The .Nm jenkins_hash functions were added in .Fx 10.0 . +The +.Nm murmur3_32_hash +functions were added in +.Fx 10.1 . .Sh AUTHORS The .Nm hash32 @@ -192,5 +212,9 @@ functions were written by .An Tobias Weingartner . The .Nm jenkins_hash -functions was written by -Bob Jenkins . +functions were written by +.An Bob Jenkins . +The +.Nm murmur3_32_hash +functions were written by +.An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org . Modified: head/sys/libkern/murmur3_32.c ============================================================================== --- head/sys/libkern/murmur3_32.c Sat Oct 18 22:11:10 2014 (r273267) +++ head/sys/libkern/murmur3_32.c Sat Oct 18 22:15:11 2014 (r273268) @@ -22,6 +22,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $FreeBSD$ */ #include <sys/hash.h> @@ -32,27 +34,31 @@ #define rol32(i32, n) ((i32) << (n) | (i32) >> (32 - (n))) /* - * $FreeBSD$ - * Simple implementation of the Murmur3-32 hash function optimized for - * aligned sequences of 32-bit words. If len is not a multiple of 4, it - * will be rounded down, droping trailer bytes. + * Simple implementation of the Murmur3-32 hash function. + * + * This implementation is slow but safe. It can be made significantly + * faster if the caller guarantees that the input is correctly aligned for + * 32-bit reads, and slightly faster yet if the caller guarantees that the + * length of the input is always a multiple of 4 bytes. */ uint32_t -murmur3_aligned_32(const void *data, size_t len, uint32_t seed) +murmur3_32_hash(const void *data, size_t len, uint32_t seed) { - const uint32_t *data32; + const uint8_t *bytes; uint32_t hash, k; size_t res; - /* initialize */ - len -= len % sizeof(*data32); + /* initialization */ + bytes = data; res = len; - data32 = data; hash = seed; - /* iterate */ - for (res = 0; res < len; res += sizeof(*data32), data32++) { - k = le32toh(*data32); + /* main loop */ + while (res >= 4) { + /* replace with le32toh() if input is aligned */ + k = le32dec(bytes); + bytes += 4; + res -= 4; k *= 0xcc9e2d51; k = rol32(k, 15); k *= 0x1b873593; @@ -62,6 +68,25 @@ murmur3_aligned_32(const void *data, siz hash += 0xe6546b64; } + /* remainder */ + /* remove if input length is a multiple of 4 */ + if (res > 0) { + k = 0; + switch (res) { + case 3: + k |= bytes[2] << 16; + case 2: + k |= bytes[1] << 8; + case 1: + k |= bytes[0]; + k *= 0xcc9e2d51; + k = rol32(k, 15); + k *= 0x1b873593; + hash ^= k; + break; + } + } + /* finalize */ hash ^= (uint32_t)len; hash ^= hash >> 16; @@ -72,3 +97,36 @@ murmur3_aligned_32(const void *data, siz return (hash); } +/* + * Simplified version of the above optimized for aligned sequences of + * 32-bit words. The count argument is the number of words, not the + * length in bytes. + */ +uint32_t +murmur3_32_hash32(const uint32_t *data, size_t count, uint32_t seed) +{ + uint32_t hash, k; + size_t res; + + /* iterate */ + for (res = count, hash = seed; res > 0; res--, data++) { + k = le32toh(*data); + k *= 0xcc9e2d51; + k = rol32(k, 15); + k *= 0x1b873593; + hash ^= k; + hash = rol32(hash, 13); + hash *= 5; + hash += 0xe6546b64; + } + + /* finalize */ + hash ^= (uint32_t)count; + hash ^= hash >> 16; + hash *= 0x85ebca6b; + hash ^= hash >> 13; + hash *= 0xc2b2ae35; + hash ^= hash >> 16; + return (hash); +} + Modified: head/sys/netpfil/pf/pf.c ============================================================================== --- head/sys/netpfil/pf/pf.c Sat Oct 18 22:11:10 2014 (r273267) +++ head/sys/netpfil/pf/pf.c Sat Oct 18 22:15:11 2014 (r273268) @@ -374,9 +374,9 @@ pf_hashkey(struct pf_state_key *sk) { uint32_t h; - h = murmur3_aligned_32((uint32_t *)sk, - sizeof(struct pf_state_key_cmp), - V_pf_hashseed); + h = murmur3_32_hash32((uint32_t *)sk, + sizeof(struct pf_state_key_cmp)/sizeof(uint32_t), + V_pf_hashseed); return (h & pf_hashmask); } @@ -388,12 +388,12 @@ pf_hashsrc(struct pf_addr *addr, sa_fami switch (af) { case AF_INET: - h = murmur3_aligned_32((uint32_t *)&addr->v4, - sizeof(addr->v4), V_pf_hashseed); + h = murmur3_32_hash32((uint32_t *)&addr->v4, + sizeof(addr->v4)/sizeof(uint32_t), V_pf_hashseed); break; case AF_INET6: - h = murmur3_aligned_32((uint32_t *)&addr->v6, - sizeof(addr->v6), V_pf_hashseed); + h = murmur3_32_hash32((uint32_t *)&addr->v6, + sizeof(addr->v6)/sizeof(uint32_t), V_pf_hashseed); break; default: panic("%s: unknown address family %u", __func__, af); Modified: head/sys/sys/hash.h ============================================================================== --- head/sys/sys/hash.h Sat Oct 18 22:11:10 2014 (r273267) +++ head/sys/sys/hash.h Sat Oct 18 22:15:11 2014 (r273268) @@ -126,7 +126,8 @@ hash32_strne(const void *buf, size_t len uint32_t jenkins_hash(const void *, size_t, uint32_t); uint32_t jenkins_hash32(const uint32_t *, size_t, uint32_t); -uint32_t murmur3_aligned_32(const void *data, size_t len, uint32_t seed); +uint32_t murmur3_32_hash(const void *, size_t, uint32_t); +uint32_t murmur3_32_hash32(const uint32_t *, size_t, uint32_t); #endif /* _KERNEL */
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201410182215.s9IMFC4F093524>