Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 18 Oct 2014 22:15:12 +0000 (UTC)
From:      Dag-Erling Smørgrav <des@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r273268 - in head: share/man/man9 sys/libkern sys/netpfil/pf sys/sys
Message-ID:  <201410182215.s9IMFC4F093524@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: des
Date: Sat Oct 18 22:15:11 2014
New Revision: 273268
URL: https://svnweb.freebsd.org/changeset/base/273268

Log:
  Add a complete implementation of MurmurHash3.  Tweak both implementations
  so they match the established idiom.  Document them in hash(9).
  
  MFC after:	1 month
  MFC with:	r272906

Modified:
  head/share/man/man9/hash.9
  head/sys/libkern/murmur3_32.c
  head/sys/netpfil/pf/pf.c
  head/sys/sys/hash.h

Modified: head/share/man/man9/hash.9
==============================================================================
--- head/share/man/man9/hash.9	Sat Oct 18 22:11:10 2014	(r273267)
+++ head/share/man/man9/hash.9	Sat Oct 18 22:15:11 2014	(r273268)
@@ -26,7 +26,7 @@
 .\"     $OpenBSD: hash.9,v 1.5 2003/04/17 05:08:39 jmc Exp $
 .\" $FreeBSD$
 .\"
-.Dd September 4, 2012
+.Dd October 18, 2014
 .Dt HASH 9
 .Os
 .Sh NAME
@@ -37,8 +37,10 @@
 .Nm hash32_strn ,
 .Nm hash32_stre ,
 .Nm hash32_strne ,
+.Nm jenkins_hash ,
 .Nm jenkins_hash32 ,
-.Nm jenkins_hash
+.Nm murmur3_32_hash ,
+.Nm murmur3_32_hash32
 .Nd general kernel hashing functions
 .Sh SYNOPSIS
 .In sys/hash.h
@@ -56,6 +58,10 @@
 .Fn jenkins_hash "const void *buf" "size_t len" "uint32_t hash"
 .Ft uint32_t
 .Fn jenkins_hash32 "const uint32_t *buf" "size_t count" "uint32_t hash"
+.Ft uint32_t
+.Fn murmur3_32_hash "const void *buf" "size_t len" "uint32_t hash"
+.Ft uint32_t
+.Fn murmur3_32_hash32 "const uint32_t *buf" "size_t count" "uint32_t hash"
 .Sh DESCRIPTION
 The
 .Fn hash32
@@ -130,6 +136,16 @@ sized arrays, thus is simplier and faste
 It accepts an array of
 .Ft uint32_t
 values in its first argument and size of this array in the second argument.
+.Pp
+The
+.Fn murmur3_32_hash
+and
+.Fn murmur3_32_hash32
+functions are similar to
+.Fn jenkins_hash
+and
+.Fn jenkins_hash32 ,
+but implement the 32-bit version of MurmurHash3.
 .Sh RETURN VALUES
 The
 .Fn hash32
@@ -185,6 +201,10 @@ The
 .Nm jenkins_hash
 functions were added in
 .Fx 10.0 .
+The
+.Nm murmur3_32_hash
+functions were added in
+.Fx 10.1 .
 .Sh AUTHORS
 The
 .Nm hash32
@@ -192,5 +212,9 @@ functions were written by
 .An Tobias Weingartner .
 The
 .Nm jenkins_hash
-functions was written by
-Bob Jenkins .
+functions were written by
+.An Bob Jenkins .
+The
+.Nm murmur3_32_hash
+functions were written by
+.An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org .

Modified: head/sys/libkern/murmur3_32.c
==============================================================================
--- head/sys/libkern/murmur3_32.c	Sat Oct 18 22:11:10 2014	(r273267)
+++ head/sys/libkern/murmur3_32.c	Sat Oct 18 22:15:11 2014	(r273268)
@@ -22,6 +22,8 @@
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
+ *
+ * $FreeBSD$
  */
 
 #include <sys/hash.h>
@@ -32,27 +34,31 @@
 #define rol32(i32, n) ((i32) << (n) | (i32) >> (32 - (n)))
 
 /*
- * $FreeBSD$
- * Simple implementation of the Murmur3-32 hash function optimized for
- * aligned sequences of 32-bit words.  If len is not a multiple of 4, it
- * will be rounded down, droping trailer bytes.
+ * Simple implementation of the Murmur3-32 hash function.
+ *
+ * This implementation is slow but safe.  It can be made significantly
+ * faster if the caller guarantees that the input is correctly aligned for
+ * 32-bit reads, and slightly faster yet if the caller guarantees that the
+ * length of the input is always a multiple of 4 bytes.
  */
 uint32_t
-murmur3_aligned_32(const void *data, size_t len, uint32_t seed)
+murmur3_32_hash(const void *data, size_t len, uint32_t seed)
 {
-	const uint32_t *data32;
+	const uint8_t *bytes;
 	uint32_t hash, k;
 	size_t res;
 
-	/* initialize */
-	len -= len % sizeof(*data32);
+	/* initialization */
+	bytes = data;
 	res = len;
-	data32 = data;
 	hash = seed;
 
-	/* iterate */
-	for (res = 0; res < len; res += sizeof(*data32), data32++) {
-		k = le32toh(*data32);
+	/* main loop */
+	while (res >= 4) {
+		/* replace with le32toh() if input is aligned */
+		k = le32dec(bytes);
+		bytes += 4;
+		res -= 4;
 		k *= 0xcc9e2d51;
 		k = rol32(k, 15);
 		k *= 0x1b873593;
@@ -62,6 +68,25 @@ murmur3_aligned_32(const void *data, siz
 		hash += 0xe6546b64;
 	}
 
+	/* remainder */
+	/* remove if input length is a multiple of 4 */
+	if (res > 0) {
+		k = 0;
+		switch (res) {
+		case 3:
+			k |= bytes[2] << 16;
+		case 2:
+			k |= bytes[1] << 8;
+		case 1:
+			k |= bytes[0];
+			k *= 0xcc9e2d51;
+			k = rol32(k, 15);
+			k *= 0x1b873593;
+			hash ^= k;
+			break;
+		}
+	}
+
 	/* finalize */
 	hash ^= (uint32_t)len;
 	hash ^= hash >> 16;
@@ -72,3 +97,36 @@ murmur3_aligned_32(const void *data, siz
 	return (hash);
 }
 
+/*
+ * Simplified version of the above optimized for aligned sequences of
+ * 32-bit words.  The count argument is the number of words, not the
+ * length in bytes.
+ */
+uint32_t
+murmur3_32_hash32(const uint32_t *data, size_t count, uint32_t seed)
+{
+	uint32_t hash, k;
+	size_t res;
+
+	/* iterate */
+	for (res = count, hash = seed; res > 0; res--, data++) {
+		k = le32toh(*data);
+		k *= 0xcc9e2d51;
+		k = rol32(k, 15);
+		k *= 0x1b873593;
+		hash ^= k;
+		hash = rol32(hash, 13);
+		hash *= 5;
+		hash += 0xe6546b64;
+	}
+
+	/* finalize */
+	hash ^= (uint32_t)count;
+	hash ^= hash >> 16;
+	hash *= 0x85ebca6b;
+	hash ^= hash >> 13;
+	hash *= 0xc2b2ae35;
+	hash ^= hash >> 16;
+	return (hash);
+}
+

Modified: head/sys/netpfil/pf/pf.c
==============================================================================
--- head/sys/netpfil/pf/pf.c	Sat Oct 18 22:11:10 2014	(r273267)
+++ head/sys/netpfil/pf/pf.c	Sat Oct 18 22:15:11 2014	(r273268)
@@ -374,9 +374,9 @@ pf_hashkey(struct pf_state_key *sk)
 {
 	uint32_t h;
 
-	h = murmur3_aligned_32((uint32_t *)sk,
-			       sizeof(struct pf_state_key_cmp),
-			       V_pf_hashseed);
+	h = murmur3_32_hash32((uint32_t *)sk,
+	    sizeof(struct pf_state_key_cmp)/sizeof(uint32_t),
+	    V_pf_hashseed);
 
 	return (h & pf_hashmask);
 }
@@ -388,12 +388,12 @@ pf_hashsrc(struct pf_addr *addr, sa_fami
 
 	switch (af) {
 	case AF_INET:
-		h = murmur3_aligned_32((uint32_t *)&addr->v4,
-				       sizeof(addr->v4), V_pf_hashseed);
+		h = murmur3_32_hash32((uint32_t *)&addr->v4,
+		    sizeof(addr->v4)/sizeof(uint32_t), V_pf_hashseed);
 		break;
 	case AF_INET6:
-		h = murmur3_aligned_32((uint32_t *)&addr->v6,
-				       sizeof(addr->v6), V_pf_hashseed);
+		h = murmur3_32_hash32((uint32_t *)&addr->v6,
+		    sizeof(addr->v6)/sizeof(uint32_t), V_pf_hashseed);
 		break;
 	default:
 		panic("%s: unknown address family %u", __func__, af);

Modified: head/sys/sys/hash.h
==============================================================================
--- head/sys/sys/hash.h	Sat Oct 18 22:11:10 2014	(r273267)
+++ head/sys/sys/hash.h	Sat Oct 18 22:15:11 2014	(r273268)
@@ -126,7 +126,8 @@ hash32_strne(const void *buf, size_t len
 uint32_t jenkins_hash(const void *, size_t, uint32_t);
 uint32_t jenkins_hash32(const uint32_t *, size_t, uint32_t);
 
-uint32_t murmur3_aligned_32(const void *data, size_t len, uint32_t seed);
+uint32_t murmur3_32_hash(const void *, size_t, uint32_t);
+uint32_t murmur3_32_hash32(const uint32_t *, size_t, uint32_t);
 
 #endif /* _KERNEL */
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201410182215.s9IMFC4F093524>