Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 31 May 2012 14:48:00 +0000
From:      gpf@FreeBSD.org
To:        svn-soc-all@FreeBSD.org
Subject:   socsvn commit: r236814 - soc2012/gpf/misc
Message-ID:  <20120531144800.288D7106566B@hub.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: gpf
Date: Thu May 31 14:47:59 2012
New Revision: 236814
URL: http://svnweb.FreeBSD.org/socsvn/?view=rev&rev=236814

Log:
  Altered gleb's script to test how often cuckoo_insert fails (infinfinite loop)
  and the relationship between success rate and hash table size.
  

Added:
  soc2012/gpf/misc/ck.py

Added: soc2012/gpf/misc/ck.py
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ soc2012/gpf/misc/ck.py	Thu May 31 14:47:59 2012	(r236814)
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+# gpf: testing how often cuckoo_insert goes into infinite loop
+# comments:
+# This implementation uses 2 hash tables and 2 respective hash functions.
+# The size of a hash table is the next mprime number of the total amount of elements
+# So for 100 elements we will have 2 * 101 = 202 cells. 
+# observations:
+# This way, success rate is around ~85%.
+# If we increate the size of the tables by 10%, success rate goes to ~96%
+# In this case, success rates increase as the number of total elements n increase.
+
+import random
+import gmpy
+from gmpy import mpz
+
+def hash1(elem, hts):
+	pos = elem % hts
+	return pos
+
+def hash2(elem, hts):
+	pos = (elem / hts) % hts
+	return pos
+
+def cuckoo_insert(elem, ht1, ht2, hts):
+	max_tries = hts
+
+	for i in xrange(0, max_tries):
+		pos1 = hash1(elem, hts)
+		elem1 = ht1[pos1]
+		# do the cuckoo
+		ht1[pos1] = elem
+		if elem1 == 0:
+			return 0
+		pos2 = hash2(elem1, hts)
+		elem2 = ht2[pos2]
+		# do the cuckoo
+		ht2[pos2] = elem1
+		if elem2 == 0:
+			return 0
+		else:
+			elem = elem2
+	return 1
+
+def build_cuckoo_table(R, hts):
+	ht1 = [ 0 for x in xrange(0, hts) ]
+	ht2 = [ 0 for x in xrange(0, hts) ]
+
+	for i in xrange(0, len(R)):
+		r = R[i]
+		inf = cuckoo_insert(r, ht1, ht2, hts)
+		if inf == 1:
+			return 1
+
+	return 0
+
+def generate_ht(n, tries, extra_space):
+	nextra = int(n * extra_space / 100)
+	hts = int(gmpy.next_prime(mpz(n + nextra)))
+
+	print 'Items: %s' % (n,)
+	print 'Hash Table Sizes: ', hts
+	print 'Extra Space for each table: %.02f%%' % extra_space
+
+	successful = 0
+	for j in xrange(0, tries):
+		R = [ random.getrandbits(64) for i in xrange(0, n) ]
+		while len(R) != len(set(R)):
+			print 'Initial hash collisions, regenerating: %s/%s', len(set(R)), n
+			R = [ random.getrandbits(64) for i in xrange(0, n) ]
+
+		res = build_cuckoo_table(R, hts)
+		if res == 0:
+			successful += 1
+		#done = float(100) * j / tries
+		#if (done % 25) == 0:
+			#print 'Done: %.02f%%' % (done)
+
+	succ_rate = float(100) * successful / tries
+	print 'Tries: ', tries
+	print 'Successful %d, Success rate %.02f%%' % (successful, succ_rate)
+	print ''
+
+# XXXgpf: better to lower the 2nd argument by 1/10 if you want results in a couple of mins
+generate_ht(100, 10000, 0)
+generate_ht(1000, 10000, 0)
+generate_ht(10000, 10000, 0)
+generate_ht(100000, 1000, 0)
+
+generate_ht(100, 10000, 10)
+generate_ht(1000, 10000, 10)
+generate_ht(10000, 10000, 10)
+generate_ht(100000, 1000, 10)



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20120531144800.288D7106566B>