Date: Thu, 31 May 2012 14:48:00 +0000 From: gpf@FreeBSD.org To: svn-soc-all@FreeBSD.org Subject: socsvn commit: r236814 - soc2012/gpf/misc Message-ID: <20120531144800.288D7106566B@hub.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: gpf Date: Thu May 31 14:47:59 2012 New Revision: 236814 URL: http://svnweb.FreeBSD.org/socsvn/?view=rev&rev=236814 Log: Altered gleb's script to test how often cuckoo_insert fails (infinfinite loop) and the relationship between success rate and hash table size. Added: soc2012/gpf/misc/ck.py Added: soc2012/gpf/misc/ck.py ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ soc2012/gpf/misc/ck.py Thu May 31 14:47:59 2012 (r236814) @@ -0,0 +1,92 @@ +#!/usr/bin/env python +# gpf: testing how often cuckoo_insert goes into infinite loop +# comments: +# This implementation uses 2 hash tables and 2 respective hash functions. +# The size of a hash table is the next mprime number of the total amount of elements +# So for 100 elements we will have 2 * 101 = 202 cells. +# observations: +# This way, success rate is around ~85%. +# If we increate the size of the tables by 10%, success rate goes to ~96% +# In this case, success rates increase as the number of total elements n increase. + +import random +import gmpy +from gmpy import mpz + +def hash1(elem, hts): + pos = elem % hts + return pos + +def hash2(elem, hts): + pos = (elem / hts) % hts + return pos + +def cuckoo_insert(elem, ht1, ht2, hts): + max_tries = hts + + for i in xrange(0, max_tries): + pos1 = hash1(elem, hts) + elem1 = ht1[pos1] + # do the cuckoo + ht1[pos1] = elem + if elem1 == 0: + return 0 + pos2 = hash2(elem1, hts) + elem2 = ht2[pos2] + # do the cuckoo + ht2[pos2] = elem1 + if elem2 == 0: + return 0 + else: + elem = elem2 + return 1 + +def build_cuckoo_table(R, hts): + ht1 = [ 0 for x in xrange(0, hts) ] + ht2 = [ 0 for x in xrange(0, hts) ] + + for i in xrange(0, len(R)): + r = R[i] + inf = cuckoo_insert(r, ht1, ht2, hts) + if inf == 1: + return 1 + + return 0 + +def generate_ht(n, tries, extra_space): + nextra = int(n * extra_space / 100) + hts = int(gmpy.next_prime(mpz(n + nextra))) + + print 'Items: %s' % (n,) + print 'Hash Table Sizes: ', hts + print 'Extra Space for each table: %.02f%%' % extra_space + + successful = 0 + for j in xrange(0, tries): + R = [ random.getrandbits(64) for i in xrange(0, n) ] + while len(R) != len(set(R)): + print 'Initial hash collisions, regenerating: %s/%s', len(set(R)), n + R = [ random.getrandbits(64) for i in xrange(0, n) ] + + res = build_cuckoo_table(R, hts) + if res == 0: + successful += 1 + #done = float(100) * j / tries + #if (done % 25) == 0: + #print 'Done: %.02f%%' % (done) + + succ_rate = float(100) * successful / tries + print 'Tries: ', tries + print 'Successful %d, Success rate %.02f%%' % (successful, succ_rate) + print '' + +# XXXgpf: better to lower the 2nd argument by 1/10 if you want results in a couple of mins +generate_ht(100, 10000, 0) +generate_ht(1000, 10000, 0) +generate_ht(10000, 10000, 0) +generate_ht(100000, 1000, 0) + +generate_ht(100, 10000, 10) +generate_ht(1000, 10000, 10) +generate_ht(10000, 10000, 10) +generate_ht(100000, 1000, 10)
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20120531144800.288D7106566B>