Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 04 Jun 2012 13:31:32 +0000
From:      gpf@FreeBSD.org
To:        svn-soc-all@FreeBSD.org
Subject:   socsvn commit: r237061 - soc2012/gpf/pefs_kmod/sbin/pefs
Message-ID:  <20120604133132.EC5DC1065670@hub.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: gpf
Date: Mon Jun  4 13:31:32 2012
New Revision: 237061
URL: http://svnweb.FreeBSD.org/socsvn/?view=rev&rev=237061

Log:
  properly handle scenario where cuckoo insertion falls into infinite loop.
  new hash tables are allocated where new_size=next_prime(old_size).
  all file_header elements are kept in a 'global' tail so that we won't have to
  re-produce them, should insertion fail.
  

Modified:
  soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c
  soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.h

Modified: soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c
==============================================================================
--- soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c	Mon Jun  4 12:49:21 2012	(r237060)
+++ soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c	Mon Jun  4 13:31:32 2012	(r237061)
@@ -53,7 +53,7 @@
 
 #include "pefs_ctl.h"
 
-//#define PEFS_INTEGRITY_DEBUG
+#define PEFS_INTEGRITY_DEBUG
 #if defined (PEFS_INTEGRITY_DEBUG)
 #define dprintf(a)		printf a
 #else
@@ -65,6 +65,7 @@
 #define PEFS_EXTRA_TABLE_SIZE 15
 
 TAILQ_HEAD(checksum_head, checksum);
+TAILQ_HEAD(file_header_head, file_header);
 
 #define PEFS_CFH_SIZE 16
 #define PEFS_FH_SIZE 16
@@ -89,9 +90,9 @@
 	uint32_t nhashes;
 	uint64_t file_id;
 	char path[MAXPATHLEN];
-	LIST_ENTRY(file_header) bucket_entries;
 	uint32_t offset_to_checksums;
 	struct checksum_head checksums;
+	TAILQ_ENTRY(file_header) file_header_entries;
 };
 
 struct bucket {
@@ -132,6 +133,9 @@
 {
 	uint32_t i;
 
+	if (num == 2 || num == 3)
+		return num;
+
 	if (num % 2 == 0)
 		num+=1;
 
@@ -262,22 +266,30 @@
 	chtp->buckets1 = NULL;
 	chtp->buckets2 = NULL;
 }
-
+	
 static int
-pefs_allocate_hash_table(struct cuckoo_hash_table *chtp, uint32_t nelements)
+pefs_allocate_hash_table(struct cuckoo_hash_table *chtp, uint32_t nelements, char reallocate)
 {
 	uint32_t i;
 
-	/*
-	 * spending 15% more space for each table lowers the chance to fall into an
-	 * infinite loop during cuckoo insertion to about 1.5%.
-	 */
-	chtp->size = pefs_next_prime(nelements + ((nelements * PEFS_EXTRA_TABLE_SIZE)/100));
-	chtp->nelements = nelements;
-	if (chtp->size < chtp->nelements) {
-		pefs_warn("numeric overflow while computing hash table size");
-		return (PEFS_ERR_GENERIC);
+	if (reallocate == 0) {
+		/*
+		 * spending 15% more space for each table lowers the chance to fall into an
+		 * infinite loop during cuckoo insertion to about 1.5%.
+		 */
+		chtp->size = pefs_next_prime(nelements + ((nelements * PEFS_EXTRA_TABLE_SIZE)/100));
+		chtp->nelements = nelements;
+		if (chtp->size < chtp->nelements) {
+			pefs_warn("numeric overflow while computing hash table size");
+			return (PEFS_ERR_GENERIC);
+		}
 	}
+	else {
+		chtp->size = pefs_next_prime(chtp->size + 1);
+		free(chtp->buckets1);
+		free(chtp->buckets2);
+	}
+
 	dprintf(("hash table elem:%u\tsize: %u\n", chtp->nelements, chtp->size));
 
 	chtp->buckets1 = malloc (chtp->size * sizeof(struct bucket));
@@ -422,7 +434,7 @@
 	}
 
 	pefs_warn("cuckoo_insert resulted in infinite loop!");
-	return (PEFS_ERR_CUCKOO_LOOP);
+	return (PEFS_ERR_GENERIC);
 }
 
 static int
@@ -593,19 +605,25 @@
 /*
  * This function creates the in memory database that will be later written to
  * the checksum file.
- * A) The total sum of entries is gathered so that a hash table is allocated.
+ * A) The total sum of entries is gathered so that the hash tables are allocated.
  * B) For each file entry:
  * 		B1) semantic checks: file should reside in pefs filesystem &
  * 			file should be regular file
  * 		B2) the file_id is retrieved.
  * 		B3) list of checksums is computed for the file's 4k blocks.
- * 		B4) file entry is added to hash table. (cuckoo hashing is used)
+ * 		B4) file entry is added to fh_head
+ * C) Cuckoo insertion:
+ * We try to populate our hash tables using the cuckoo algorithm. Should we fall
+ * into an infinite loop during insertion, we re-allocate larger hash tables
+ * and try again until we succeed. The possibility to fail twice in a row is 
+ * 1.5% * 1.5% = 0.0225%
  */
 static int
 pefs_create_in_memory_db(FILE *fpin, const EVP_MD *md, uint8_t hash_len,
 	struct cuckoo_hash_table *chtp, char *fsroot)
 {
 	struct statfs fs;
+	struct file_header_head fh_head;
 	struct file_header *fhp;
 	int error;
 	uint32_t nfiles;
@@ -619,10 +637,11 @@
 	if (error != 0)
 		return (error);
 
-	error = pefs_allocate_hash_table(chtp, nfiles);
+	error = pefs_allocate_hash_table(chtp, nfiles, 0);
 	if (error != 0)
 		return (error);
 
+	TAILQ_INIT(&fh_head);
 	while((fhp = pefs_next_file(fpin, &error)) != NULL) {
 		error = pefs_file_semantic_checks(fhp, &fs);
 		if (error != 0)
@@ -636,11 +655,26 @@
 		if (error != 0)
 			return (error);
 
-		error = pefs_add_to_hash_table(chtp, fhp);
-		if (error != 0)
-			return (error);
+		TAILQ_INSERT_TAIL(&fh_head, fhp, file_header_entries);
 	}
 
+cuckoo_insert:
+	 TAILQ_FOREACH(fhp, &fh_head, file_header_entries) {
+		error = pefs_add_to_hash_table(chtp, fhp);
+		/* 
+		 * cuckoo insertion algorithm fell into an infinite loop!
+		 * Create new, larger hash tables where size = next_prime(old_size)
+		 * and try again.
+		 */
+		if (error != 0) {
+			dprintf(("fell into an infinite loop!\n"));
+			error = pefs_allocate_hash_table(chtp, nfiles, 1);
+			if (error != 0)
+				return (error);
+			goto cuckoo_insert;
+		}
+	 }
+
 	pefs_print_hash_table(chtp, hash_len);
 
 	return (error);
@@ -933,11 +967,6 @@
 
 	error = pefs_create_in_memory_db(fpin, md, hash_len,
 		&checksum_hash_table, fsroot);
-	/*
-	 * XXXgpf: [TODO] Properly handle PEFS_ERR_CUCKOO_LOOP by retrying with
-	 * larger tables (next prime number?). We shouldn't have to reread all
-	 * file entries btw.
-	 */
 	if (error != 0)
 		goto out;
 

Modified: soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.h
==============================================================================
--- soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.h	Mon Jun  4 12:49:21 2012	(r237060)
+++ soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.h	Mon Jun  4 13:31:32 2012	(r237061)
@@ -61,7 +61,6 @@
 #define	PEFS_ERR_NOENT			5
 #define	PEFS_ERR_EXIST			6
 #define	PEFS_ERR_INVALID		7
-#define PEFS_ERR_CUCKOO_LOOP	8
 
 #define	PEFS_FS_IGNORE_TYPE		0x0001
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20120604133132.EC5DC1065670>