From owner-svn-soc-all@FreeBSD.ORG Mon Jun 4 13:31:35 2012 Return-Path: Delivered-To: svn-soc-all@FreeBSD.org Received: from socsvn.FreeBSD.org (unknown [IPv6:2001:4f8:fff6::2f]) by hub.freebsd.org (Postfix) with SMTP id EC5DC1065670 for ; Mon, 4 Jun 2012 13:31:32 +0000 (UTC) (envelope-from gpf@FreeBSD.org) Received: by socsvn.FreeBSD.org (sSMTP sendmail emulation); Mon, 04 Jun 2012 13:31:32 +0000 Date: Mon, 04 Jun 2012 13:31:32 +0000 From: gpf@FreeBSD.org To: svn-soc-all@FreeBSD.org MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Message-Id: <20120604133132.EC5DC1065670@hub.freebsd.org> Cc: Subject: socsvn commit: r237061 - soc2012/gpf/pefs_kmod/sbin/pefs X-BeenThere: svn-soc-all@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for the entire Summer of Code repository List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 04 Jun 2012 13:31:35 -0000 Author: gpf Date: Mon Jun 4 13:31:32 2012 New Revision: 237061 URL: http://svnweb.FreeBSD.org/socsvn/?view=rev&rev=237061 Log: properly handle scenario where cuckoo insertion falls into infinite loop. new hash tables are allocated where new_size=next_prime(old_size). all file_header elements are kept in a 'global' tail so that we won't have to re-produce them, should insertion fail. Modified: soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.h Modified: soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c ============================================================================== --- soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c Mon Jun 4 12:49:21 2012 (r237060) +++ soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c Mon Jun 4 13:31:32 2012 (r237061) @@ -53,7 +53,7 @@ #include "pefs_ctl.h" -//#define PEFS_INTEGRITY_DEBUG +#define PEFS_INTEGRITY_DEBUG #if defined (PEFS_INTEGRITY_DEBUG) #define dprintf(a) printf a #else @@ -65,6 +65,7 @@ #define PEFS_EXTRA_TABLE_SIZE 15 TAILQ_HEAD(checksum_head, checksum); +TAILQ_HEAD(file_header_head, file_header); #define PEFS_CFH_SIZE 16 #define PEFS_FH_SIZE 16 @@ -89,9 +90,9 @@ uint32_t nhashes; uint64_t file_id; char path[MAXPATHLEN]; - LIST_ENTRY(file_header) bucket_entries; uint32_t offset_to_checksums; struct checksum_head checksums; + TAILQ_ENTRY(file_header) file_header_entries; }; struct bucket { @@ -132,6 +133,9 @@ { uint32_t i; + if (num == 2 || num == 3) + return num; + if (num % 2 == 0) num+=1; @@ -262,22 +266,30 @@ chtp->buckets1 = NULL; chtp->buckets2 = NULL; } - + static int -pefs_allocate_hash_table(struct cuckoo_hash_table *chtp, uint32_t nelements) +pefs_allocate_hash_table(struct cuckoo_hash_table *chtp, uint32_t nelements, char reallocate) { uint32_t i; - /* - * spending 15% more space for each table lowers the chance to fall into an - * infinite loop during cuckoo insertion to about 1.5%. - */ - chtp->size = pefs_next_prime(nelements + ((nelements * PEFS_EXTRA_TABLE_SIZE)/100)); - chtp->nelements = nelements; - if (chtp->size < chtp->nelements) { - pefs_warn("numeric overflow while computing hash table size"); - return (PEFS_ERR_GENERIC); + if (reallocate == 0) { + /* + * spending 15% more space for each table lowers the chance to fall into an + * infinite loop during cuckoo insertion to about 1.5%. + */ + chtp->size = pefs_next_prime(nelements + ((nelements * PEFS_EXTRA_TABLE_SIZE)/100)); + chtp->nelements = nelements; + if (chtp->size < chtp->nelements) { + pefs_warn("numeric overflow while computing hash table size"); + return (PEFS_ERR_GENERIC); + } } + else { + chtp->size = pefs_next_prime(chtp->size + 1); + free(chtp->buckets1); + free(chtp->buckets2); + } + dprintf(("hash table elem:%u\tsize: %u\n", chtp->nelements, chtp->size)); chtp->buckets1 = malloc (chtp->size * sizeof(struct bucket)); @@ -422,7 +434,7 @@ } pefs_warn("cuckoo_insert resulted in infinite loop!"); - return (PEFS_ERR_CUCKOO_LOOP); + return (PEFS_ERR_GENERIC); } static int @@ -593,19 +605,25 @@ /* * This function creates the in memory database that will be later written to * the checksum file. - * A) The total sum of entries is gathered so that a hash table is allocated. + * A) The total sum of entries is gathered so that the hash tables are allocated. * B) For each file entry: * B1) semantic checks: file should reside in pefs filesystem & * file should be regular file * B2) the file_id is retrieved. * B3) list of checksums is computed for the file's 4k blocks. - * B4) file entry is added to hash table. (cuckoo hashing is used) + * B4) file entry is added to fh_head + * C) Cuckoo insertion: + * We try to populate our hash tables using the cuckoo algorithm. Should we fall + * into an infinite loop during insertion, we re-allocate larger hash tables + * and try again until we succeed. The possibility to fail twice in a row is + * 1.5% * 1.5% = 0.0225% */ static int pefs_create_in_memory_db(FILE *fpin, const EVP_MD *md, uint8_t hash_len, struct cuckoo_hash_table *chtp, char *fsroot) { struct statfs fs; + struct file_header_head fh_head; struct file_header *fhp; int error; uint32_t nfiles; @@ -619,10 +637,11 @@ if (error != 0) return (error); - error = pefs_allocate_hash_table(chtp, nfiles); + error = pefs_allocate_hash_table(chtp, nfiles, 0); if (error != 0) return (error); + TAILQ_INIT(&fh_head); while((fhp = pefs_next_file(fpin, &error)) != NULL) { error = pefs_file_semantic_checks(fhp, &fs); if (error != 0) @@ -636,11 +655,26 @@ if (error != 0) return (error); - error = pefs_add_to_hash_table(chtp, fhp); - if (error != 0) - return (error); + TAILQ_INSERT_TAIL(&fh_head, fhp, file_header_entries); } +cuckoo_insert: + TAILQ_FOREACH(fhp, &fh_head, file_header_entries) { + error = pefs_add_to_hash_table(chtp, fhp); + /* + * cuckoo insertion algorithm fell into an infinite loop! + * Create new, larger hash tables where size = next_prime(old_size) + * and try again. + */ + if (error != 0) { + dprintf(("fell into an infinite loop!\n")); + error = pefs_allocate_hash_table(chtp, nfiles, 1); + if (error != 0) + return (error); + goto cuckoo_insert; + } + } + pefs_print_hash_table(chtp, hash_len); return (error); @@ -933,11 +967,6 @@ error = pefs_create_in_memory_db(fpin, md, hash_len, &checksum_hash_table, fsroot); - /* - * XXXgpf: [TODO] Properly handle PEFS_ERR_CUCKOO_LOOP by retrying with - * larger tables (next prime number?). We shouldn't have to reread all - * file entries btw. - */ if (error != 0) goto out; Modified: soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.h ============================================================================== --- soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.h Mon Jun 4 12:49:21 2012 (r237060) +++ soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.h Mon Jun 4 13:31:32 2012 (r237061) @@ -61,7 +61,6 @@ #define PEFS_ERR_NOENT 5 #define PEFS_ERR_EXIST 6 #define PEFS_ERR_INVALID 7 -#define PEFS_ERR_CUCKOO_LOOP 8 #define PEFS_FS_IGNORE_TYPE 0x0001