Date: Wed, 06 Jun 2012 11:03:05 +0000
From: gpf@FreeBSD.org
To: svn-soc-all@FreeBSD.org
Subject: socsvn commit: r237202 - soc2012/gpf/pefs_kmod/sbin/pefs
Message-ID: <20120606110305.B98AB106564A@hub.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: gpf Date: Wed Jun 6 11:03:05 2012 New Revision: 237202 URL: http://svnweb.FreeBSD.org/socsvn/?view=rev&rev=237202 Log: -hardlinks: A rb tree of 'hardlink_counter' structs is used with inodes as keys. This tree is used to print warnings to the user when the number of links found for a specific inode is less than the total number of links. Modified: soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.c Modified: soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c ============================================================================== --- soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c Wed Jun 6 10:56:59 2012 (r237201) +++ soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c Wed Jun 6 11:03:05 2012 (r237202) @@ -32,6 +32,7 @@ #include <sys/ioctl.h> #include <sys/mount.h> #include <sys/queue.h> +#include <sys/tree.h> #include <sys/types.h> #include <sys/stat.h> #include <sys/fnv_hash.h> @@ -67,20 +68,32 @@ TAILQ_HEAD(checksum_head, checksum); TAILQ_HEAD(file_header_head, file_header); +TAILQ_HEAD(hardlink_fh_head, file_header); + +RB_HEAD(hardlink_head, hardlink_counter); +RB_PROTOTYPE(hardlink_head, hardlink_counter, hardlink_entries, pefs_rb_cmp); #define PEFS_CFH_SIZE 16 #define PEFS_FH_SIZE 16 +struct hardlink_counter { + ino_t inode; + uint32_t total_links; + uint32_t links_found; + struct hardlink_fh_head file_headers; + RB_ENTRY(hardlink_counter) hardlink_entries; +}; + /* XXXgpf: [TODO] check pathname string lengths. Some are MAXPATHLEN + 1, some MAXPATHLEN */ /* XXXgpf: unions for on disk structs and move to a different header? 
*/ struct checksum_file_header { - uint8_t version; - uint8_t reserved; - uint8_t hash_len; - uint8_t hash_algo[8]; - uint8_t offset_to_hash_table; - uint32_t hash_table_size; + uint8_t version; + uint8_t reserved; + uint8_t hash_len; + uint8_t hash_algo[8]; + uint8_t offset_to_hash_table; + uint32_t hash_table_size; }; struct checksum { @@ -97,6 +110,7 @@ uint32_t offset_to_checksums; struct checksum_head checksums; TAILQ_ENTRY(file_header) file_header_entries; + TAILQ_ENTRY(file_header) fh_hardlink_entries; }; struct bucket { @@ -471,6 +485,7 @@ if (fhp != NULL) { //dprintf(("\tpath=%s\tid = %llu\tnhashes = %d\n", fhp->path, fhp->file_id, fhp->nhashes)); dprintf(("\tid = %llu\tnhashes = %d\n", fhp->file_id, fhp->nhashes)); + dprintf(("\tpath = %s\n", fhp->path)); TAILQ_FOREACH(csp, &(fhp->checksums), checksum_entries) { dprintf(("\t\tdigest=")); for (j = 0; j < hash_len; j++) @@ -548,8 +563,87 @@ return (error); } +/* XXXgpf: for debugging purposes */ +static void +pefs_rb_print(struct hardlink_head *hlc_headp) +{ + struct hardlink_counter *hlcp; + struct file_header *fhp; + + dprintf(("\n+++Printing RB tree+++\n\n")); + RB_FOREACH(hlcp, hardlink_head, hlc_headp) { + dprintf(("inode %d\ttotal links %d\tlinks found %d\n", + hlcp->inode, hlcp->total_links, hlcp->links_found)); + TAILQ_FOREACH(fhp, &(hlcp->file_headers), fh_hardlink_entries) { + dprintf(("\tpath: %s\n", fhp->path)); + } + } +} + +static void +pefs_rb_warn(struct hardlink_head *hlc_headp) +{ + struct hardlink_counter *hlcp; + struct file_header *fhp; + int i; + + RB_FOREACH(hlcp, hardlink_head, hlc_headp) { + if (hlcp->total_links > hlcp->links_found) { + pefs_warn("%d hard links of total %d were found in input list for file with inode: %d", + hlcp->links_found, hlcp->total_links, hlcp->inode); + i = 1; + TAILQ_FOREACH(fhp, &(hlcp->file_headers), fh_hardlink_entries) { + pefs_warn("link %d: %s", i++, fhp->path); + } + } + } +} + +/* XXXgpf: [TODO] comments */ +static int +pefs_rb_insert(struct 
hardlink_head *hlc_headp, struct file_header *fhp, struct stat *sbp) +{ + struct hardlink_counter find, *res, *new_hlcp; + + find.inode = sbp->st_ino; + res = RB_FIND(hardlink_head, hlc_headp, &find); + + if (res != NULL) { + res->links_found++; + TAILQ_INSERT_TAIL(&(res->file_headers), fhp, fh_hardlink_entries); + } + else { + new_hlcp = malloc(sizeof(struct hardlink_counter)); + if (new_hlcp == NULL) { + warn("memory allocation error"); + return (PEFS_ERR_SYS); + } + + new_hlcp->inode = sbp->st_ino; + new_hlcp->total_links = sbp->st_nlink; + new_hlcp->links_found = 1; + TAILQ_INIT(&(new_hlcp->file_headers)); + TAILQ_INSERT_TAIL(&(new_hlcp->file_headers), fhp, fh_hardlink_entries); + + RB_INSERT(hardlink_head, hlc_headp, new_hlcp); + } + + return (0); +} + static int -pefs_file_semantic_checks(struct file_header *fhp, struct statfs *fsp) +pefs_rb_cmp(struct hardlink_counter *hlcp1, struct hardlink_counter *hlcp2) +{ + if (hlcp1->inode < hlcp2->inode) + return -1; + else if (hlcp1->inode > hlcp2->inode) + return 1; + else + return 0; +} + +static int +pefs_file_semantic_checks(struct file_header *fhp, struct statfs *fsp, struct hardlink_head *hlc_headp) { char parent_dir[MAXPATHLEN]; char sbuf[MAXPATHLEN]; @@ -647,6 +741,11 @@ fhp->path, fsp->f_mntonname); return (PEFS_ERR_INVALID); } + + /* Keep all hardlink file headers in a rb tree */ + if (sb.st_nlink > 1) + return (pefs_rb_insert(hlc_headp, fhp, &sb)); + return (0); } @@ -687,16 +786,24 @@ * the checksum file. * A) The total sum of entries is gathered so that the hash tables are allocated. * B) For each file entry: - * B1) semantic checks: file should reside in pefs filesystem & - * file should be regular file + * B1) semantic checks: + * B1a) file should reside in pefs filesystem & file should be regular file. + * B1b) if symlink, acquire and save the absolute path of the symlink's + * target. Try to stat() the target but don't do anything else. 
+ * B1c) If hardlink, save a reference to this file entry in our rb tree. + * rb-tree uses inodes as keys and is used in part C to print warnings. * B2) the file_id is retrieved. * B3) list of checksums is computed for the file's 4k blocks. - * B4) file entry is added to fh_head - * C) Cuckoo insertion: + * B4) file entry is added to universal fh_head. + * C) Print warnings for hardlinks if the number of links found in inputlist isn't + * equal to the number of total inode links. + * D) Cuckoo insertion: * We try to populate our hash tables using the cuckoo algorithm. Should we fall * into an infinite loop during insertion, we re-allocate larger hash tables * and try again until we succeed. The possibility to fail twice in a row is * 1.5% * 1.5% = 0.0225% + * + * XXXgpf: [TODO] more comments */ static int pefs_create_in_memory_db(FILE *fpin, const EVP_MD *md, uint8_t hash_len, @@ -704,6 +811,7 @@ { struct statfs fs; struct file_header_head fh_head; + struct hardlink_head hlc_head; struct file_header *fhp; int error; uint32_t nfiles; @@ -722,8 +830,9 @@ return (error); TAILQ_INIT(&fh_head); + RB_INIT(&hlc_head); while((fhp = pefs_next_file(fpin, &error)) != NULL) { - error = pefs_file_semantic_checks(fhp, &fs); + error = pefs_file_semantic_checks(fhp, &fs, &hlc_head); if (error != 0) return (error); @@ -742,6 +851,12 @@ if (error != 0) return (error); + pefs_rb_print(&hlc_head); + pefs_rb_warn(&hlc_head); + /* + * XXXgpf: [TODO] print warnings for dem hardlinks + */ + cuckoo_insert: TAILQ_FOREACH(fhp, &fh_head, file_header_entries) { error = pefs_add_to_hash_table(chtp, fhp); @@ -1067,3 +1182,5 @@ return (error); } + +RB_GENERATE(hardlink_head, hardlink_counter, hardlink_entries, pefs_rb_cmp); Modified: soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.c ============================================================================== --- soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.c Wed Jun 6 10:56:59 2012 (r237201) +++ soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.c Wed Jun 6 
11:03:05 2012 (r237202) @@ -1032,6 +1032,12 @@ int error, i, j; const char *algo; + /* + * XXXgpf: [TODO] Now, all input file entries are kept in a 'global' tail structure + * and insertion into hash table occurs after all of them are read/parsed. Therefore, + * it is possible to have fpin = stdin by default and not require an input file, since + * we will not have to go through the input list twice, thus requiring a rewind(). + */ fpin = NULL; /* by default use sha256 */ algo = supported_digests[0];
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20120606110305.B98AB106564A>