From owner-svn-src-all@FreeBSD.ORG Tue Dec 29 06:34:23 2009 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 7726A106568F; Tue, 29 Dec 2009 06:34:23 +0000 (UTC) (envelope-from kientzle@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 641C38FC1C; Tue, 29 Dec 2009 06:34:23 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id nBT6YNWm013714; Tue, 29 Dec 2009 06:34:23 GMT (envelope-from kientzle@svn.freebsd.org) Received: (from kientzle@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id nBT6YNrV013712; Tue, 29 Dec 2009 06:34:23 GMT (envelope-from kientzle@svn.freebsd.org) Message-Id: <200912290634.nBT6YNrV013712@svn.freebsd.org> From: Tim Kientzle Date: Tue, 29 Dec 2009 06:34:23 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r201170 - head/lib/libarchive X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 29 Dec 2009 06:34:23 -0000 Author: kientzle Date: Tue Dec 29 06:34:23 2009 New Revision: 201170 URL: http://svn.freebsd.org/changeset/base/201170 Log: Essentially eliminate problems with false hardlinks in POSIX cpio archives by generating synthetic ino values and mapping values from disk into the new values. Modified: head/lib/libarchive/archive_write_set_format_cpio.c Modified: head/lib/libarchive/archive_write_set_format_cpio.c ============================================================================== --- head/lib/libarchive/archive_write_set_format_cpio.c Tue Dec 29 06:27:19 2009 (r201169) +++ head/lib/libarchive/archive_write_set_format_cpio.c Tue Dec 29 06:34:23 2009 (r201170) @@ -54,6 +54,12 @@ static int64_t format_octal_recursive(in struct cpio { uint64_t entry_bytes_remaining; + + int64_t ino_next; + + struct { int64_t old; int new;} *ino_list; + size_t ino_list_size; + size_t ino_list_next; }; struct cpio_header { @@ -103,35 +109,102 @@ archive_write_set_format_cpio(struct arc return (ARCHIVE_OK); } +/* + * Ino values are as long as 64 bits on some systems; cpio format + * only allows 18 bits and relies on the ino values to identify hardlinked + * files. So, we can't merely "hash" the ino numbers since collisions + * would corrupt the archive. Instead, we generate synthetic ino values + * to store in the archive and maintain a map of original ino values to + * synthetic ones so we can preserve hardlink information. + * + * TODO: Make this more efficient. It's not as bad as it looks (most + * files don't have any hardlinks and we don't do any work here for those), + * but it wouldn't be hard to do better. + * + * TODO: Work with dev/ino pairs here instead of just ino values. + */ +static int +synthesize_ino_value(struct cpio *cpio, struct archive_entry *entry) +{ + int64_t ino = archive_entry_ino64(entry); + int ino_new; + size_t i; + + /* + * If no index number was given, don't assign one. In + * particular, this handles the end-of-archive marker + * correctly by giving it a zero index value. (This is also + * why we start our synthetic index numbers with one below.) + */ + if (ino == 0) + return (0); + + /* Don't store a mapping if we don't need to. */ + if (archive_entry_nlink(entry) < 2) { + return ++cpio->ino_next; + } + + /* Look up old ino; if we have it, this is a hardlink + * and we reuse the same value. */ + for (i = 0; i < cpio->ino_list_next; ++i) { + if (cpio->ino_list[i].old == ino) + return (cpio->ino_list[i].new); + } + + /* Assign a new index number. */ + ino_new = ++cpio->ino_next; + + /* Ensure space for the new mapping. */ + if (cpio->ino_list_size <= cpio->ino_list_next) { + size_t newsize = cpio->ino_list_size < 512 + ? 512 : cpio->ino_list_size * 2; + void *newlist = realloc(cpio->ino_list, + sizeof(cpio->ino_list[0]) * newsize); + if (newlist == NULL) + return (-1); + + cpio->ino_list_size = newsize; + cpio->ino_list = newlist; + } + + /* Record and return the new value. */ + cpio->ino_list[cpio->ino_list_next].old = ino; + cpio->ino_list[cpio->ino_list_next].new = ino_new; + ++cpio->ino_list_next; + return (ino_new); +} + static int archive_write_cpio_header(struct archive_write *a, struct archive_entry *entry) { struct cpio *cpio; const char *p, *path; - int pathlength, ret; + int pathlength, ret, ret2; + int64_t ino; struct cpio_header h; cpio = (struct cpio *)a->format_data; - ret = 0; + ret2 = ARCHIVE_OK; path = archive_entry_pathname(entry); - pathlength = strlen(path) + 1; /* Include trailing null. */ + pathlength = (int)strlen(path) + 1; /* Include trailing null. */ memset(&h, 0, sizeof(h)); format_octal(070707, &h.c_magic, sizeof(h.c_magic)); format_octal(archive_entry_dev(entry), &h.c_dev, sizeof(h.c_dev)); - /* - * TODO: Generate artificial inode numbers rather than just - * re-using the ones off the disk. That way, the 18-bit c_ino - * field only limits the number of files in the archive. - */ - if ((int)archive_entry_ino(entry) > 0777777) { + + ino = synthesize_ino_value(cpio, entry); + if (ino < 0) { + archive_set_error(&a->archive, ENOMEM, + "No memory for ino translation table"); + return (ARCHIVE_FATAL); + } else if (ino > 0777777) { archive_set_error(&a->archive, ERANGE, - "large inode number truncated"); - ret = ARCHIVE_WARN; + "Too many files for this cpio format"); + return (ARCHIVE_FATAL); } + format_octal(ino & 0777777, &h.c_ino, sizeof(h.c_ino)); - format_octal(archive_entry_ino(entry) & 0777777, &h.c_ino, sizeof(h.c_ino)); format_octal(archive_entry_mode(entry), &h.c_mode, sizeof(h.c_mode)); format_octal(archive_entry_uid(entry), &h.c_uid, sizeof(h.c_uid)); format_octal(archive_entry_gid(entry), &h.c_gid, sizeof(h.c_gid)); @@ -170,6 +243,8 @@ archive_write_cpio_header(struct archive if (p != NULL && *p != '\0') ret = (a->compressor.write)(a, p, strlen(p)); + if (ret == ARCHIVE_OK) + ret = ret2; return (ret); } @@ -218,17 +293,15 @@ format_octal_recursive(int64_t v, char * return (v); v = format_octal_recursive(v, p+1, s-1); *p = '0' + (v & 7); - return (v >>= 3); + return (v >> 3); } static int archive_write_cpio_finish(struct archive_write *a) { - struct cpio *cpio; int er; struct archive_entry *trailer; - cpio = (struct cpio *)a->format_data; trailer = archive_entry_new(); /* nlink = 1 here for GNU cpio compat. */ archive_entry_set_nlink(trailer, 1); @@ -244,6 +317,7 @@ archive_write_cpio_destroy(struct archiv struct cpio *cpio; cpio = (struct cpio *)a->format_data; + free(cpio->ino_list); free(cpio); a->format_data = NULL; return (ARCHIVE_OK); @@ -253,7 +327,8 @@ static int archive_write_cpio_finish_entry(struct archive_write *a) { struct cpio *cpio; - int to_write, ret; + size_t to_write; + int ret; cpio = (struct cpio *)a->format_data; ret = ARCHIVE_OK;