From owner-p4-projects@FreeBSD.ORG Mon Apr 12 12:30:55 2010 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id 7D5551065674; Mon, 12 Apr 2010 12:30:55 +0000 (UTC) Delivered-To: perforce@FreeBSD.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 291261065670 for ; Mon, 12 Apr 2010 12:30:55 +0000 (UTC) (envelope-from gcooper@FreeBSD.org) Received: from repoman.freebsd.org (repoman.freebsd.org [IPv6:2001:4f8:fff6::29]) by mx1.freebsd.org (Postfix) with ESMTP id 155FA8FC13 for ; Mon, 12 Apr 2010 12:30:55 +0000 (UTC) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.14.3/8.14.3) with ESMTP id o3CCUsru029148 for ; Mon, 12 Apr 2010 12:30:54 GMT (envelope-from gcooper@FreeBSD.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.14.3/8.14.3/Submit) id o3CCUsIX029146 for perforce@freebsd.org; Mon, 12 Apr 2010 12:30:54 GMT (envelope-from gcooper@FreeBSD.org) Date: Mon, 12 Apr 2010 12:30:54 GMT Message-Id: <201004121230.o3CCUsIX029146@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to gcooper@FreeBSD.org using -f From: Garrett Cooper To: Perforce Change Reviews Precedence: bulk Cc: Subject: PERFORCE change 176831 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 12 Apr 2010 12:30:55 -0000 http://p4web.freebsd.org/@@176831?ac=10 Change 176831 by gcooper@gcooper-bayonetta on 2010/04/12 12:30:36 Introducing ... unpack_file_to_fd! A shortcut from the atypical unpack operation as extracting a single file from the beginning of a large tarfile is really braindead, so let's extract the file to disk and return a file descriptor instead :) (can't seem to find a file descriptor returning analog with libarchive... hrmmm...). Overall it could potentially stand to be aligned with the exit code convention from unpack, but for now it works just fine. bde@-ify headers and fields while I'm at it so things are properly ordered and aligned in memory a tad bit better. Sidenote: we'll need to implement similar logic with info/perform.c with a small user-defined loop of all potential metadata files so we don't get killed off performance-wise when reading large tarballs like we are today with unpack. Suggested-by: kientzle Affected files ... .. //depot/projects/soc2007/gcooper-pkg_install-enhancements-simplified/usr.sbin/pkg_install/add/perform.c#5 edit .. //depot/projects/soc2007/gcooper-pkg_install-enhancements-simplified/usr.sbin/pkg_install/lib/file.c#11 edit .. //depot/projects/soc2007/gcooper-pkg_install-enhancements-simplified/usr.sbin/pkg_install/lib/lib.h#5 edit Differences ... ==== //depot/projects/soc2007/gcooper-pkg_install-enhancements-simplified/usr.sbin/pkg_install/add/perform.c#5 (text+ko) ==== @@ -60,23 +60,23 @@ static int pkg_do(char *pkg) { + struct stat sb; Package Plist; + PackingList p; char pkg_fullname[FILENAME_MAX]; char playpen[FILENAME_MAX]; + char pre_script[FILENAME_MAX] = INSTALL_FNAME; + char post_script[FILENAME_MAX]; + char pre_arg[FILENAME_MAX], post_arg[FILENAME_MAX]; + char *conflict[2]; + char **matched; char *extract; const char *where_to; FILE *cfile; int code; - PackingList p; - struct stat sb; int inPlace, conflictsfound, errcode; /* support for separate pre/post install scripts */ int new_m = 0; - char pre_script[FILENAME_MAX] = INSTALL_FNAME; - char post_script[FILENAME_MAX]; - char pre_arg[FILENAME_MAX], post_arg[FILENAME_MAX]; - char *conflict[2]; - char **matched; int fd; conflictsfound = 0; @@ -107,7 +107,6 @@ warnx("unable to fetch '%s' by URL", pkg); return 1; } - strcpy(pkg_fullname, pkg); cfile = fopen(CONTENTS_FNAME, "r"); if (!cfile) { warnx( @@ -119,33 +118,38 @@ fclose(cfile); } else { - strcpy(pkg_fullname, pkg); /* - * Copy for sanity's sake, - * could remove pkg_fullname - */ - if (strcmp(pkg, "-")) { - if (stat(pkg_fullname, &sb) == FAIL) { - warnx("can't stat package file '%s'", pkg_fullname); + + /* + * If TRUE: We have to extract the whole thing to disk because + * this could be our one and only shot to do so... + */ + Boolean extract_whole_archive_from_stdin = FALSE; + + if (strcmp(pkg, "-") == 0) { + extract_whole_archive_from_stdin = TRUE; + sb.st_size = 100000; /* Make up a plausible average size */ + } else { + if (stat(pkg, &sb) == FAIL) { + warnx("can't stat package file '%s'", pkg); goto bomb; } - extract = CONTENTS_FNAME; - } - else { - extract = NULL; - sb.st_size = 100000; /* Make up a plausible average size */ } if (!(where_to = make_playpen(playpen, sb.st_size * 4))) errx(1, "unable to make playpen for %lld bytes", (long long)sb.st_size * 4); /* Since we can call ourselves recursively, keep notes on where we came from */ if (!getenv("_TOP")) setenv("_TOP", where_to, 1); - if (unpack(pkg_fullname, extract)) { - warnx( - "unable to extract table of contents file from '%s' - not a package?", - pkg_fullname); - goto bomb; - } - cfile = fopen(CONTENTS_FNAME, "r"); + if (extract_whole_archive_from_stdin == TRUE) { + if (unpack(NULL, NULL) == 0) + cfile = fopen(CONTENTS_FNAME, "r"); + else { + warnx("unable to extract table of contents file from '%s' " + "- not a package?", pkg); + goto bomb; + } + } else + cfile = unpack_file_to_fd(pkg, CONTENTS_FNAME); + if (!cfile) { warnx( "unable to open table of contents file '%s' - not a package?", @@ -158,7 +162,7 @@ /* Extract directly rather than moving? Oh goodie! */ if (find_plist_option(&Plist, "extract-in-place")) { if (Verbose) - printf("Doing in-place extraction for %s\n", pkg_fullname); + printf("Doing in-place extraction for %s\n", pkg); p = find_plist(&Plist, PLIST_CWD); if (p) { if (!isdir(p->name) && !Fake) { @@ -174,9 +178,8 @@ inPlace = 1; } else { - warnx( - "no prefix specified in '%s' - this is a bad package!", - pkg_fullname); + warnx("no prefix specified in '%s' - this is a bad " + "package!", pkg); goto bomb; } } @@ -192,7 +195,7 @@ "Please set your PKG_TMPDIR variable to point to a location with more\n" "free space and try again", (long long)sb.st_size * 4); warnx("not extracting %s\ninto %s, sorry!", - pkg_fullname, where_to); + pkg, where_to); goto bomb; } @@ -202,8 +205,8 @@ /* Finally unpack the whole mess. If extract is null we already + did so so don't bother doing it again. */ - if (extract && unpack(pkg_fullname, NULL)) { - warnx("unable to extract '%s'!", pkg_fullname); + if (extract && unpack(pkg, NULL)) { + warnx("unable to extract '%s'!", pkg); goto bomb; } } @@ -370,13 +373,13 @@ else if ((cp = fileGetURL(pkg, p->name, KeepPackage)) != NULL) { if (Verbose) printf("Finished loading %s via a URL\n", p->name); - if (!fexists("+CONTENTS")) { - warnx("autoloaded package %s has no +CONTENTS file?", - p->name); + if (!fexists(CONTENTS_FNAME)) { + warnx("autoloaded package %s has no %s file?", + p->name, CONTENTS_FNAME); if (!Force) ++code; } - else if (vsystem("(pwd; /bin/cat +CONTENTS) | %s %s %s %s -S", PkgAddCmd, Verbose ? "-v" : "", PrefixRecursive ? prefixArg : "", KeepPackage ? "-K" : "")) { + else if (vsystem("(pwd; /bin/cat " CONTENTS_FNAME ") | %s %s %s %s -S", PkgAddCmd, Verbose ? "-v" : "", PrefixRecursive ? prefixArg : "", KeepPackage ? "-K" : "")) { warnx("pkg_add of dependency '%s' failed%s", p->name, Force ? " (proceeding anyway)" : "!"); if (!Force) ==== //depot/projects/soc2007/gcooper-pkg_install-enhancements-simplified/usr.sbin/pkg_install/lib/file.c#11 (text+ko) ==== @@ -22,13 +22,13 @@ __FBSDID("$FreeBSD: src/usr.sbin/pkg_install/lib/file.c,v 1.70 2010/04/01 14:27:29 flz Exp $"); #include "lib.h" +#include #include #include #include #include #include #include -#include /* Quick check to see if a file exists */ Boolean @@ -341,13 +341,116 @@ ARCHIVE_EXTRACT_TIME |ARCHIVE_EXTRACT_ACL | \ ARCHIVE_EXTRACT_FFLAGS|ARCHIVE_EXTRACT_XATTR) -/* Unpack a tar file */ +/* + * Unpack a single file from a tar-file to a file descriptor; this is written + * like so as an optimization to abbreviate the extract to *open step, as well + * as to reduce the number of required steps needed when unpacking a tarball on + * disk, as the previous method employed with tar(1) used -q // --fast-read . + * + * Return NULL on failure, and non-NULL on success + * + * XXX (gcooper): this is currently implemented with FILE* / fopen(3) due to + * legacy issues that need to be sorted out over the next couple of weeks for + * 1) locking to function properly, and to gain the potential performance boost + * by using mmap(2), and read(2) (ugh). + * + * But first things first, we need a working solution with minimal changes; + * then we move on from there. + * + * [The return code info will eventually be...] + * + * Return -1 on failure, a value greater than 0 on success [in accordance with + * open(2)]. + */ +FILE* +unpack_file_to_fd(const char *pkg, const char *file) +{ + struct archive *archive; + struct archive_entry *archive_entry; + Boolean found_match = FALSE; + + const char *entry_pathname = NULL; + const char *error = NULL; + const char *pkg_name_humanized; + + FILE *fd = NULL; + /* int fd = -1; */ + int r; + + if (Verbose) + printf("%s: will extract %s from %s\n", __func__, file, pkg); + + archive = archive_read_new(); + archive_read_support_compression_all(archive); + archive_read_support_format_tar(archive); + + /* The initial open failed */ + if (archive_read_open_filename(archive, pkg, + ARCHIVE_DEFAULT_BYTES_PER_BLOCK) != ARCHIVE_OK) { + + error = archive_error_string(archive); + warnx("%s: unable to open the package from %s: %s", + __func__, pkg_name_humanized, error); + + } + else + while (error == NULL && found_match == FALSE && + (r = archive_read_next_header(archive, &archive_entry)) == + ARCHIVE_OK) { + + entry_pathname = archive_entry_pathname(archive_entry); + + if (strncmp(file, entry_pathname, PATH_MAX) == 0) { + + /* + * Regardless of whether or not extract passes, + * we found our target file so let's exit + * quickly because the underlying issue is most + * likely unrecoverable. + */ + found_match = TRUE; + + r = archive_read_extract(archive, archive_entry, + EXTRACT_ARCHIVE_FLAGS); + if (r == ARCHIVE_OK) { + if (Verbose) + printf("X - %s\n", + entry_pathname); + fd = fopen(entry_pathname, "r"); + } else { + error = archive_error_string(archive); + warnx("%s: extraction for %s failed: " + "%s", __func__, pkg_name_humanized, + error); + } + + } else + if (Verbose) + printf("S - %s\n", entry_pathname); + + } + + archive_read_finish(archive); + + return fd; + +} + +/* + * Unpack a tar file, or a subset of the contents. + * + * Return 0 on success, 1 on failure + * + * NOTE: the exit code is 0 / 1 so that this can be fed directly into exit + * when doing piped tar commands for copying hierarchies *hint*, *hint*. + */ int unpack(const char *pkg, const char *file_expr) { struct archive *archive; struct archive_entry *archive_entry; Boolean extract_whole_archive = FALSE; + const char *entry_pathname = NULL; const char *error = NULL; const char *pkg_name_humanized; int r; @@ -355,19 +458,21 @@ if (file_expr == NULL || strcmp("*", file_expr) == 0) extract_whole_archive = TRUE; + if (pkg == NULL || strcmp(pkg, "-") == 0) + pkg_name_humanized = "(stdin)"; + else + pkg_name_humanized = pkg; + if (Verbose) { if (extract_whole_archive) - printf("%s: will extract whole archive\n", __func__); + printf("%s: %s - will extract whole archive\n", + pkg_name_humanized, __func__); else - printf("%s: will extract files that match: %s\n", - __func__, file_expr); + printf("%s: %s - will extract files that match " + "expression: %s\n", + pkg_name_humanized, __func__, file_expr); } - if (pkg == NULL || strcmp(pkg, "-") == 0) - pkg_name_humanized = "(stdin)"; - else - pkg_name_humanized = pkg; - archive = archive_read_new(); archive_read_support_compression_all(archive); archive_read_support_format_tar(archive); @@ -386,29 +491,29 @@ (r = archive_read_next_header(archive, &archive_entry)) == ARCHIVE_OK) { + entry_pathname = archive_entry_pathname(archive_entry); + /* Let's extract the whole archive, or just a file. */ if (extract_whole_archive == TRUE || - (fnmatch(file_expr, - archive_entry_pathname(archive_entry), - FNM_PATHNAME) == 0)) { + (fnmatch(file_expr, entry_pathname, + FNM_PATHNAME)) == 0) { r = archive_read_extract(archive, archive_entry, EXTRACT_ARCHIVE_FLAGS); - if (r != ARCHIVE_OK) { + if (r == ARCHIVE_OK) { + if (Verbose) + printf("X - %s\n", + entry_pathname); + } else { error = archive_error_string(archive); warnx("%s: extraction for %s failed: " "%s", __func__, pkg_name_humanized, error); } - if (Verbose) { - printf("X - %s\n", - archive_entry_pathname(archive_entry)); - } - } else if (Verbose) { - printf("S - %s\n", - archive_entry_pathname(archive_entry)); - } + } else + if (Verbose) + printf("S - %s\n", entry_pathname); } ==== //depot/projects/soc2007/gcooper-pkg_install-enhancements-simplified/usr.sbin/pkg_install/lib/lib.h#5 (text+ko) ==== @@ -188,6 +188,7 @@ void copy_hierarchy(const char *, const char *, Boolean); int delete_hierarchy(const char *, Boolean, Boolean); int unpack(const char *, const char *); +FILE* unpack_file_to_fd(const char*, const char *); void format_cmd(char *, int, const char *, const char *, const char *); /* Msg */