Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 12 Apr 2010 12:30:54 GMT
From:      Garrett Cooper <gcooper@FreeBSD.org>
To:        Perforce Change Reviews <perforce@FreeBSD.org>
Subject:   PERFORCE change 176831 for review
Message-ID:  <201004121230.o3CCUsIX029146@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://p4web.freebsd.org/@@176831?ac=10

Change 176831 by gcooper@gcooper-bayonetta on 2010/04/12 12:30:36

	
	Introducing ... unpack_file_to_fd! 
	
	       A shortcut from the atypical unpack operation as extracting a single file from the beginning of a large tarfile is really braindead, so let's extract the file to disk and return a file descriptor instead :) (can't seem to find a file descriptor returning analog with libarchive... hrmmm...).
	
	Overall it could potentially stand to be aligned with the exit code convention from unpack, but for now it works just fine.
	
	bde@-ify headers and fields while I'm at it so things are properly ordered and aligned in memory a tad bit better.
	
	Sidenote: we'll need to implement similar logic with info/perform.c with a small user-defined loop of all potential metadata files so we don't get killed off performance-wise when reading large tarballs like we are today with unpack.
	
	Suggested-by: kientzle

Affected files ...

.. //depot/projects/soc2007/gcooper-pkg_install-enhancements-simplified/usr.sbin/pkg_install/add/perform.c#5 edit
.. //depot/projects/soc2007/gcooper-pkg_install-enhancements-simplified/usr.sbin/pkg_install/lib/file.c#11 edit
.. //depot/projects/soc2007/gcooper-pkg_install-enhancements-simplified/usr.sbin/pkg_install/lib/lib.h#5 edit

Differences ...

==== //depot/projects/soc2007/gcooper-pkg_install-enhancements-simplified/usr.sbin/pkg_install/add/perform.c#5 (text+ko) ====

@@ -60,23 +60,23 @@
 static int
 pkg_do(char *pkg)
 {
+    struct stat sb;
     Package Plist;
+    PackingList p;
     char pkg_fullname[FILENAME_MAX];
     char playpen[FILENAME_MAX];
+    char pre_script[FILENAME_MAX] = INSTALL_FNAME;
+    char post_script[FILENAME_MAX];
+    char pre_arg[FILENAME_MAX], post_arg[FILENAME_MAX];
+    char *conflict[2];
+    char **matched;
     char *extract;
     const char *where_to;
     FILE *cfile;
     int code;
-    PackingList p;
-    struct stat sb;
     int inPlace, conflictsfound, errcode;
     /* support for separate pre/post install scripts */
     int new_m = 0;
-    char pre_script[FILENAME_MAX] = INSTALL_FNAME;
-    char post_script[FILENAME_MAX];
-    char pre_arg[FILENAME_MAX], post_arg[FILENAME_MAX];
-    char *conflict[2];
-    char **matched;
     int fd;
 
     conflictsfound = 0;
@@ -107,7 +107,6 @@
 		warnx("unable to fetch '%s' by URL", pkg);
 		return 1;
 	    }
-	    strcpy(pkg_fullname, pkg);
 	    cfile = fopen(CONTENTS_FNAME, "r");
 	    if (!cfile) {
 		warnx(
@@ -119,33 +118,38 @@
 	    fclose(cfile);
 	}
 	else {
-	    strcpy(pkg_fullname, pkg);		/*
-						 * Copy for sanity's sake,
-						 * could remove pkg_fullname
-						 */
-	    if (strcmp(pkg, "-")) {
-		if (stat(pkg_fullname, &sb) == FAIL) {
-		    warnx("can't stat package file '%s'", pkg_fullname);
+
+	    /* 
+	     * If TRUE: We have to extract the whole thing to disk because
+	     * this could be our one and only shot to do so...
+	     */
+	    Boolean extract_whole_archive_from_stdin = FALSE;
+
+	    if (strcmp(pkg, "-") == 0) {
+		extract_whole_archive_from_stdin = TRUE;
+		sb.st_size = 100000;	/* Make up a plausible average size */
+	    } else {
+		if (stat(pkg, &sb) == FAIL) {
+		    warnx("can't stat package file '%s'", pkg);
 		    goto bomb;
 		}
-		extract = CONTENTS_FNAME;
-	    }
-	    else {
-		extract = NULL;
-		sb.st_size = 100000;	/* Make up a plausible average size */
 	    }
 	    if (!(where_to = make_playpen(playpen, sb.st_size * 4)))
 		errx(1, "unable to make playpen for %lld bytes", (long long)sb.st_size * 4);
 	    /* Since we can call ourselves recursively, keep notes on where we came from */
 	    if (!getenv("_TOP"))
 		setenv("_TOP", where_to, 1);
-	    if (unpack(pkg_fullname, extract)) {
-		warnx(
-	"unable to extract table of contents file from '%s' - not a package?",
-		pkg_fullname);
-		goto bomb;
-	    }
-	    cfile = fopen(CONTENTS_FNAME, "r");
+	    if (extract_whole_archive_from_stdin == TRUE) {
+		if (unpack(NULL, NULL) == 0)
+		    cfile = fopen(CONTENTS_FNAME, "r");
+		else {
+		    warnx("unable to extract table of contents file from '%s' "
+			"- not a package?", pkg);
+		    goto bomb;
+		}
+	    } else
+		cfile = unpack_file_to_fd(pkg, CONTENTS_FNAME);
+
 	    if (!cfile) {
 		warnx(
 	"unable to open table of contents file '%s' - not a package?",
@@ -158,7 +162,7 @@
 	    /* Extract directly rather than moving?  Oh goodie! */
 	    if (find_plist_option(&Plist, "extract-in-place")) {
 		if (Verbose)
-		    printf("Doing in-place extraction for %s\n", pkg_fullname);
+		    printf("Doing in-place extraction for %s\n", pkg);
 		p = find_plist(&Plist, PLIST_CWD);
 		if (p) {
 		    if (!isdir(p->name) && !Fake) {
@@ -174,9 +178,8 @@
 		    inPlace = 1;
 		}
 		else {
-		    warnx(
-		"no prefix specified in '%s' - this is a bad package!",
-			pkg_fullname);
+		    warnx("no prefix specified in '%s' - this is a bad "
+			  "package!", pkg);
 		    goto bomb;
 		}
 	    }
@@ -192,7 +195,7 @@
 "Please set your PKG_TMPDIR variable to point to a location with more\n"
 		       "free space and try again", (long long)sb.st_size * 4);
 		warnx("not extracting %s\ninto %s, sorry!",
-			pkg_fullname, where_to);
+			pkg, where_to);
 		goto bomb;
 	    }
 
@@ -202,8 +205,8 @@
 
 	    /* Finally unpack the whole mess.  If extract is null we
 	       already + did so so don't bother doing it again. */
-	    if (extract && unpack(pkg_fullname, NULL)) {
-		warnx("unable to extract '%s'!", pkg_fullname);
+	    if (extract && unpack(pkg, NULL)) {
+		warnx("unable to extract '%s'!", pkg);
 		goto bomb;
 	    }
 	}
@@ -370,13 +373,13 @@
 		else if ((cp = fileGetURL(pkg, p->name, KeepPackage)) != NULL) {
 		    if (Verbose)
 			printf("Finished loading %s via a URL\n", p->name);
-		    if (!fexists("+CONTENTS")) {
-			warnx("autoloaded package %s has no +CONTENTS file?",
-				p->name);
+		    if (!fexists(CONTENTS_FNAME)) {
+			warnx("autoloaded package %s has no %s file?",
+				p->name, CONTENTS_FNAME);
 			if (!Force)
 			    ++code;
 		    }
-		    else if (vsystem("(pwd; /bin/cat +CONTENTS) | %s %s %s %s -S", PkgAddCmd, Verbose ? "-v" : "", PrefixRecursive ? prefixArg : "", KeepPackage ? "-K" : "")) {
+		    else if (vsystem("(pwd; /bin/cat " CONTENTS_FNAME ") | %s %s %s %s -S", PkgAddCmd, Verbose ? "-v" : "", PrefixRecursive ? prefixArg : "", KeepPackage ? "-K" : "")) {
 			warnx("pkg_add of dependency '%s' failed%s",
 				p->name, Force ? " (proceeding anyway)" : "!");
 			if (!Force)

==== //depot/projects/soc2007/gcooper-pkg_install-enhancements-simplified/usr.sbin/pkg_install/lib/file.c#11 (text+ko) ====

@@ -22,13 +22,13 @@
 __FBSDID("$FreeBSD: src/usr.sbin/pkg_install/lib/file.c,v 1.70 2010/04/01 14:27:29 flz Exp $");
 
 #include "lib.h"
+#include <sys/wait.h>
 #include <archive.h>
 #include <archive_entry.h>
 #include <err.h>
 #include <fnmatch.h>
 #include <pwd.h>
 #include <time.h>
-#include <sys/wait.h>
 
 /* Quick check to see if a file exists */
 Boolean
@@ -341,13 +341,116 @@
 				 ARCHIVE_EXTRACT_TIME  |ARCHIVE_EXTRACT_ACL | \
 				 ARCHIVE_EXTRACT_FFLAGS|ARCHIVE_EXTRACT_XATTR)
 
-/* Unpack a tar file */
+/*
+ * Unpack a single file from a tar-file to a file descriptor; this is written
+ * like so as an optimization to abbreviate the extract to *open step, as well
+ * as to reduce the number of required steps needed when unpacking a tarball on
+ * disk, as the previous method employed with tar(1) used -q // --fast-read .
+ *
+ * Return NULL on failure, and non-NULL on success
+ *
+ * XXX (gcooper): this is currently implemented with FILE* / fopen(3) due to
+ * legacy issues that need to be sorted out over the next couple of weeks for
+ * 1) locking to function properly, and to gain the potential performance boost
+ * by using mmap(2), and read(2) (ugh).
+ *
+ * But first things first, we need a working solution with minimal changes;
+ * then we move on from there.
+ *
+ * [The return code info will eventually be...]
+ *
+ * Return -1 on failure, a value greater than 0 on success [in accordance with
+ * open(2)].
+ */
+FILE*
+unpack_file_to_fd(const char *pkg, const char *file)
+{
+	struct archive *archive;
+	struct archive_entry *archive_entry;
+	Boolean found_match = FALSE;
+
+	const char *entry_pathname = NULL;
+	const char *error = NULL;
+	const char *pkg_name_humanized;
+
+	FILE *fd = NULL;
+	/* int fd = -1; */
+	int r;
+
+	if (Verbose)
+		printf("%s: will extract %s from %s\n", __func__, file, pkg);
+
+	archive = archive_read_new();
+	archive_read_support_compression_all(archive);
+	archive_read_support_format_tar(archive);
+
+	/* The initial open failed */
+	if (archive_read_open_filename(archive, pkg,
+	    ARCHIVE_DEFAULT_BYTES_PER_BLOCK) != ARCHIVE_OK) {
+
+		error = archive_error_string(archive);
+		warnx("%s: unable to open the package from %s: %s",
+		    __func__, pkg_name_humanized, error);
+
+	}
+	else
+		while (error == NULL && found_match == FALSE &&
+		    (r = archive_read_next_header(archive, &archive_entry)) ==
+		     ARCHIVE_OK) {
+
+			entry_pathname = archive_entry_pathname(archive_entry);
+
+			if (strncmp(file, entry_pathname, PATH_MAX) == 0) {
+
+				/* 
+				 * Regardless of whether or not extract passes,
+				 * we found our target file so let's exit
+				 * quickly because the underlying issue is most
+				 * likely unrecoverable.
+				 */
+				found_match = TRUE;
+
+				r = archive_read_extract(archive, archive_entry,
+				    EXTRACT_ARCHIVE_FLAGS);
+				if (r == ARCHIVE_OK) {
+					if (Verbose)
+						printf("X - %s\n",
+						    entry_pathname);
+					fd = fopen(entry_pathname, "r");
+				} else {
+					error = archive_error_string(archive);
+					warnx("%s: extraction for %s failed: "
+					    "%s", __func__, pkg_name_humanized,
+					    error);
+				}
+
+			} else
+				if (Verbose)
+					printf("S - %s\n", entry_pathname);
+
+		}
+
+	archive_read_finish(archive);
+
+	return fd;
+
+}
+
+/* 
+ * Unpack a tar file, or a subset of the contents.
+ *
+ * Return 0 on success, 1 on failure
+ *
+ * NOTE: the exit code is 0 / 1 so that this can be fed directly into exit
+ * when doing piped tar commands for copying hierarchies *hint*, *hint*.
+ */
 int
 unpack(const char *pkg, const char *file_expr)
 {
 	struct archive *archive;
 	struct archive_entry *archive_entry;
 	Boolean extract_whole_archive = FALSE;
+	const char *entry_pathname = NULL;
 	const char *error = NULL;
 	const char *pkg_name_humanized;
 	int r;
@@ -355,19 +458,21 @@
 	if (file_expr == NULL || strcmp("*", file_expr) == 0)
 		extract_whole_archive = TRUE;
 
+	if (pkg == NULL || strcmp(pkg, "-") == 0)
+		pkg_name_humanized = "(stdin)";
+	else
+		pkg_name_humanized = pkg;
+
 	if (Verbose) {
 		if (extract_whole_archive)
-			printf("%s: will extract whole archive\n", __func__);
+			printf("%s: %s - will extract whole archive\n",
+			    pkg_name_humanized, __func__);
 		else
-			printf("%s: will extract files that match: %s\n",
-			    __func__, file_expr);
+			printf("%s: %s - will extract files that match "
+			       "expression: %s\n",
+			    pkg_name_humanized, __func__, file_expr);
 	}
 
-	if (pkg == NULL || strcmp(pkg, "-") == 0)
-		pkg_name_humanized = "(stdin)";
-	else
-		pkg_name_humanized = pkg;
-
 	archive = archive_read_new();
 	archive_read_support_compression_all(archive);
 	archive_read_support_format_tar(archive);
@@ -386,29 +491,29 @@
 		    (r = archive_read_next_header(archive, &archive_entry)) ==
 		     ARCHIVE_OK) {
 
+			entry_pathname = archive_entry_pathname(archive_entry);
+
 			/* Let's extract the whole archive, or just a file. */
 			if (extract_whole_archive == TRUE ||
-			    (fnmatch(file_expr,
-			         archive_entry_pathname(archive_entry),
-			         FNM_PATHNAME) == 0)) {
+			    (fnmatch(file_expr, entry_pathname,
+				FNM_PATHNAME)) == 0) {
 
 				r = archive_read_extract(archive, archive_entry,
 				    EXTRACT_ARCHIVE_FLAGS);
-				if (r != ARCHIVE_OK) {
+				if (r == ARCHIVE_OK) {
+					if (Verbose)
+						printf("X - %s\n",
+						    entry_pathname);
+				} else {
 					error = archive_error_string(archive);
 					warnx("%s: extraction for %s failed: "
 					    "%s", __func__, pkg_name_humanized,
 					    error);
 				}
-				if (Verbose) {
-					printf("X - %s\n",
-					    archive_entry_pathname(archive_entry));
-				}
 
-			} else if (Verbose) {
-				printf("S - %s\n",
-				    archive_entry_pathname(archive_entry));
-			}
+			} else
+				if (Verbose)
+					printf("S - %s\n", entry_pathname);
 
 		}
 

==== //depot/projects/soc2007/gcooper-pkg_install-enhancements-simplified/usr.sbin/pkg_install/lib/lib.h#5 (text+ko) ====

@@ -188,6 +188,7 @@
 void		copy_hierarchy(const char *, const char *, Boolean);
 int		delete_hierarchy(const char *, Boolean, Boolean);
 int		unpack(const char *, const char *);
+FILE*		unpack_file_to_fd(const char*, const char *);
 void		format_cmd(char *, int, const char *, const char *, const char *);
 
 /* Msg */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201004121230.o3CCUsIX029146>