Skip site navigation (1) | Skip section navigation (2)
Date:      Sat, 6 Dec 2008 06:45:15 +0000 (UTC)
From:      Tim Kientzle <kientzle@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r185679 - head/lib/libarchive
Message-ID:  <200812060645.mB66jFQ7085836@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kientzle
Date: Sat Dec  6 06:45:15 2008
New Revision: 185679
URL: http://svn.freebsd.org/changeset/base/185679

Log:
  MfP4: Big read filter refactoring.
  
  This is an attempt to eliminate a lot of redundant
  code from the read ("decompression") filters by
  changing them to juggle arbitrary-sized blocks
  and consolidating the reblocking code at a single
  point in archive_read.c.
  
  Along the way, I've changed the internal read/consume
  API used by the format handlers to a slightly
  different style originally suggested by des@.  It
  does seem to simplify a lot of common cases.
  
  The most dramatic change is, of course, to
  archive_read_support_compression_none(), which
  has just evaporated into a no-op as the blocking
  code this used to hold has all been moved up
  a level.
  
  There's at least one more big round of refactoring
  yet to come before the individual filters are as
  straightforward as I think they should be...

Modified:
  head/lib/libarchive/archive_read.c
  head/lib/libarchive/archive_read_private.h
  head/lib/libarchive/archive_read_support_compression_all.c
  head/lib/libarchive/archive_read_support_compression_bzip2.c
  head/lib/libarchive/archive_read_support_compression_compress.c
  head/lib/libarchive/archive_read_support_compression_gzip.c
  head/lib/libarchive/archive_read_support_compression_none.c
  head/lib/libarchive/archive_read_support_compression_program.c
  head/lib/libarchive/archive_read_support_format_ar.c
  head/lib/libarchive/archive_read_support_format_cpio.c
  head/lib/libarchive/archive_read_support_format_empty.c
  head/lib/libarchive/archive_read_support_format_iso9660.c
  head/lib/libarchive/archive_read_support_format_mtree.c
  head/lib/libarchive/archive_read_support_format_tar.c
  head/lib/libarchive/archive_read_support_format_zip.c

Modified: head/lib/libarchive/archive_read.c
==============================================================================
--- head/lib/libarchive/archive_read.c	Sat Dec  6 06:23:37 2008	(r185678)
+++ head/lib/libarchive/archive_read.c	Sat Dec  6 06:45:15 2008	(r185679)
@@ -53,9 +53,10 @@ __FBSDID("$FreeBSD$");
 #include "archive_private.h"
 #include "archive_read_private.h"
 
-static void	choose_decompressor(struct archive_read *, const void*, size_t);
+#define minimum(a, b) (a < b ? a : b)
+
+static int	build_stream(struct archive_read *);
 static int	choose_format(struct archive_read *);
-static off_t	dummy_skip(struct archive_read *, off_t);
 
 /*
  * Allocate, initialize and return a struct archive object.
@@ -74,8 +75,15 @@ archive_read_new(void)
 	a->archive.state = ARCHIVE_STATE_NEW;
 	a->entry = archive_entry_new();
 
-	/* We always support uncompressed archives. */
-	archive_read_support_compression_none(&a->archive);
+	/* Initialize reblocking logic. */
+	a->buffer_size = 64 * 1024; /* 64k */
+	a->buffer = (char *)malloc(a->buffer_size);
+	a->next = a->buffer;
+	if (a->buffer == NULL) {
+		archive_entry_free(a->entry);
+		free(a);
+		return (NULL);
+	}
 
 	return (&a->archive);
 }
@@ -108,6 +116,33 @@ archive_read_open(struct archive *a, voi
 	    client_reader, NULL, client_closer);
 }
 
+static ssize_t
+client_read_proxy(struct archive_read_source *self, const void **buff)
+{
+	return (self->archive->client.reader)((struct archive *)self->archive,
+	    self->data, buff);
+}
+
+static int64_t
+client_skip_proxy(struct archive_read_source *self, int64_t request)
+{
+	return (self->archive->client.skipper)((struct archive *)self->archive,
+	    self->data, request);
+}
+
+static int
+client_close_proxy(struct archive_read_source *self)
+{
+	int r = ARCHIVE_OK;
+
+	if (self->archive->client.closer != NULL)
+		r = (self->archive->client.closer)((struct archive *)self->archive,
+		    self->data);
+	free(self);
+	return (r);
+}
+
+
 int
 archive_read_open2(struct archive *_a, void *client_data,
     archive_open_callback *client_opener,
@@ -116,28 +151,15 @@ archive_read_open2(struct archive *_a, v
     archive_close_callback *client_closer)
 {
 	struct archive_read *a = (struct archive_read *)_a;
-	const void *buffer;
-	ssize_t bytes_read;
 	int e;
 
-	__archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_open");
+	__archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
+	    "archive_read_open");
 
 	if (client_reader == NULL)
 		__archive_errx(1,
 		    "No reader function provided to archive_read_open");
 
-	/*
-	 * Set these NULL initially.  If the open or initial read fails,
-	 * we'll leave them NULL to indicate that the file is invalid.
-	 * (In particular, this helps ensure that the closer doesn't
-	 * get called more than once.)
-	 */
-	a->client_opener = NULL;
-	a->client_reader = NULL;
-	a->client_skipper = NULL;
-	a->client_closer = NULL;
-	a->client_data = NULL;
-
 	/* Open data source. */
 	if (client_opener != NULL) {
 		e =(client_opener)(&a->archive, client_data);
@@ -149,129 +171,103 @@ archive_read_open2(struct archive *_a, v
 		}
 	}
 
-	/* Read first block now for compress format detection. */
-	bytes_read = (client_reader)(&a->archive, client_data, &buffer);
-
-	if (bytes_read < 0) {
-		/* If the first read fails, close before returning error. */
-		if (client_closer)
-			(client_closer)(&a->archive, client_data);
-		/* client_reader should have already set error information. */
-		return (ARCHIVE_FATAL);
-	}
+	/* Save the client functions and mock up the initial source. */
+	a->client.opener = client_opener; /* Do we need to remember this? */
+	a->client.reader = client_reader;
+	a->client.skipper = client_skipper;
+	a->client.closer = client_closer;
+	a->client.data = client_data;
 
-	/* Now that the client callbacks have worked, remember them. */
-	a->client_opener = client_opener; /* Do we need to remember this? */
-	a->client_reader = client_reader;
-	a->client_skipper = client_skipper;
-	a->client_closer = client_closer;
-	a->client_data = client_data;
-
-	/* Select a decompression routine. */
-	choose_decompressor(a, buffer, (size_t)bytes_read);
-	if (a->decompressor == NULL)
-		return (ARCHIVE_FATAL);
+	{
+		struct archive_read_source *source;
 
-	/* Initialize decompression routine with the first block of data. */
-	e = (a->decompressor->init)(a, buffer, (size_t)bytes_read);
+		source = calloc(1, sizeof(*source));
+		if (source == NULL)
+			return (ARCHIVE_FATAL);
+		source->reader = NULL;
+		source->upstream = NULL;
+		source->archive = a;
+		source->data = client_data;
+		source->read = client_read_proxy;
+		source->skip = client_skip_proxy;
+		source->close = client_close_proxy;
+		a->source = source;
+	}
+
+	/* In case there's no filter. */
+	a->archive.compression_code = ARCHIVE_COMPRESSION_NONE;
+	a->archive.compression_name = "none";
 
+	/* Build out the input pipeline. */
+	e = build_stream(a);
 	if (e == ARCHIVE_OK)
 		a->archive.state = ARCHIVE_STATE_HEADER;
 
-	/*
-	 * If the decompressor didn't register a skip function, provide a
-	 * dummy compression-layer skip function.
-	 */
-	if (a->decompressor->skip == NULL)
-		a->decompressor->skip = dummy_skip;
-
 	return (e);
 }
 
 /*
- * Allow each registered decompression routine to bid on whether it
- * wants to handle this stream.  Return index of winning bidder.
+ * Allow each registered stream transform to bid on whether
+ * it wants to handle this stream.  Repeat until we've finished
+ * building the pipeline.
  */
-static void
-choose_decompressor(struct archive_read *a,
-    const void *buffer, size_t bytes_read)
+static int
+build_stream(struct archive_read *a)
 {
-	int decompression_slots, i, bid, best_bid;
-	struct decompressor_t *decompressor, *best_decompressor;
+	int number_readers, i, bid, best_bid;
+	struct archive_reader *reader, *best_reader;
+	struct archive_read_source *source;
+	const void *block;
+	ssize_t bytes_read;
 
-	decompression_slots = sizeof(a->decompressors) /
-	    sizeof(a->decompressors[0]);
+	/* Read first block now for compress format detection. */
+	bytes_read = (a->source->read)(a->source, &block);
+	if (bytes_read < 0) {
+		/* If the first read fails, close before returning error. */
+		if (a->source->close != NULL) {
+			(a->source->close)(a->source);
+			a->source = NULL;
+		}
+		/* source->read should have already set error information. */
+		return (ARCHIVE_FATAL);
+	}
+
+	number_readers = sizeof(a->readers) / sizeof(a->readers[0]);
 
 	best_bid = 0;
-	a->decompressor = NULL;
-	best_decompressor = NULL;
+	best_reader = NULL;
 
-	decompressor = a->decompressors;
-	for (i = 0; i < decompression_slots; i++) {
-		if (decompressor->bid) {
-			bid = (decompressor->bid)(buffer, bytes_read);
-			if (bid > best_bid || best_decompressor == NULL) {
+	reader = a->readers;
+	for (i = 0, reader = a->readers; i < number_readers; i++, reader++) {
+		if (reader->bid != NULL) {
+			bid = (reader->bid)(reader, block, bytes_read);
+			if (bid > best_bid) {
 				best_bid = bid;
-				best_decompressor = decompressor;
+				best_reader = reader;
 			}
 		}
-		decompressor ++;
 	}
 
 	/*
-	 * There were no bidders; this is a serious programmer error
-	 * and demands a quick and definitive abort.
-	 */
-	if (best_decompressor == NULL)
-		__archive_errx(1, "No decompressors were registered; you "
-		    "must call at least one "
-		    "archive_read_support_compression_XXX function in order "
-		    "to successfully read an archive.");
-
-	/*
-	 * There were bidders, but no non-zero bids; this means we  can't
-	 * support this stream.
+	 * If we have a winner, it becomes the next stage in the pipeline.
 	 */
-	if (best_bid < 1) {
-		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
-		    "Unrecognized archive format");
-		return;
-	}
-
-	/* Record the best decompressor for this stream. */
-	a->decompressor = best_decompressor;
-}
-
-/*
- * Dummy skip function, for use if the compression layer doesn't provide
- * one: This code just reads data and discards it.
- */
-static off_t
-dummy_skip(struct archive_read * a, off_t request)
-{
-	const void * dummy_buffer;
-	ssize_t bytes_read;
-	off_t bytes_skipped;
-
-	for (bytes_skipped = 0; request > 0;) {
-		bytes_read = (a->decompressor->read_ahead)(a, &dummy_buffer, 1);
-		if (bytes_read < 0)
-			return (bytes_read);
-		if (bytes_read == 0) {
-			/* Premature EOF. */
-			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
-			    "Truncated input file (need to skip %jd bytes)",
-			    (intmax_t)request);
+	if (best_reader != NULL) {
+		source = (best_reader->init)(a, best_reader, a->source,
+		    block, bytes_read);
+		if (source == NULL)
 			return (ARCHIVE_FATAL);
-		}
-		if (bytes_read > request)
-			bytes_read = (ssize_t)request;
-		(a->decompressor->consume)(a, (size_t)bytes_read);
-		request -= bytes_read;
-		bytes_skipped += bytes_read;
+		/* Record the best decompressor for this stream. */
+		a->source = source;
+		/* Recurse to get next pipeline stage. */
+		return (build_stream(a));
 	}
 
-	return (bytes_skipped);
+	/* Save first block of data. */
+	a->client_buff = block;
+	a->client_total = bytes_read;
+	a->client_next = a->client_buff;
+	a->client_avail = a->client_total;
+	return (ARCHIVE_OK);
 }
 
 /*
@@ -598,23 +594,24 @@ archive_read_close(struct archive *_a)
 
 	/* TODO: Clean up the formatters. */
 
-	/* Clean up the decompressors. */
-	n = sizeof(a->decompressors)/sizeof(a->decompressors[0]);
+	/* Clean up the stream pipeline. */
+	if (a->source != NULL) {
+		r1 = (a->source->close)(a->source);
+		if (r1 < r)
+			r = r1;
+		a->source = NULL;
+	}
+
+	/* Release the reader objects. */
+	n = sizeof(a->readers)/sizeof(a->readers[0]);
 	for (i = 0; i < n; i++) {
-		if (a->decompressors[i].finish != NULL) {
-			r1 = (a->decompressors[i].finish)(a);
+		if (a->readers[i].free != NULL) {
+			r1 = (a->readers[i].free)(&a->readers[i]);
 			if (r1 < r)
 				r = r1;
 		}
 	}
 
-	/* Close the client stream. */
-	if (a->client_closer != NULL) {
-		r1 = ((a->client_closer)(&a->archive, a->client_data));
-		if (r1 < r)
-			r = r1;
-	}
-
 	return (r);
 }
 
@@ -651,6 +648,7 @@ archive_read_finish(struct archive *_a)
 	if (a->entry)
 		archive_entry_free(a->entry);
 	a->archive.magic = 0;
+	free(a->buffer);
 	free(a);
 #if ARCHIVE_API_VERSION > 1
 	return (r);
@@ -700,40 +698,350 @@ __archive_read_register_format(struct ar
  * Used internally by decompression routines to register their bid and
  * initialization functions.
  */
-struct decompressor_t *
-__archive_read_register_compression(struct archive_read *a,
-    int (*bid)(const void *, size_t),
-    int (*init)(struct archive_read *, const void *, size_t))
+struct archive_reader *
+__archive_read_get_reader(struct archive_read *a)
 {
 	int i, number_slots;
 
 	__archive_check_magic(&a->archive,
 	    ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
-	    "__archive_read_register_compression");
+	    "__archive_read_get_reader");
 
-	number_slots = sizeof(a->decompressors) / sizeof(a->decompressors[0]);
+	number_slots = sizeof(a->readers) / sizeof(a->readers[0]);
 
 	for (i = 0; i < number_slots; i++) {
-		if (a->decompressors[i].bid == bid)
-			return (a->decompressors + i);
-		if (a->decompressors[i].bid == NULL) {
-			a->decompressors[i].bid = bid;
-			a->decompressors[i].init = init;
-			return (a->decompressors + i);
-		}
+		if (a->readers[i].bid == NULL)
+			return (a->readers + i);
 	}
 
 	__archive_errx(1, "Not enough slots for compression registration");
 	return (NULL); /* Never actually executed. */
 }
 
-/* used internally to simplify read-ahead */
+/*
+ * The next three functions comprise the peek/consume internal I/O
+ * system used by archive format readers.  This system allows fairly
+ * flexible read-ahead and allows the I/O code to operate in a
+ * zero-copy manner most of the time.
+ *
+ * In the ideal case, block providers give the I/O code blocks of data
+ * and __archive_read_ahead() just returns pointers directly into
+ * those blocks.  Then __archive_read_consume() just bumps those
+ * pointers.  Only if your request would span blocks does the I/O
+ * layer use a copy buffer to provide you with a contiguous block of
+ * data.  The __archive_read_skip() is an optimization; it scans ahead
+ * very quickly (it usually translates into a seek() operation if
+ * you're reading uncompressed disk files).
+ *
+ * A couple of useful idioms:
+ *  * "I just want some data."  Ask for 1 byte and pay attention to
+ *    the "number of bytes available" from __archive_read_ahead().
+ *    You can consume more than you asked for; you just can't consume
+ *    more than is available right now.  If you consume everything that's
+ *    immediately available, the next read_ahead() call will pull
+ *    the next block.
+ *  * "I want to output a large block of data."  As above, ask for 1 byte,
+ *    emit all that's available (up to whatever limit you have), then
+ *    repeat until you're done.
+ *  * "I want to peek ahead by a large amount."  Ask for 4k or so, then
+ *    double and repeat until you get an error or have enough.  Note
+ *    that the I/O layer will likely end up expanding its copy buffer
+ *    to fit your request, so use this technique cautiously.  This
+ *    technique is used, for example, by some of the format tasting
+ *    code that has uncertain look-ahead needs.
+ *
+ * TODO: Someday, provide a more generic __archive_read_seek() for
+ * those cases where it's useful.  This is tricky because there are lots
+ * of cases where seek() is not available (reading gzip data from a
+ * network socket, for instance), so there needs to be a good way to
+ * communicate whether seek() is available and users of that interface
+ * need to use non-seeking strategies whenever seek() is not available.
+ */
+
+/*
+ * Looks ahead in the input stream:
+ *  * If 'avail' pointer is provided, that returns number of bytes available
+ *    in the current buffer, which may be much larger than requested.
+ *  * If end-of-file, *avail gets set to zero.
+ *  * If error, *avail gets error code.
+ *  * If request can be met, returns pointer to data, returns NULL
+ *    if request is not met.
+ *
+ * Note: If you just want "some data", ask for 1 byte and pay attention
+ * to *avail, which will have the actual amount available.  If you
+ * know exactly how many bytes you need, just ask for that and treat
+ * a NULL return as an error.
+ *
+ * Important:  This does NOT move the file pointer.  See
+ * __archive_read_consume() below.
+ */
+
+/*
+ * This is tricky.  We need to provide our clients with pointers to
+ * contiguous blocks of memory but we want to avoid copying whenever
+ * possible.
+ *
+ * Mostly, this code returns pointers directly into the block of data
+ * provided by the client_read routine.  It can do this unless the
+ * request would split across blocks.  In that case, we have to copy
+ * into an internal buffer to combine reads.
+ */
 const void *
-__archive_read_ahead(struct archive_read *a, size_t len)
+__archive_read_ahead(struct archive_read *a, size_t min, ssize_t *avail)
 {
-	const void *h;
+	ssize_t bytes_read;
+	size_t tocopy;
 
-	if ((a->decompressor->read_ahead)(a, &h, len) < (ssize_t)len)
+	if (a->fatal) {
+		if (avail)
+			*avail = ARCHIVE_FATAL;
 		return (NULL);
-	return (h);
+	}
+
+	/*
+	 * Keep pulling more data until we can satisfy the request.
+	 */
+	for (;;) {
+
+		/*
+		 * If we can satisfy from the copy buffer, we're done.
+		 */
+		if (a->avail >= min) {
+			if (avail != NULL)
+				*avail = a->avail;
+			return (a->next);
+		}
+
+		/*
+		 * We can satisfy directly from client buffer if everything
+		 * currently in the copy buffer is still in the client buffer.
+		 */
+		if (a->client_total >= a->client_avail + a->avail
+		    && a->client_avail + a->avail >= min) {
+			/* "Roll back" to client buffer. */
+			a->client_avail += a->avail;
+			a->client_next -= a->avail;
+			/* Copy buffer is now empty. */
+			a->avail = 0;
+			a->next = a->buffer;
+			/* Return data from client buffer. */
+			if (avail != NULL)
+				*avail = a->client_avail;
+			return (a->client_next);
+		}
+
+		/* Move data forward in copy buffer if necessary. */
+		if (a->next > a->buffer &&
+		    a->next + min > a->buffer + a->buffer_size) {
+			if (a->avail > 0)
+				memmove(a->buffer, a->next, a->avail);
+			a->next = a->buffer;
+		}
+
+		/* If we've used up the client data, get more. */
+		if (a->client_avail <= 0) {
+			if (a->end_of_file) {
+				if (avail != NULL)
+					*avail = 0;
+				return (NULL);
+			}
+			bytes_read = (a->source->read)(a->source,
+			    &a->client_buff);
+			if (bytes_read < 0) {		/* Read error. */
+				a->client_total = a->client_avail = 0;
+				a->client_next = a->client_buff = NULL;
+				a->fatal = 1;
+				if (avail != NULL)
+					*avail = ARCHIVE_FATAL;
+				return (NULL);
+			}
+			if (bytes_read == 0) {	/* Premature end-of-file. */
+				a->client_total = a->client_avail = 0;
+				a->client_next = a->client_buff = NULL;
+				a->end_of_file = 1;
+				/* Return whatever we do have. */
+				if (avail != NULL)
+					*avail = a->avail;
+				return (NULL);
+			}
+			a->archive.raw_position += bytes_read;
+			a->client_total = bytes_read;
+			a->client_avail = a->client_total;
+			a->client_next = a->client_buff;
+		}
+		else
+		{
+			/*
+			 * We can't satisfy the request from the copy
+			 * buffer or the existing client data, so we
+			 * need to copy more client data over to the
+			 * copy buffer.
+			 */
+
+			/* Ensure the buffer is big enough. */
+			if (min > a->buffer_size) {
+				size_t s, t;
+				char *p;
+
+				/* Double the buffer; watch for overflow. */
+				s = t = a->buffer_size;
+				while (s < min) {
+					t *= 2;
+					if (t <= s) { /* Integer overflow! */
+						archive_set_error(&a->archive,
+						    ENOMEM,
+						    "Unable to allocate copy buffer");
+						a->fatal = 1;
+						if (avail != NULL)
+							*avail = ARCHIVE_FATAL;
+						return (NULL);
+					}
+					s = t;
+				}
+				/* Now s >= min, so allocate a new buffer. */
+				p = (char *)malloc(s);
+				if (p == NULL) {
+					archive_set_error(&a->archive, ENOMEM,
+					    "Unable to allocate copy buffer");
+					a->fatal = 1;
+					if (avail != NULL)
+						*avail = ARCHIVE_FATAL;
+					return (NULL);
+				}
+				/* Move data into newly-enlarged buffer. */
+				if (a->avail > 0)
+					memmove(p, a->next, a->avail);
+				free(a->buffer);
+				a->next = a->buffer = p;
+				a->buffer_size = s;
+			}
+
+			/* We can add client data to copy buffer. */
+			/* First estimate: copy to fill rest of buffer. */
+			tocopy = (a->buffer + a->buffer_size)
+			    - (a->next + a->avail);
+			/* Don't waste time buffering more than we need to. */
+			if (tocopy + a->avail > min)
+				tocopy = min - a->avail;
+			/* Don't copy more than is available. */
+			if (tocopy > a->client_avail)
+				tocopy = a->client_avail;
+
+			memcpy(a->next + a->avail, a->client_next,
+			    tocopy);
+			/* Remove this data from client buffer. */
+			a->client_next += tocopy;
+			a->client_avail -= tocopy;
+			/* add it to copy buffer. */
+			a->avail += tocopy;
+		}
+	}
+}
+
+/*
+ * Move the file pointer forward.  This should be called after
+ * __archive_read_ahead() returns data to you.  Don't try to move
+ * ahead by more than the amount of data available according to
+ * __archive_read_ahead().
+ */
+/*
+ * Mark the appropriate data as used.  Note that the request here will
+ * often be much smaller than the size of the previous read_ahead
+ * request.
+ */
+ssize_t
+__archive_read_consume(struct archive_read *a, size_t request)
+{
+	if (a->avail > 0) {
+		/* Read came from copy buffer. */
+		a->next += request;
+		a->avail -= request;
+	} else {
+		/* Read came from client buffer. */
+		a->client_next += request;
+		a->client_avail -= request;
+	}
+	a->archive.file_position += request;
+	return (request);
+}
+
+/*
+ * Move the file pointer ahead by an arbitrary amount.  If you're
+ * reading uncompressed data from a disk file, this will actually
+ * translate into a seek() operation.  Even in cases where seek()
+ * isn't feasible, this at least pushes the read-and-discard loop
+ * down closer to the data source.
+ */
+int64_t
+__archive_read_skip(struct archive_read *a, int64_t request)
+{
+	off_t bytes_skipped, total_bytes_skipped = 0;
+	size_t min;
+
+	if (a->fatal)
+		return (-1);
+	/*
+	 * If there is data in the buffers already, use that first.
+	 */
+	if (a->avail > 0) {
+		min = minimum(request, (off_t)a->avail);
+		bytes_skipped = __archive_read_consume(a, min);
+		request -= bytes_skipped;
+		total_bytes_skipped += bytes_skipped;
+	}
+	if (a->client_avail > 0) {
+		min = minimum(request, (off_t)a->client_avail);
+		bytes_skipped = __archive_read_consume(a, min);
+		request -= bytes_skipped;
+		total_bytes_skipped += bytes_skipped;
+	}
+	if (request == 0)
+		return (total_bytes_skipped);
+	/*
+	 * If a client_skipper was provided, try that first.
+	 */
+#if ARCHIVE_API_VERSION < 2
+	if ((a->source->skip != NULL) && (request < SSIZE_MAX)) {
+#else
+	if (a->source->skip != NULL) {
+#endif
+		bytes_skipped = (a->source->skip)(a->source, request);
+		if (bytes_skipped < 0) {	/* error */
+			a->client_total = a->client_avail = 0;
+			a->client_next = a->client_buff = NULL;
+			a->fatal = 1;
+			return (bytes_skipped);
+		}
+		total_bytes_skipped += bytes_skipped;
+		a->archive.file_position += bytes_skipped;
+		request -= bytes_skipped;
+		a->client_next = a->client_buff;
+		a->archive.raw_position += bytes_skipped;
+		a->client_avail = a->client_total = 0;
+	}
+	/*
+	 * Note that client_skipper will usually not satisfy the
+	 * full request (due to low-level blocking concerns),
+	 * so even if client_skipper is provided, we may still
+	 * have to use ordinary reads to finish out the request.
+	 */
+	while (request > 0) {
+		const void* dummy_buffer;
+		ssize_t bytes_read;
+		dummy_buffer = __archive_read_ahead(a, 1, &bytes_read);
+		if (bytes_read < 0)
+			return (bytes_read);
+		if (bytes_read == 0) {
+			/* We hit EOF before we satisfied the skip request. */
+			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+			    "Truncated input file (need to skip %jd bytes)",
+			    (intmax_t)request);
+			return (ARCHIVE_FATAL);
+		}
+		min = (size_t)(minimum(bytes_read, request));
+		bytes_read = __archive_read_consume(a, min);
+		total_bytes_skipped += bytes_read;
+		request -= bytes_read;
+	}
+	return (total_bytes_skipped);
 }

Modified: head/lib/libarchive/archive_read_private.h
==============================================================================
--- head/lib/libarchive/archive_read_private.h	Sat Dec  6 06:23:37 2008	(r185678)
+++ head/lib/libarchive/archive_read_private.h	Sat Dec  6 06:45:15 2008	(r185679)
@@ -32,6 +32,75 @@
 #include "archive_string.h"
 #include "archive_private.h"
 
+struct archive_read;
+struct archive_reader;
+struct archive_read_source;
+
+/*
+ * A "reader" knows how to provide blocks.  That can include something
+ * that reads blocks from disk or socket or a transformation layer
+ * that reads blocks from another source and transforms them.  This
+ * includes decompression and decryption filters.
+ *
+ * How bidding works:
+ *   * The bid manager reads the first block from the current source.
+ *   * It shows that block to each registered bidder.
+ *   * The winning bidder is initialized (with the block and information
+ *     about the source)
+ *   * The winning bidder becomes the new source and the process repeats
+ * This ends only when no reader provides a non-zero bid.
+ */
+struct archive_reader {
+	/* Configuration data for the reader. */
+	void *data;
+	/* Bidder is handed the initial block from its source. */
+	int (*bid)(struct archive_reader *, const void *buff, size_t);
+	/* Init() is given the archive, upstream source, and the initial
+	 * block above.  It returns a populated source structure. */
+	struct archive_read_source *(*init)(struct archive_read *,
+	    struct archive_reader *, struct archive_read_source *source,
+	    const void *, size_t);
+	/* Release the reader and any configuration data it allocated. */
+	int (*free)(struct archive_reader *);
+};
+
+/*
+ * A "source" is an instance of a reader.  This structure is
+ * allocated and initialized by the init() method of a reader
+ * above.
+ */
+struct archive_read_source {
+	/* Essentially all sources will need these values, so
+	 * just declare them here. */
+	struct archive_reader *reader; /* Reader that I'm an instance of. */
+	struct archive_read_source *upstream; /* Who I get blocks from. */
+	struct archive_read *archive; /* associated archive. */
+	/* Return next block. */
+	ssize_t (*read)(struct archive_read_source *, const void **);
+	/* Skip forward this many bytes. */
+	int64_t (*skip)(struct archive_read_source *self, int64_t request);
+	/* Close (recursively) and free(self). */
+	int (*close)(struct archive_read_source *self);
+	/* My private data. */
+	void *data;
+};
+
+/*
+ * The client source is almost the same as an internal source.
+ *
+ * TODO: Make archive_read_source and archive_read_client identical so
+ * that users of the library can easily register their own
+ * transformation filters.  This will probably break the API/ABI and
+ * so should be deferred until libarchive 3.0.
+ */
+struct archive_read_client {
+	archive_open_callback	*opener;
+	archive_read_callback	*reader;
+	archive_skip_callback	*skipper;
+	archive_close_callback	*closer;
+	void			*data;
+};
+
 struct archive_read {
 	struct archive	archive;
 
@@ -50,46 +119,30 @@ struct archive_read {
 	off_t		  read_data_output_offset;
 	size_t		  read_data_remaining;
 
-	/* Callbacks to open/read/write/close archive stream. */
-	archive_open_callback	*client_opener;
-	archive_read_callback	*client_reader;
-	archive_skip_callback	*client_skipper;
-	archive_close_callback	*client_closer;
-	void			*client_data;
+	/* Callbacks to open/read/write/close client archive stream. */
+	struct archive_read_client client;
+
+	/* Registered readers. */
+	struct archive_reader readers[8];
+
+	/* Source */
+	struct archive_read_source *source;
 
 	/* File offset of beginning of most recently-read header. */
 	off_t		  header_position;
 
-	/*
-	 * Decompressors have a very specific lifecycle:
-	 *    public setup function initializes a slot in this table
-	 *    'config' holds minimal configuration data
-	 *    bid() examines a block of data and returns a bid [1]
-	 *    init() is called for successful bidder
-	 *    'data' is initialized by init()
-	 *    read() returns a pointer to the next block of data
-	 *    consume() indicates how much data is used
-	 *    skip() ignores bytes of data
-	 *    finish() cleans up and frees 'data' and 'config'
-	 *
-	 * [1] General guideline: bid the number of bits that you actually
-	 * test, e.g., 16 if you test a 2-byte magic value.
-	 */
-	struct decompressor_t {
-		void *config;
-		void *data;
-		int	(*bid)(const void *buff, size_t);
-		int	(*init)(struct archive_read *,
-			    const void *buff, size_t);
-		int	(*finish)(struct archive_read *);
-		ssize_t	(*read_ahead)(struct archive_read *,
-			    const void **, size_t);
-		ssize_t	(*consume)(struct archive_read *, size_t);
-		off_t	(*skip)(struct archive_read *, off_t);
-	}	decompressors[4];
 
-	/* Pointer to current decompressor. */
-	struct decompressor_t *decompressor;
+	/* Used by reblocking logic. */
+	char		*buffer;
+	size_t		 buffer_size;
+	char		*next;		/* Current read location. */
+	size_t		 avail;		/* Bytes in my buffer. */
+	const void	*client_buff;	/* Client buffer information. */
+	size_t		 client_total;
+	const char	*client_next;
+	size_t		 client_avail;
+	char		 end_of_file;
+	char		 fatal;
 
 	/*
 	 * Format detection is mostly the same as compression
@@ -124,12 +177,13 @@ int	__archive_read_register_format(struc
 	    int (*read_data_skip)(struct archive_read *),
 	    int (*cleanup)(struct archive_read *));
 
-struct decompressor_t
-	*__archive_read_register_compression(struct archive_read *a,
-	    int (*bid)(const void *, size_t),
-	    int (*init)(struct archive_read *, const void *, size_t));
+struct archive_reader
+	*__archive_read_get_reader(struct archive_read *a);
 
 const void
-	*__archive_read_ahead(struct archive_read *, size_t);
-
+	*__archive_read_ahead(struct archive_read *, size_t, ssize_t *);
+ssize_t
+	__archive_read_consume(struct archive_read *, size_t);
+int64_t
+	__archive_read_skip(struct archive_read *, int64_t);
 #endif

Modified: head/lib/libarchive/archive_read_support_compression_all.c
==============================================================================
--- head/lib/libarchive/archive_read_support_compression_all.c	Sat Dec  6 06:23:37 2008	(r185678)
+++ head/lib/libarchive/archive_read_support_compression_all.c	Sat Dec  6 06:45:15 2008	(r185679)
@@ -39,5 +39,11 @@ archive_read_support_compression_all(str
 #if HAVE_ZLIB_H
 	archive_read_support_compression_gzip(a);
 #endif
+#if HAVE_LZMADEC_H
+	/* LZMA bidding is subject to false positives because
+	 * the LZMA file format has a very weak signature.  It
+	 * may not be feasible to include LZMA detection here. */
+	/* archive_read_support_compression_lzma(a); */
+#endif
 	return (ARCHIVE_OK);
 }

Modified: head/lib/libarchive/archive_read_support_compression_bzip2.c
==============================================================================
--- head/lib/libarchive/archive_read_support_compression_bzip2.c	Sat Dec  6 06:23:37 2008	(r185678)
+++ head/lib/libarchive/archive_read_support_compression_bzip2.c	Sat Dec  6 06:45:15 2008	(r185679)
@@ -51,30 +51,49 @@ __FBSDID("$FreeBSD$");
 #if HAVE_BZLIB_H
 struct private_data {
 	bz_stream	 stream;
-	char		*uncompressed_buffer;
-	size_t		 uncompressed_buffer_size;
-	char		*read_next;
-	int64_t		 total_out;
+	char		*out_block;
+	size_t		 out_block_size;
+	char		 valid; /* True = decompressor is initialized */
 	char		 eof; /* True = found end of compressed data. */
 };
 
-static int	finish(struct archive_read *);
-static ssize_t	read_ahead(struct archive_read *, const void **, size_t);
-static ssize_t	read_consume(struct archive_read *, size_t);
-static int	drive_decompressor(struct archive_read *a, struct private_data *);
+/* Bzip2 source */
+static ssize_t	bzip2_source_read(struct archive_read_source *, const void **);
+static int	bzip2_source_close(struct archive_read_source *);
 #endif
 
-/* These two functions are defined even if we lack the library.  See below. */
-static int	bid(const void *, size_t);
-static int	init(struct archive_read *, const void *, size_t);
+/*
+ * Note that we can detect bzip2 archives even if we can't decompress
+ * them.  (In fact, we like detecting them because we can give better
+ * error messages.)  So the bid framework here gets compiled even
+ * if bzlib is unavailable.
+ */
+static int	bzip2_reader_bid(struct archive_reader *, const void *, size_t);
+static struct archive_read_source *bzip2_reader_init(struct archive_read *,
+    struct archive_reader *, struct archive_read_source *,
+    const void *, size_t);
+static int	bzip2_reader_free(struct archive_reader *);
 
 int
 archive_read_support_compression_bzip2(struct archive *_a)
 {
 	struct archive_read *a = (struct archive_read *)_a;
-	if (__archive_read_register_compression(a, bid, init) != NULL)
-		return (ARCHIVE_OK);
-	return (ARCHIVE_FATAL);
+	struct archive_reader *reader = __archive_read_get_reader(a);
+
+	if (reader == NULL)
+		return (ARCHIVE_FATAL);
+
+	reader->data = NULL;
+	reader->bid = bzip2_reader_bid;
+	reader->init = bzip2_reader_init;
+	reader->free = bzip2_reader_free;
+	return (ARCHIVE_OK);
+}
+
+static int
+bzip2_reader_free(struct archive_reader *self){
+	(void)self; /* UNUSED */
+	return (ARCHIVE_OK);
 }
 
 /*
@@ -85,11 +104,13 @@ archive_read_support_compression_bzip2(s
  * from verifying as much as we would like.
  */
 static int
-bid(const void *buff, size_t len)
+bzip2_reader_bid(struct archive_reader *self, const void *buff, size_t len)
 {
 	const unsigned char *buffer;
 	int bits_checked;
 
+	(void)self; /* UNUSED */
+
 	if (len < 1)
 		return (0);
 
@@ -150,16 +171,19 @@ bid(const void *buff, size_t len)
  * decompression.  We can, however, still detect compressed archives
  * and emit a useful message.
  */
-static int
-init(struct archive_read *a, const void *buff, size_t n)
+static struct archive_read_source *
+bzip2_reader_init(struct archive_read *a, struct archive_reader *reader,
+    struct archive_read_source *upstream, const void *buff, size_t n)
 {
 	(void)a;	/* UNUSED */
+	(void)reader;	/* UNUSED */
+	(void)upstream; /* UNUSED */
 	(void)buff;	/* UNUSED */
 	(void)n;	/* UNUSED */
 
 	archive_set_error(&a->archive, -1,
 	    "This version of libarchive was compiled without bzip2 support");
-	return (ARCHIVE_FATAL);
+	return (NULL);
 }
 
 
@@ -168,37 +192,42 @@ init(struct archive_read *a, const void 
 /*
  * Setup the callbacks.
  */
-static int
-init(struct archive_read *a, const void *buff, size_t n)

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200812060645.mB66jFQ7085836>