From owner-svn-src-head@FreeBSD.ORG Sat Dec 6 06:45:15 2008 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id D5C161065670; Sat, 6 Dec 2008 06:45:15 +0000 (UTC) (envelope-from kientzle@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id C2A6B8FC1C; Sat, 6 Dec 2008 06:45:15 +0000 (UTC) (envelope-from kientzle@FreeBSD.org) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id mB66jFwh085841; Sat, 6 Dec 2008 06:45:15 GMT (envelope-from kientzle@svn.freebsd.org) Received: (from kientzle@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id mB66jFQ7085836; Sat, 6 Dec 2008 06:45:15 GMT (envelope-from kientzle@svn.freebsd.org) Message-Id: <200812060645.mB66jFQ7085836@svn.freebsd.org> From: Tim Kientzle Date: Sat, 6 Dec 2008 06:45:15 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r185679 - head/lib/libarchive X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 06 Dec 2008 06:45:16 -0000 Author: kientzle Date: Sat Dec 6 06:45:15 2008 New Revision: 185679 URL: http://svn.freebsd.org/changeset/base/185679 Log: MfP4: Big read filter refactoring. This is an attempt to eliminate a lot of redundant code from the read ("decompression") filters by changing them to juggle arbitrary-sized blocks and consolidate reblocking code at a single point in archive_read.c. Along the way, I've changed the internal read/consume API used by the format handlers to a slightly different style originally suggested by des@. It does seem to simplify a lot of common cases. The most dramatic change is, of course, to archive_read_support_compression_none(), which has just evaporated into a no-op as the blocking code this used to hold has all been moved up a level. There's at least one more big round of refactoring yet to come before the individual filters are as straightforward as I think they should be... Modified: head/lib/libarchive/archive_read.c head/lib/libarchive/archive_read_private.h head/lib/libarchive/archive_read_support_compression_all.c head/lib/libarchive/archive_read_support_compression_bzip2.c head/lib/libarchive/archive_read_support_compression_compress.c head/lib/libarchive/archive_read_support_compression_gzip.c head/lib/libarchive/archive_read_support_compression_none.c head/lib/libarchive/archive_read_support_compression_program.c head/lib/libarchive/archive_read_support_format_ar.c head/lib/libarchive/archive_read_support_format_cpio.c head/lib/libarchive/archive_read_support_format_empty.c head/lib/libarchive/archive_read_support_format_iso9660.c head/lib/libarchive/archive_read_support_format_mtree.c head/lib/libarchive/archive_read_support_format_tar.c head/lib/libarchive/archive_read_support_format_zip.c Modified: head/lib/libarchive/archive_read.c ============================================================================== --- head/lib/libarchive/archive_read.c Sat Dec 6 06:23:37 2008 (r185678) +++ head/lib/libarchive/archive_read.c Sat Dec 6 06:45:15 2008 (r185679) @@ -53,9 +53,10 @@ __FBSDID("$FreeBSD$"); #include "archive_private.h" #include "archive_read_private.h" -static void choose_decompressor(struct archive_read *, const void*, size_t); +#define minimum(a, b) (a < b ? a : b) + +static int build_stream(struct archive_read *); static int choose_format(struct archive_read *); -static off_t dummy_skip(struct archive_read *, off_t); /* * Allocate, initialize and return a struct archive object. @@ -74,8 +75,15 @@ archive_read_new(void) a->archive.state = ARCHIVE_STATE_NEW; a->entry = archive_entry_new(); - /* We always support uncompressed archives. */ - archive_read_support_compression_none(&a->archive); + /* Initialize reblocking logic. */ + a->buffer_size = 64 * 1024; /* 64k */ + a->buffer = (char *)malloc(a->buffer_size); + a->next = a->buffer; + if (a->buffer == NULL) { + archive_entry_free(a->entry); + free(a); + return (NULL); + } return (&a->archive); } @@ -108,6 +116,33 @@ archive_read_open(struct archive *a, voi client_reader, NULL, client_closer); } +static ssize_t +client_read_proxy(struct archive_read_source *self, const void **buff) +{ + return (self->archive->client.reader)((struct archive *)self->archive, + self->data, buff); +} + +static int64_t +client_skip_proxy(struct archive_read_source *self, int64_t request) +{ + return (self->archive->client.skipper)((struct archive *)self->archive, + self->data, request); +} + +static int +client_close_proxy(struct archive_read_source *self) +{ + int r = ARCHIVE_OK; + + if (self->archive->client.closer != NULL) + r = (self->archive->client.closer)((struct archive *)self->archive, + self->data); + free(self); + return (r); +} + + int archive_read_open2(struct archive *_a, void *client_data, archive_open_callback *client_opener, @@ -116,28 +151,15 @@ archive_read_open2(struct archive *_a, v archive_close_callback *client_closer) { struct archive_read *a = (struct archive_read *)_a; - const void *buffer; - ssize_t bytes_read; int e; - __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_open"); + __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, + "archive_read_open"); if (client_reader == NULL) __archive_errx(1, "No reader function provided to archive_read_open"); - /* - * Set these NULL initially. If the open or initial read fails, - * we'll leave them NULL to indicate that the file is invalid. - * (In particular, this helps ensure that the closer doesn't - * get called more than once.) - */ - a->client_opener = NULL; - a->client_reader = NULL; - a->client_skipper = NULL; - a->client_closer = NULL; - a->client_data = NULL; - /* Open data source. */ if (client_opener != NULL) { e =(client_opener)(&a->archive, client_data); @@ -149,129 +171,103 @@ archive_read_open2(struct archive *_a, v } } - /* Read first block now for compress format detection. */ - bytes_read = (client_reader)(&a->archive, client_data, &buffer); - - if (bytes_read < 0) { - /* If the first read fails, close before returning error. */ - if (client_closer) - (client_closer)(&a->archive, client_data); - /* client_reader should have already set error information. */ - return (ARCHIVE_FATAL); - } + /* Save the client functions and mock up the initial source. */ + a->client.opener = client_opener; /* Do we need to remember this? */ + a->client.reader = client_reader; + a->client.skipper = client_skipper; + a->client.closer = client_closer; + a->client.data = client_data; - /* Now that the client callbacks have worked, remember them. */ - a->client_opener = client_opener; /* Do we need to remember this? */ - a->client_reader = client_reader; - a->client_skipper = client_skipper; - a->client_closer = client_closer; - a->client_data = client_data; - - /* Select a decompression routine. */ - choose_decompressor(a, buffer, (size_t)bytes_read); - if (a->decompressor == NULL) - return (ARCHIVE_FATAL); + { + struct archive_read_source *source; - /* Initialize decompression routine with the first block of data. */ - e = (a->decompressor->init)(a, buffer, (size_t)bytes_read); + source = calloc(1, sizeof(*source)); + if (source == NULL) + return (ARCHIVE_FATAL); + source->reader = NULL; + source->upstream = NULL; + source->archive = a; + source->data = client_data; + source->read = client_read_proxy; + source->skip = client_skip_proxy; + source->close = client_close_proxy; + a->source = source; + } + + /* In case there's no filter. */ + a->archive.compression_code = ARCHIVE_COMPRESSION_NONE; + a->archive.compression_name = "none"; + /* Build out the input pipeline. */ + e = build_stream(a); if (e == ARCHIVE_OK) a->archive.state = ARCHIVE_STATE_HEADER; - /* - * If the decompressor didn't register a skip function, provide a - * dummy compression-layer skip function. - */ - if (a->decompressor->skip == NULL) - a->decompressor->skip = dummy_skip; - return (e); } /* - * Allow each registered decompression routine to bid on whether it - * wants to handle this stream. Return index of winning bidder. + * Allow each registered stream transform to bid on whether + * it wants to handle this stream. Repeat until we've finished + * building the pipeline. */ -static void -choose_decompressor(struct archive_read *a, - const void *buffer, size_t bytes_read) +static int +build_stream(struct archive_read *a) { - int decompression_slots, i, bid, best_bid; - struct decompressor_t *decompressor, *best_decompressor; + int number_readers, i, bid, best_bid; + struct archive_reader *reader, *best_reader; + struct archive_read_source *source; + const void *block; + ssize_t bytes_read; - decompression_slots = sizeof(a->decompressors) / - sizeof(a->decompressors[0]); + /* Read first block now for compress format detection. */ + bytes_read = (a->source->read)(a->source, &block); + if (bytes_read < 0) { + /* If the first read fails, close before returning error. */ + if (a->source->close != NULL) { + (a->source->close)(a->source); + a->source = NULL; + } + /* source->read should have already set error information. */ + return (ARCHIVE_FATAL); + } + + number_readers = sizeof(a->readers) / sizeof(a->readers[0]); best_bid = 0; - a->decompressor = NULL; - best_decompressor = NULL; + best_reader = NULL; - decompressor = a->decompressors; - for (i = 0; i < decompression_slots; i++) { - if (decompressor->bid) { - bid = (decompressor->bid)(buffer, bytes_read); - if (bid > best_bid || best_decompressor == NULL) { + reader = a->readers; + for (i = 0, reader = a->readers; i < number_readers; i++, reader++) { + if (reader->bid != NULL) { + bid = (reader->bid)(reader, block, bytes_read); + if (bid > best_bid) { best_bid = bid; - best_decompressor = decompressor; + best_reader = reader; } } - decompressor ++; } /* - * There were no bidders; this is a serious programmer error - * and demands a quick and definitive abort. - */ - if (best_decompressor == NULL) - __archive_errx(1, "No decompressors were registered; you " - "must call at least one " - "archive_read_support_compression_XXX function in order " - "to successfully read an archive."); - - /* - * There were bidders, but no non-zero bids; this means we can't - * support this stream. + * If we have a winner, it becomes the next stage in the pipeline. */ - if (best_bid < 1) { - archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, - "Unrecognized archive format"); - return; - } - - /* Record the best decompressor for this stream. */ - a->decompressor = best_decompressor; -} - -/* - * Dummy skip function, for use if the compression layer doesn't provide - * one: This code just reads data and discards it. - */ -static off_t -dummy_skip(struct archive_read * a, off_t request) -{ - const void * dummy_buffer; - ssize_t bytes_read; - off_t bytes_skipped; - - for (bytes_skipped = 0; request > 0;) { - bytes_read = (a->decompressor->read_ahead)(a, &dummy_buffer, 1); - if (bytes_read < 0) - return (bytes_read); - if (bytes_read == 0) { - /* Premature EOF. */ - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "Truncated input file (need to skip %jd bytes)", - (intmax_t)request); + if (best_reader != NULL) { + source = (best_reader->init)(a, best_reader, a->source, + block, bytes_read); + if (source == NULL) return (ARCHIVE_FATAL); - } - if (bytes_read > request) - bytes_read = (ssize_t)request; - (a->decompressor->consume)(a, (size_t)bytes_read); - request -= bytes_read; - bytes_skipped += bytes_read; + /* Record the best decompressor for this stream. */ + a->source = source; + /* Recurse to get next pipeline stage. */ + return (build_stream(a)); } - return (bytes_skipped); + /* Save first block of data. */ + a->client_buff = block; + a->client_total = bytes_read; + a->client_next = a->client_buff; + a->client_avail = a->client_total; + return (ARCHIVE_OK); } /* @@ -598,23 +594,24 @@ archive_read_close(struct archive *_a) /* TODO: Clean up the formatters. */ - /* Clean up the decompressors. */ - n = sizeof(a->decompressors)/sizeof(a->decompressors[0]); + /* Clean up the stream pipeline. */ + if (a->source != NULL) { + r1 = (a->source->close)(a->source); + if (r1 < r) + r = r1; + a->source = NULL; + } + + /* Release the reader objects. */ + n = sizeof(a->readers)/sizeof(a->readers[0]); for (i = 0; i < n; i++) { - if (a->decompressors[i].finish != NULL) { - r1 = (a->decompressors[i].finish)(a); + if (a->readers[i].free != NULL) { + r1 = (a->readers[i].free)(&a->readers[i]); if (r1 < r) r = r1; } } - /* Close the client stream. */ - if (a->client_closer != NULL) { - r1 = ((a->client_closer)(&a->archive, a->client_data)); - if (r1 < r) - r = r1; - } - return (r); } @@ -651,6 +648,7 @@ archive_read_finish(struct archive *_a) if (a->entry) archive_entry_free(a->entry); a->archive.magic = 0; + free(a->buffer); free(a); #if ARCHIVE_API_VERSION > 1 return (r); @@ -700,40 +698,350 @@ __archive_read_register_format(struct ar * Used internally by decompression routines to register their bid and * initialization functions. */ -struct decompressor_t * -__archive_read_register_compression(struct archive_read *a, - int (*bid)(const void *, size_t), - int (*init)(struct archive_read *, const void *, size_t)) +struct archive_reader * +__archive_read_get_reader(struct archive_read *a) { int i, number_slots; __archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, - "__archive_read_register_compression"); + "__archive_read_get_reader"); - number_slots = sizeof(a->decompressors) / sizeof(a->decompressors[0]); + number_slots = sizeof(a->readers) / sizeof(a->readers[0]); for (i = 0; i < number_slots; i++) { - if (a->decompressors[i].bid == bid) - return (a->decompressors + i); - if (a->decompressors[i].bid == NULL) { - a->decompressors[i].bid = bid; - a->decompressors[i].init = init; - return (a->decompressors + i); - } + if (a->readers[i].bid == NULL) + return (a->readers + i); } __archive_errx(1, "Not enough slots for compression registration"); return (NULL); /* Never actually executed. */ } -/* used internally to simplify read-ahead */ +/* + * The next three functions comprise the peek/consume internal I/O + * system used by archive format readers. This system allows fairly + * flexible read-ahead and allows the I/O code to operate in a + * zero-copy manner most of the time. + * + * In the ideal case, block providers give the I/O code blocks of data + * and __archive_read_ahead() just returns pointers directly into + * those blocks. Then __archive_read_consume() just bumps those + * pointers. Only if your request would span blocks does the I/O + * layer use a copy buffer to provide you with a contiguous block of + * data. The __archive_read_skip() is an optimization; it scans ahead + * very quickly (it usually translates into a seek() operation if + * you're reading uncompressed disk files). + * + * A couple of useful idioms: + * * "I just want some data." Ask for 1 byte and pay attention to + * the "number of bytes available" from __archive_read_ahead(). + * You can consume more than you asked for; you just can't consume + * more than is available right now. If you consume everything that's + * immediately available, the next read_ahead() call will pull + * the next block. + * * "I want to output a large block of data." As above, ask for 1 byte, + * emit all that's available (up to whatever limit you have), then + * repeat until you're done. + * * "I want to peek ahead by a large amount." Ask for 4k or so, then + * double and repeat until you get an error or have enough. Note + * that the I/O layer will likely end up expanding its copy buffer + * to fit your request, so use this technique cautiously. This + * technique is used, for example, by some of the format tasting + * code that has uncertain look-ahead needs. + * + * TODO: Someday, provide a more generic __archive_read_seek() for + * those cases where it's useful. This is tricky because there are lots + * of cases where seek() is not available (reading gzip data from a + * network socket, for instance), so there needs to be a good way to + * communicate whether seek() is available and users of that interface + * need to use non-seeking strategies whenever seek() is not available. + */ + +/* + * Looks ahead in the input stream: + * * If 'avail' pointer is provided, that returns number of bytes available + * in the current buffer, which may be much larger than requested. + * * If end-of-file, *avail gets set to zero. + * * If error, *avail gets error code. + * * If request can be met, returns pointer to data, returns NULL + * if request is not met. + * + * Note: If you just want "some data", ask for 1 byte and pay attention + * to *avail, which will have the actual amount available. If you + * know exactly how many bytes you need, just ask for that and treat + * a NULL return as an error. + * + * Important: This does NOT move the file pointer. See + * __archive_read_consume() below. + */ + +/* + * This is tricky. We need to provide our clients with pointers to + * contiguous blocks of memory but we want to avoid copying whenever + * possible. + * + * Mostly, this code returns pointers directly into the block of data + * provided by the client_read routine. It can do this unless the + * request would split across blocks. In that case, we have to copy + * into an internal buffer to combine reads. + */ const void * -__archive_read_ahead(struct archive_read *a, size_t len) +__archive_read_ahead(struct archive_read *a, size_t min, ssize_t *avail) { - const void *h; + ssize_t bytes_read; + size_t tocopy; - if ((a->decompressor->read_ahead)(a, &h, len) < (ssize_t)len) + if (a->fatal) { + if (avail) + *avail = ARCHIVE_FATAL; return (NULL); - return (h); + } + + /* + * Keep pulling more data until we can satisfy the request. + */ + for (;;) { + + /* + * If we can satisfy from the copy buffer, we're done. + */ + if (a->avail >= min) { + if (avail != NULL) + *avail = a->avail; + return (a->next); + } + + /* + * We can satisfy directly from client buffer if everything + * currently in the copy buffer is still in the client buffer. + */ + if (a->client_total >= a->client_avail + a->avail + && a->client_avail + a->avail >= min) { + /* "Roll back" to client buffer. */ + a->client_avail += a->avail; + a->client_next -= a->avail; + /* Copy buffer is now empty. */ + a->avail = 0; + a->next = a->buffer; + /* Return data from client buffer. */ + if (avail != NULL) + *avail = a->client_avail; + return (a->client_next); + } + + /* Move data forward in copy buffer if necessary. */ + if (a->next > a->buffer && + a->next + min > a->buffer + a->buffer_size) { + if (a->avail > 0) + memmove(a->buffer, a->next, a->avail); + a->next = a->buffer; + } + + /* If we've used up the client data, get more. */ + if (a->client_avail <= 0) { + if (a->end_of_file) { + if (avail != NULL) + *avail = 0; + return (NULL); + } + bytes_read = (a->source->read)(a->source, + &a->client_buff); + if (bytes_read < 0) { /* Read error. */ + a->client_total = a->client_avail = 0; + a->client_next = a->client_buff = NULL; + a->fatal = 1; + if (avail != NULL) + *avail = ARCHIVE_FATAL; + return (NULL); + } + if (bytes_read == 0) { /* Premature end-of-file. */ + a->client_total = a->client_avail = 0; + a->client_next = a->client_buff = NULL; + a->end_of_file = 1; + /* Return whatever we do have. */ + if (avail != NULL) + *avail = a->avail; + return (NULL); + } + a->archive.raw_position += bytes_read; + a->client_total = bytes_read; + a->client_avail = a->client_total; + a->client_next = a->client_buff; + } + else + { + /* + * We can't satisfy the request from the copy + * buffer or the existing client data, so we + * need to copy more client data over to the + * copy buffer. + */ + + /* Ensure the buffer is big enough. */ + if (min > a->buffer_size) { + size_t s, t; + char *p; + + /* Double the buffer; watch for overflow. */ + s = t = a->buffer_size; + while (s < min) { + t *= 2; + if (t <= s) { /* Integer overflow! */ + archive_set_error(&a->archive, + ENOMEM, + "Unable to allocate copy buffer"); + a->fatal = 1; + if (avail != NULL) + *avail = ARCHIVE_FATAL; + return (NULL); + } + s = t; + } + /* Now s >= min, so allocate a new buffer. */ + p = (char *)malloc(s); + if (p == NULL) { + archive_set_error(&a->archive, ENOMEM, + "Unable to allocate copy buffer"); + a->fatal = 1; + if (avail != NULL) + *avail = ARCHIVE_FATAL; + return (NULL); + } + /* Move data into newly-enlarged buffer. */ + if (a->avail > 0) + memmove(p, a->next, a->avail); + free(a->buffer); + a->next = a->buffer = p; + a->buffer_size = s; + } + + /* We can add client data to copy buffer. */ + /* First estimate: copy to fill rest of buffer. */ + tocopy = (a->buffer + a->buffer_size) + - (a->next + a->avail); + /* Don't waste time buffering more than we need to. */ + if (tocopy + a->avail > min) + tocopy = min - a->avail; + /* Don't copy more than is available. */ + if (tocopy > a->client_avail) + tocopy = a->client_avail; + + memcpy(a->next + a->avail, a->client_next, + tocopy); + /* Remove this data from client buffer. */ + a->client_next += tocopy; + a->client_avail -= tocopy; + /* add it to copy buffer. */ + a->avail += tocopy; + } + } +} + +/* + * Move the file pointer forward. This should be called after + * __archive_read_ahead() returns data to you. Don't try to move + * ahead by more than the amount of data available according to + * __archive_read_ahead(). + */ +/* + * Mark the appropriate data as used. Note that the request here will + * often be much smaller than the size of the previous read_ahead + * request. + */ +ssize_t +__archive_read_consume(struct archive_read *a, size_t request) +{ + if (a->avail > 0) { + /* Read came from copy buffer. */ + a->next += request; + a->avail -= request; + } else { + /* Read came from client buffer. */ + a->client_next += request; + a->client_avail -= request; + } + a->archive.file_position += request; + return (request); +} + +/* + * Move the file pointer ahead by an arbitrary amount. If you're + * reading uncompressed data from a disk file, this will actually + * translate into a seek() operation. Even in cases where seek() + * isn't feasible, this at least pushes the read-and-discard loop + * down closer to the data source. + */ +int64_t +__archive_read_skip(struct archive_read *a, int64_t request) +{ + off_t bytes_skipped, total_bytes_skipped = 0; + size_t min; + + if (a->fatal) + return (-1); + /* + * If there is data in the buffers already, use that first. + */ + if (a->avail > 0) { + min = minimum(request, (off_t)a->avail); + bytes_skipped = __archive_read_consume(a, min); + request -= bytes_skipped; + total_bytes_skipped += bytes_skipped; + } + if (a->client_avail > 0) { + min = minimum(request, (off_t)a->client_avail); + bytes_skipped = __archive_read_consume(a, min); + request -= bytes_skipped; + total_bytes_skipped += bytes_skipped; + } + if (request == 0) + return (total_bytes_skipped); + /* + * If a client_skipper was provided, try that first. + */ +#if ARCHIVE_API_VERSION < 2 + if ((a->source->skip != NULL) && (request < SSIZE_MAX)) { +#else + if (a->source->skip != NULL) { +#endif + bytes_skipped = (a->source->skip)(a->source, request); + if (bytes_skipped < 0) { /* error */ + a->client_total = a->client_avail = 0; + a->client_next = a->client_buff = NULL; + a->fatal = 1; + return (bytes_skipped); + } + total_bytes_skipped += bytes_skipped; + a->archive.file_position += bytes_skipped; + request -= bytes_skipped; + a->client_next = a->client_buff; + a->archive.raw_position += bytes_skipped; + a->client_avail = a->client_total = 0; + } + /* + * Note that client_skipper will usually not satisfy the + * full request (due to low-level blocking concerns), + * so even if client_skipper is provided, we may still + * have to use ordinary reads to finish out the request. + */ + while (request > 0) { + const void* dummy_buffer; + ssize_t bytes_read; + dummy_buffer = __archive_read_ahead(a, 1, &bytes_read); + if (bytes_read < 0) + return (bytes_read); + if (bytes_read == 0) { + /* We hit EOF before we satisfied the skip request. */ + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "Truncated input file (need to skip %jd bytes)", + (intmax_t)request); + return (ARCHIVE_FATAL); + } + min = (size_t)(minimum(bytes_read, request)); + bytes_read = __archive_read_consume(a, min); + total_bytes_skipped += bytes_read; + request -= bytes_read; + } + return (total_bytes_skipped); } Modified: head/lib/libarchive/archive_read_private.h ============================================================================== --- head/lib/libarchive/archive_read_private.h Sat Dec 6 06:23:37 2008 (r185678) +++ head/lib/libarchive/archive_read_private.h Sat Dec 6 06:45:15 2008 (r185679) @@ -32,6 +32,75 @@ #include "archive_string.h" #include "archive_private.h" +struct archive_read; +struct archive_reader; +struct archive_read_source; + +/* + * A "reader" knows how to provide blocks. That can include something + * that reads blocks from disk or socket or a transformation layer + * that reads blocks from another source and transforms them. This + * includes decompression and decryption filters. + * + * How bidding works: + * * The bid manager reads the first block from the current source. + * * It shows that block to each registered bidder. + * * The winning bidder is initialized (with the block and information + * about the source) + * * The winning bidder becomes the new source and the process repeats + * This ends only when no reader provides a non-zero bid. + */ +struct archive_reader { + /* Configuration data for the reader. */ + void *data; + /* Bidder is handed the initial block from its source. */ + int (*bid)(struct archive_reader *, const void *buff, size_t); + /* Init() is given the archive, upstream source, and the initial + * block above. It returns a populated source structure. */ + struct archive_read_source *(*init)(struct archive_read *, + struct archive_reader *, struct archive_read_source *source, + const void *, size_t); + /* Release the reader and any configuration data it allocated. */ + int (*free)(struct archive_reader *); +}; + +/* + * A "source" is an instance of a reader. This structure is + * allocated and initialized by the init() method of a reader + * above. + */ +struct archive_read_source { + /* Essentially all sources will need these values, so + * just declare them here. */ + struct archive_reader *reader; /* Reader that I'm an instance of. */ + struct archive_read_source *upstream; /* Who I get blocks from. */ + struct archive_read *archive; /* associated archive. */ + /* Return next block. */ + ssize_t (*read)(struct archive_read_source *, const void **); + /* Skip forward this many bytes. */ + int64_t (*skip)(struct archive_read_source *self, int64_t request); + /* Close (recursively) and free(self). */ + int (*close)(struct archive_read_source *self); + /* My private data. */ + void *data; +}; + +/* + * The client source is almost the same as an internal source. + * + * TODO: Make archive_read_source and archive_read_client identical so + * that users of the library can easily register their own + * transformation filters. This will probably break the API/ABI and + * so should be deferred until libarchive 3.0. + */ +struct archive_read_client { + archive_open_callback *opener; + archive_read_callback *reader; + archive_skip_callback *skipper; + archive_close_callback *closer; + void *data; +}; + struct archive_read { struct archive archive; @@ -50,46 +119,30 @@ struct archive_read { off_t read_data_output_offset; size_t read_data_remaining; - /* Callbacks to open/read/write/close archive stream. */ - archive_open_callback *client_opener; - archive_read_callback *client_reader; - archive_skip_callback *client_skipper; - archive_close_callback *client_closer; - void *client_data; + /* Callbacks to open/read/write/close client archive stream. */ + struct archive_read_client client; + + /* Registered readers. */ + struct archive_reader readers[8]; + + /* Source */ + struct archive_read_source *source; /* File offset of beginning of most recently-read header. */ off_t header_position; - /* - * Decompressors have a very specific lifecycle: - * public setup function initializes a slot in this table - * 'config' holds minimal configuration data - * bid() examines a block of data and returns a bid [1] - * init() is called for successful bidder - * 'data' is initialized by init() - * read() returns a pointer to the next block of data - * consume() indicates how much data is used - * skip() ignores bytes of data - * finish() cleans up and frees 'data' and 'config' - * - * [1] General guideline: bid the number of bits that you actually - * test, e.g., 16 if you test a 2-byte magic value. - */ - struct decompressor_t { - void *config; - void *data; - int (*bid)(const void *buff, size_t); - int (*init)(struct archive_read *, - const void *buff, size_t); - int (*finish)(struct archive_read *); - ssize_t (*read_ahead)(struct archive_read *, - const void **, size_t); - ssize_t (*consume)(struct archive_read *, size_t); - off_t (*skip)(struct archive_read *, off_t); - } decompressors[4]; - /* Pointer to current decompressor. */ - struct decompressor_t *decompressor; + /* Used by reblocking logic. */ + char *buffer; + size_t buffer_size; + char *next; /* Current read location. */ + size_t avail; /* Bytes in my buffer. */ + const void *client_buff; /* Client buffer information. */ + size_t client_total; + const char *client_next; + size_t client_avail; + char end_of_file; + char fatal; /* * Format detection is mostly the same as compression @@ -124,12 +177,13 @@ int __archive_read_register_format(struc int (*read_data_skip)(struct archive_read *), int (*cleanup)(struct archive_read *)); -struct decompressor_t - *__archive_read_register_compression(struct archive_read *a, - int (*bid)(const void *, size_t), - int (*init)(struct archive_read *, const void *, size_t)); +struct archive_reader + *__archive_read_get_reader(struct archive_read *a); const void - *__archive_read_ahead(struct archive_read *, size_t); - + *__archive_read_ahead(struct archive_read *, size_t, ssize_t *); +ssize_t + __archive_read_consume(struct archive_read *, size_t); +int64_t + __archive_read_skip(struct archive_read *, int64_t); #endif Modified: head/lib/libarchive/archive_read_support_compression_all.c ============================================================================== --- head/lib/libarchive/archive_read_support_compression_all.c Sat Dec 6 06:23:37 2008 (r185678) +++ head/lib/libarchive/archive_read_support_compression_all.c Sat Dec 6 06:45:15 2008 (r185679) @@ -39,5 +39,11 @@ archive_read_support_compression_all(str #if HAVE_ZLIB_H archive_read_support_compression_gzip(a); #endif +#if HAVE_LZMADEC_H + /* LZMA bidding is subject to false positives because + * the LZMA file format has a very weak signature. It + * may not be feasible to include LZMA detection here. */ + /* archive_read_support_compression_lzma(a); */ +#endif return (ARCHIVE_OK); } Modified: head/lib/libarchive/archive_read_support_compression_bzip2.c ============================================================================== --- head/lib/libarchive/archive_read_support_compression_bzip2.c Sat Dec 6 06:23:37 2008 (r185678) +++ head/lib/libarchive/archive_read_support_compression_bzip2.c Sat Dec 6 06:45:15 2008 (r185679) @@ -51,30 +51,49 @@ __FBSDID("$FreeBSD$"); #if HAVE_BZLIB_H struct private_data { bz_stream stream; - char *uncompressed_buffer; - size_t uncompressed_buffer_size; - char *read_next; - int64_t total_out; + char *out_block; + size_t out_block_size; + char valid; /* True = decompressor is initialized */ char eof; /* True = found end of compressed data. */ }; -static int finish(struct archive_read *); -static ssize_t read_ahead(struct archive_read *, const void **, size_t); -static ssize_t read_consume(struct archive_read *, size_t); -static int drive_decompressor(struct archive_read *a, struct private_data *); +/* Bzip2 source */ +static ssize_t bzip2_source_read(struct archive_read_source *, const void **); +static int bzip2_source_close(struct archive_read_source *); #endif -/* These two functions are defined even if we lack the library. See below. */ -static int bid(const void *, size_t); -static int init(struct archive_read *, const void *, size_t); +/* + * Note that we can detect bzip2 archives even if we can't decompress + * them. (In fact, we like detecting them because we can give better + * error messages.) So the bid framework here gets compiled even + * if bzlib is unavailable. + */ +static int bzip2_reader_bid(struct archive_reader *, const void *, size_t); +static struct archive_read_source *bzip2_reader_init(struct archive_read *, + struct archive_reader *, struct archive_read_source *, + const void *, size_t); +static int bzip2_reader_free(struct archive_reader *); int archive_read_support_compression_bzip2(struct archive *_a) { struct archive_read *a = (struct archive_read *)_a; - if (__archive_read_register_compression(a, bid, init) != NULL) - return (ARCHIVE_OK); - return (ARCHIVE_FATAL); + struct archive_reader *reader = __archive_read_get_reader(a); + + if (reader == NULL) + return (ARCHIVE_FATAL); + + reader->data = NULL; + reader->bid = bzip2_reader_bid; + reader->init = bzip2_reader_init; + reader->free = bzip2_reader_free; + return (ARCHIVE_OK); +} + +static int +bzip2_reader_free(struct archive_reader *self){ + (void)self; /* UNUSED */ + return (ARCHIVE_OK); } /* @@ -85,11 +104,13 @@ archive_read_support_compression_bzip2(s * from verifying as much as we would like. */ static int -bid(const void *buff, size_t len) +bzip2_reader_bid(struct archive_reader *self, const void *buff, size_t len) { const unsigned char *buffer; int bits_checked; + (void)self; /* UNUSED */ + if (len < 1) return (0); @@ -150,16 +171,19 @@ bid(const void *buff, size_t len) * decompression. We can, however, still detect compressed archives * and emit a useful message. */ -static int -init(struct archive_read *a, const void *buff, size_t n) +static struct archive_read_source * +bzip2_reader_init(struct archive_read *a, struct archive_reader *reader, + struct archive_read_source *upstream, const void *buff, size_t n) { (void)a; /* UNUSED */ + (void)reader; /* UNUSED */ + (void)upstream; /* UNUSED */ (void)buff; /* UNUSED */ (void)n; /* UNUSED */ archive_set_error(&a->archive, -1, "This version of libarchive was compiled without bzip2 support"); - return (ARCHIVE_FATAL); + return (NULL); } @@ -168,37 +192,42 @@ init(struct archive_read *a, const void /* * Setup the callbacks. */ -static int -init(struct archive_read *a, const void *buff, size_t n) *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***