Date: Thu, 23 Dec 2010 14:43:12 +0200 From: Alex Kozlov <spam@rm-rf.kiev.ua> To: Jeremy Chadwick <freebsd@jdc.parodius.com>, freebsd-stable@freebsd.org, spam@rm-rf.kiev.ua Subject: Re: 8.2-PRERELEASE: bsdtar does not recognise xz -z9 compression Message-ID: <20101223124312.GA10405@ravenloft.kiev.ua>
next in thread | raw e-mail | index | archive | help
--zhXaljGHf11kAtnf Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Hi, stable Possible fix for this issue. (MFC r201167): -- Adios --zhXaljGHf11kAtnf Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="patch.txt" Index: lib/libarchive/archive_read_support_compression_xz.c @@ -48,6 +48,7 @@ #endif #include "archive.h" +#include "archive_endian.h" #include "archive_private.h" #include "archive_read_private.h" @@ -205,37 +206,100 @@ { const unsigned char *buffer; ssize_t avail; + uint32_t dicsize; + uint64_t uncompressed_size; int bits_checked; (void)self; /* UNUSED */ - buffer = __archive_read_filter_ahead(filter, 6, &avail); + buffer = __archive_read_filter_ahead(filter, 14, &avail); if (buffer == NULL) return (0); - /* First byte of raw LZMA stream is always 0x5d. */ + /* First byte of raw LZMA stream is commonly 0x5d. + * The first byte is a special number, which consists of + * three parameters of LZMA compression, a number of literal + * context bits(which is from 0 to 8, default is 3), a number + * of literal pos bits(which is from 0 to 4, default is 0), + * a number of pos bits(which is from 0 to 4, default is 2). + * The first byte is made by + * (pos bits * 5 + literal pos bit) * 9 + * literal context bit, + * and so the default value in this field is + * (2 * 5 + 0) * 9 + 3 = 0x5d. + * lzma of LZMA SDK has options to change those parameters. + * It means a range of this field is from 0 to 224. And lzma of + * XZ Utils with option -e records 0x5e in this field. */ + /* NOTE: If this checking of the first byte increases false + * recognition, we should allow only 0x5d and 0x5e for the first + * byte of LZMA stream. */ bits_checked = 0; - if (buffer[0] != 0x5d) - return (0); - bits_checked += 8; - - /* Second through fifth bytes are dictionary code, stored in - * little-endian order. The two least-significant bytes are - * always zero.
*/ - if (buffer[1] != 0 || buffer[2] != 0) + if (buffer[0] > (4 * 5 + 4) * 9 + 8) return (0); - bits_checked += 16; - - /* ??? TODO: Fix this. ??? */ - /* NSIS format check uses this, but I've seen tar.lzma - * archives where this byte is 0xff, not 0. Can it - * ever be anything other than 0 or 0xff? - */ -#if 0 - if (buffer[5] != 0) + /* Most likely value in the first byte of LZMA stream. */ + if (buffer[0] == 0x5d || buffer[0] == 0x5e) + bits_checked += 8; + + /* Sixth through thirteenth bytes are uncompressed size, + * stored in little-endian order. `-1' means uncompressed + * size is unknown and lzma of XZ Utils always records `-1' + * in this field. */ + uncompressed_size = archive_le64dec(buffer+5); + if (uncompressed_size == (uint64_t)ARCHIVE_LITERAL_LL(-1)) + bits_checked += 64; + + /* Second through fifth bytes are dictionary size, stored in + * little-endian order. The minimum dictionary size is + * 1 << 12(4KiB) which the lzma of LZMA SDK uses with option + * -d12 and the maximum dictionary size is 1 << 27(128MiB) + * which the one uses with option -d27. + * NOTE: A comment of LZMA SDK source code says this dictionary + * range is from 1 << 12 to 1 << 30. */ + dicsize = archive_le32dec(buffer+1); + switch (dicsize) { + case 0x00001000:/* lzma of LZMA SDK option -d12. */ + case 0x00002000:/* lzma of LZMA SDK option -d13. */ + case 0x00004000:/* lzma of LZMA SDK option -d14. */ + case 0x00008000:/* lzma of LZMA SDK option -d15. */ + case 0x00010000:/* lzma of XZ Utils option -0 and -1. + * lzma of LZMA SDK option -d16. */ + case 0x00020000:/* lzma of LZMA SDK option -d17. */ + case 0x00040000:/* lzma of LZMA SDK option -d18. */ + case 0x00080000:/* lzma of XZ Utils option -2. + * lzma of LZMA SDK option -d19. */ + case 0x00100000:/* lzma of XZ Utils option -3. + * lzma of LZMA SDK option -d20. */ + case 0x00200000:/* lzma of XZ Utils option -4. + * lzma of LZMA SDK option -d21. */ + case 0x00400000:/* lzma of XZ Utils option -5.
+ * lzma of LZMA SDK option -d22. */ + case 0x00800000:/* lzma of XZ Utils option -6. + * lzma of LZMA SDK option -d23. */ + case 0x01000000:/* lzma of XZ Utils option -7. + * lzma of LZMA SDK option -d24. */ + case 0x02000000:/* lzma of XZ Utils option -8. + * lzma of LZMA SDK option -d25. */ + case 0x04000000:/* lzma of XZ Utils option -9. + * lzma of LZMA SDK option -d26. */ + case 0x08000000:/* lzma of LZMA SDK option -d27. */ + bits_checked += 32; + break; + default: + /* If a memory usage for encoding was not enough on + * the platform where LZMA stream was made, lzma of + * XZ Utils automatically decreased the dictionary + * size to enough memory for encoding by 1Mi bytes + * (1 << 20).*/ + if (dicsize <= 0x03F00000 && dicsize >= 0x00300000 && + (dicsize & ((1 << 20)-1)) == 0 && + bits_checked == 8 + 64) { + bits_checked += 32; + break; + } + /* Otherwise dictionary size is unlikely. But it is + * possible that someone makes lzma stream with + * liblzma/LZMA SDK in one's dictionary size. */ return (0); - bits_checked += 8; -#endif + } /* TODO: The above test is still very weak. It would be * good to do better. */ @@ -304,11 +368,11 @@ */ if (self->code == ARCHIVE_COMPRESSION_XZ) ret = lzma_stream_decoder(&(state->stream), - (1U << 23) + (1U << 21),/* memlimit */ + (1U << 30),/* memlimit */ LZMA_CONCATENATED); else ret = lzma_alone_decoder(&(state->stream), - (1U << 23) + (1U << 21));/* memlimit */ + (1U << 30));/* memlimit */ if (ret == LZMA_OK) return (ARCHIVE_OK); --zhXaljGHf11kAtnf--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20101223124312.GA10405>