Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 23 Dec 2010 14:43:12 +0200
From:      Alex Kozlov <spam@rm-rf.kiev.ua>
To:        Jeremy Chadwick <freebsd@jdc.parodius.com>, freebsd-stable@freebsd.org, spam@rm-rf.kiev.ua
Subject:   Re: 8.2-PRERELEASE: bsdtar does not recognise xz -z9 compression
Message-ID:  <20101223124312.GA10405@ravenloft.kiev.ua>

next in thread | raw e-mail | index | archive | help

--zhXaljGHf11kAtnf
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

Hi, stable

Possible fix for this issue. (MFC r201167):


--
Adios

--zhXaljGHf11kAtnf
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="patch.txt"

Index: lib/libarchive/archive_read_support_compression_xz.c
@@ -48,6 +48,7 @@
 #endif
 
 #include "archive.h"
+#include "archive_endian.h"
 #include "archive_private.h"
 #include "archive_read_private.h"
 
@@ -205,37 +206,100 @@
 {
 	const unsigned char *buffer;
 	ssize_t avail;
+	uint32_t dicsize;
+	uint64_t uncompressed_size;
 	int bits_checked;
 
 	(void)self; /* UNUSED */
 
-	buffer = __archive_read_filter_ahead(filter, 6, &avail);
+	buffer = __archive_read_filter_ahead(filter, 14, &avail);
 	if (buffer == NULL)
 		return (0);
 
-	/* First byte of raw LZMA stream is always 0x5d. */
+	/* First byte of raw LZMA stream is commonly 0x5d.
+	 * The first byte is a special number, which consists of
+	 * three parameters of LZMA compression, a number of literal
+	 * context bits(which is from 0 to 8, default is 3), a number
+	 * of literal pos bits(which is from 0 to 4, default is 0),
+	 * a number of pos bits(which is from 0 to 4, default is 2).
+	 * The first byte is made by
+	 * (pos bits * 5 + literal pos bits) * 9 + literal context bits,
+	 * and so the default value in this field is
+	 * (2 * 5 + 0) * 9 + 3 = 0x5d.
+	 * lzma of LZMA SDK has options to change those parameters.
+	 * It means a range of this field is from 0 to 224. And lzma of
+	 * XZ Utils with option -e records 0x5e in this field. */
+	/* NOTE: If this checking of the first byte increases false
+	 * recognition, we should allow only 0x5d and 0x5e for the first
+	 * byte of LZMA stream. */
 	bits_checked = 0;
-	if (buffer[0] != 0x5d)
-		return (0);
-	bits_checked += 8;
-
-	/* Second through fifth bytes are dictionary code, stored in
-	 * little-endian order.  The two least-significant bytes are
-	 * always zero. */
-	if (buffer[1] != 0 || buffer[2] != 0)
+	if (buffer[0] > (4 * 5 + 4) * 9 + 8)
 		return (0);
-	bits_checked += 16;
-
-	/* ??? TODO:  Fix this. ??? */
-	/* NSIS format check uses this, but I've seen tar.lzma
-	 * archives where this byte is 0xff, not 0.  Can it
-	 * ever be anything other than 0 or 0xff?
-	 */
-#if 0
-	if (buffer[5] != 0)
+	/* Most likely value in the first byte of LZMA stream. */
+	if (buffer[0] == 0x5d || buffer[0] == 0x5e)
+		bits_checked += 8;
+
+	/* Sixth through thirteenth bytes are uncompressed size,
+	 * stored in little-endian order. `-1' means uncompressed
+	 * size is unknown and lzma of XZ Utils always records `-1'
+	 * in this field. */
+	uncompressed_size = archive_le64dec(buffer+5);
+	if (uncompressed_size == (uint64_t)ARCHIVE_LITERAL_LL(-1))
+		bits_checked += 64;
+
+	/* Second through fifth bytes are dictionary size, stored in
+	 * little-endian order. The minimum dictionary size is
+	 * 1 << 12(4KiB) which the lzma of LZMA SDK uses with option
+	 * -d12 and the maximum dictionary size is 1 << 27(128MiB)
+	 * which the one uses with option -d27.
+	 * NOTE: A comment of LZMA SDK source code says this dictionary
+	 * range is from 1 << 12 to 1 << 30. */
+	dicsize = archive_le32dec(buffer+1);
+	switch (dicsize) {
+	case 0x00001000:/* lzma of LZMA SDK option -d12. */
+	case 0x00002000:/* lzma of LZMA SDK option -d13. */
+	case 0x00004000:/* lzma of LZMA SDK option -d14. */
+	case 0x00008000:/* lzma of LZMA SDK option -d15. */
+	case 0x00010000:/* lzma of XZ Utils option -0 and -1.
+			 * lzma of LZMA SDK option -d16. */
+	case 0x00020000:/* lzma of LZMA SDK option -d17. */
+	case 0x00040000:/* lzma of LZMA SDK option -d18. */
+	case 0x00080000:/* lzma of XZ Utils option -2.
+			 * lzma of LZMA SDK option -d19. */
+	case 0x00100000:/* lzma of XZ Utils option -3.
+			 * lzma of LZMA SDK option -d20. */
+	case 0x00200000:/* lzma of XZ Utils option -4.
+			 * lzma of LZMA SDK option -d21. */
+	case 0x00400000:/* lzma of XZ Utils option -5.
+			 * lzma of LZMA SDK option -d22. */
+	case 0x00800000:/* lzma of XZ Utils option -6.
+			 * lzma of LZMA SDK option -d23. */
+	case 0x01000000:/* lzma of XZ Utils option -7.
+			 * lzma of LZMA SDK option -d24. */
+	case 0x02000000:/* lzma of XZ Utils option -8.
+			 * lzma of LZMA SDK option -d25. */
+	case 0x04000000:/* lzma of XZ Utils option -9.
+			 * lzma of LZMA SDK option -d26. */
+	case 0x08000000:/* lzma of LZMA SDK option -d27. */
+		bits_checked += 32;
+		break;
+	default:
+		/* If there was not enough memory for encoding on
+		 * the platform where the LZMA stream was made, lzma
+		 * of XZ Utils automatically decreased the dictionary
+		 * size in steps of 1 MiB (1 << 20) until encoding
+		 * fit in the available memory. */
+		if (dicsize <= 0x03F00000 && dicsize >= 0x00300000 &&
+		    (dicsize & ((1 << 20)-1)) == 0 &&
+		    bits_checked == 8 + 64) {
+			bits_checked += 32;
+			break;
+		}
+		/* Otherwise the dictionary size is unlikely. But it
+		 * is possible that someone made the lzma stream with
+		 * liblzma/LZMA SDK using a custom dictionary size. */
 		return (0);
-	bits_checked += 8;
-#endif
+	}
 
 	/* TODO: The above test is still very weak.  It would be
 	 * good to do better. */
@@ -304,11 +368,11 @@
 	 */
 	if (self->code == ARCHIVE_COMPRESSION_XZ)
 		ret = lzma_stream_decoder(&(state->stream),
-		    (1U << 23) + (1U << 21),/* memlimit */
+		    (1U << 30),/* memlimit */
 		    LZMA_CONCATENATED);
 	else
 		ret = lzma_alone_decoder(&(state->stream),
-		    (1U << 23) + (1U << 21));/* memlimit */
+		    (1U << 30));/* memlimit */
 
 	if (ret == LZMA_OK)
 		return (ARCHIVE_OK);

--zhXaljGHf11kAtnf--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20101223124312.GA10405>