Date: Mon, 21 Jan 2019 06:52:35 +0000 (UTC) From: Xin LI <delphij@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org Subject: svn commit: r343251 - stable/11/usr.bin/gzip Message-ID: <201901210652.x0L6qZI3045672@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: delphij Date: Mon Jan 21 06:52:35 2019 New Revision: 343251 URL: https://svnweb.freebsd.org/changeset/base/343251 Log: MFC r342845,342846: Port NetBSD improvements: - Add -l support for xz files - Add lzip support to gzip based on the example lzip decoder. Obtained from: NetBSD Relnotes: yes Added: stable/11/usr.bin/gzip/unlz.c - copied unchanged from r342845, head/usr.bin/gzip/unlz.c Modified: stable/11/usr.bin/gzip/gzip.1 stable/11/usr.bin/gzip/gzip.c stable/11/usr.bin/gzip/unxz.c Directory Properties: stable/11/ (props changed) Modified: stable/11/usr.bin/gzip/gzip.1 ============================================================================== --- stable/11/usr.bin/gzip/gzip.1 Mon Jan 21 06:14:26 2019 (r343250) +++ stable/11/usr.bin/gzip/gzip.1 Mon Jan 21 06:52:35 2019 (r343251) @@ -1,4 +1,4 @@ -.\" $NetBSD: gzip.1,v 1.30 2017/10/22 17:36:49 abhinav Exp $ +.\" $NetBSD: gzip.1,v 1.31 2018/10/26 22:10:15 christos Exp $ .\" .\" Copyright (c) 1997, 2003, 2004, 2008, 2009, 2015, 2017 Matthew R. Green .\" All rights reserved. @@ -25,7 +25,7 @@ .\" SUCH DAMAGE. .\" .\" $FreeBSD$ -.Dd November 21, 2017 +.Dd January 7, 2019 .Dt GZIP 1 .Os .Sh NAME @@ -109,6 +109,7 @@ This version of is also capable of decompressing files compressed using .Xr compress 1 , .Xr bzip2 1 , +.Ar lzip , or .Xr xz 1 . .Sh OPTIONS @@ -224,7 +225,7 @@ This implementation of was ported based on the .Nx .Nm -version 20170803, +version 20181111, and first appeared in .Fx 7.0 . .Sh AUTHORS Modified: stable/11/usr.bin/gzip/gzip.c ============================================================================== --- stable/11/usr.bin/gzip/gzip.c Mon Jan 21 06:14:26 2019 (r343250) +++ stable/11/usr.bin/gzip/gzip.c Mon Jan 21 06:52:35 2019 (r343251) @@ -1,4 +1,4 @@ -/* $NetBSD: gzip.c,v 1.113 2018/06/12 00:42:17 kamil Exp $ */ +/* $NetBSD: gzip.c,v 1.116 2018/10/27 11:39:12 skrll Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause-NetBSD @@ -84,6 +84,9 @@ enum filetype { #ifndef NO_XZ_SUPPORT FT_XZ, #endif +#ifndef NO_LZ_SUPPORT + FT_LZ, +#endif FT_LAST, FT_UNKNOWN }; @@ -110,6 +113,11 @@ enum filetype { #define XZ_MAGIC "\3757zXZ" #endif +#ifndef NO_LZ_SUPPORT +#define LZ_SUFFIX ".lz" +#define LZ_MAGIC "LZIP" +#endif + #define GZ_SUFFIX ".gz" #define BUFLEN (64 * 1024) @@ -155,6 +163,9 @@ static suffixes_t suffixes[] = { #ifndef NO_XZ_SUPPORT SUFFIX(XZ_SUFFIX, ""), #endif +#ifndef NO_LZ_SUPPORT + SUFFIX(LZ_SUFFIX, ""), +#endif SUFFIX(GZ_SUFFIX, ""), /* Overwritten by -S "" */ #endif /* SMALL */ #undef SUFFIX @@ -162,7 +173,7 @@ static suffixes_t suffixes[] = { #define NUM_SUFFIXES (nitems(suffixes)) #define SUFFIX_MAXLEN 30 -static const char gzip_version[] = "FreeBSD gzip 20171121"; +static const char gzip_version[] = "FreeBSD gzip 20190107"; #ifndef SMALL static const char gzip_copyright[] = \ @@ -246,6 +257,7 @@ static void display_license(void); static const suffixes_t *check_suffix(char *, int); static ssize_t read_retry(int, void *, size_t); static ssize_t write_retry(int, const void *, size_t); +static void print_list_out(off_t, off_t, const char*); #ifdef SMALL #define infile_set(f,t) infile_set(f) @@ -289,8 +301,13 @@ static off_t unpack(int, int, char *, size_t, off_t *) #ifndef NO_XZ_SUPPORT static off_t unxz(int, int, char *, size_t, off_t *); +static off_t unxz_len(int); #endif +#ifndef NO_LZ_SUPPORT +static off_t unlz(int, int, char *, size_t, off_t *); +#endif + #ifdef SMALL #define getopt_long(a,b,c,d,e) getopt(a,b,c) #else @@ -1159,6 +1176,11 @@ file_gettype(u_char *buf) return FT_XZ; else #endif +#ifndef NO_LZ_SUPPORT + if (memcmp(buf, LZ_MAGIC, 4) == 0) + return FT_LZ; + else +#endif return FT_UNKNOWN; } @@ -1633,14 +1655,23 @@ file_uncompress(char *file, char *outfile, size_t outs #ifndef NO_XZ_SUPPORT case FT_XZ: if (lflag) { - maybe_warnx("no -l with xz files"); - goto lose; + size = unxz_len(fd); + print_list_out(in_size, size, file); + return -1; } - size = unxz(fd, zfd, NULL, 0, NULL); break; #endif +#ifndef NO_LZ_SUPPORT + case FT_LZ: + if (lflag) { + maybe_warnx("no -l with lzip files"); + goto lose; + } + size = unlz(fd, zfd, NULL, 0, NULL); + break; +#endif #ifndef SMALL case FT_UNKNOWN: if (lflag) { @@ -1873,6 +1904,12 @@ handle_stdin(void) (char *)header1, sizeof header1, &gsize); break; #endif +#ifndef NO_LZ_SUPPORT + case FT_LZ: + usize = unlz(STDIN_FILENO, STDOUT_FILENO, + (char *)header1, sizeof header1, &gsize); + break; +#endif } #ifndef SMALL @@ -2198,6 +2235,12 @@ print_list(int fd, off_t out, const char *outfile, tim #else (void)&ts; /* XXX */ #endif + print_list_out(out, in, outfile); +} + +static void +print_list_out(off_t out, off_t in, const char *outfile) +{ printf("%12llu %12llu ", (unsigned long long)out, (unsigned long long)in); print_ratio(in, out, stdout); printf(" %s\n", outfile); @@ -2271,6 +2314,9 @@ display_version(void) #endif #ifndef NO_XZ_SUPPORT #include "unxz.c" +#endif +#ifndef NO_LZ_SUPPORT +#include "unlz.c" #endif static ssize_t Copied: stable/11/usr.bin/gzip/unlz.c (from r342845, head/usr.bin/gzip/unlz.c) ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ stable/11/usr.bin/gzip/unlz.c Mon Jan 21 06:52:35 2019 (r343251, copy of r342845, head/usr.bin/gzip/unlz.c) @@ -0,0 +1,646 @@ +/* $NetBSD: unlz.c,v 1.6 2018/11/11 01:42:36 christos Exp $ */ + +/*- + * Copyright (c) 2018 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Christos Zoulas. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* Lzd - Educational decompressor for the lzip format + Copyright (C) 2013-2018 Antonio Diaz Diaz. + + This program is free software. Redistribution and use in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ + +#include <sys/param.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdbool.h> +#include <errno.h> +#include <unistd.h> + +#define LZ_STATES 12 + +#define LITERAL_CONTEXT_BITS 3 +#define POS_STATE_BITS 2 +#define POS_STATES (1 << POS_STATE_BITS) +#define POS_STATE_MASK (POS_STATES - 1) + +#define STATES 4 +#define DIS_SLOT_BITS 6 + +#define DIS_MODEL_START 4 +#define DIS_MODEL_END 14 + +#define MODELED_DISTANCES (1 << (DIS_MODEL_END / 2)) +#define DIS_ALIGN_BITS 4 +#define DIS_ALIGN_SIZE (1 << DIS_ALIGN_BITS) + +#define LOW_BITS 3 +#define MID_BITS 3 +#define HIGH_BITS 8 + +#define LOW_SYMBOLS (1 << LOW_BITS) +#define MID_SYMBOLS (1 << MID_BITS) +#define HIGH_SYMBOLS (1 << HIGH_BITS) + +#define MAX_SYMBOLS (LOW_SYMBOLS + MID_SYMBOLS + HIGH_SYMBOLS) + +#define MIN_MATCH_LEN 2 + +#define BIT_MODEL_MOVE_BITS 5 +#define BIT_MODEL_TOTAL_BITS 11 +#define BIT_MODEL_TOTAL (1 << BIT_MODEL_TOTAL_BITS) +#define BIT_MODEL_INIT (BIT_MODEL_TOTAL / 2) + +static const int lz_st_next[] = { + 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5, +}; + +static bool +lz_st_is_char(int st) { + return st < 7; +} + +static int +lz_st_get_char(int st) { + return lz_st_next[st]; +} + +static int +lz_st_get_match(int st) { + return st < 7 ? 7 : 10; +} + +static int +lz_st_get_rep(int st) { + return st < 7 ? 8 : 11; +} + +static int +lz_st_get_short_rep(int st) { + return st < 7 ? 9 : 11; +} + +struct lz_len_model { + int choice1; + int choice2; + int bm_low[POS_STATES][LOW_SYMBOLS]; + int bm_mid[POS_STATES][MID_SYMBOLS]; + int bm_high[HIGH_SYMBOLS]; +}; + +static uint32_t lz_crc[256]; + +static void +lz_crc_init(void) +{ + for (unsigned i = 0; i < nitems(lz_crc); i++) { + unsigned c = i; + for (unsigned j = 0; j < 8; j++) { + if (c & 1) + c = 0xEDB88320U ^ (c >> 1); + else + c >>= 1; + } + lz_crc[i] = c; + } +} + +static void +lz_crc_update(uint32_t *crc, const uint8_t *buf, size_t len) +{ + for (size_t i = 0; i < len; i++) + *crc = lz_crc[(*crc ^ buf[i]) & 0xFF] ^ (*crc >> 8); +} + +struct lz_range_decoder { + FILE *fp; + uint32_t code; + uint32_t range; +}; + +static int +lz_rd_create(struct lz_range_decoder *rd, FILE *fp) +{ + rd->fp = fp; + rd->code = 0; + rd->range = ~0; + for (int i = 0; i < 5; i++) + rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp); + return ferror(rd->fp) ? -1 : 0; +} + +static unsigned +lz_rd_decode(struct lz_range_decoder *rd, int num_bits) +{ + unsigned symbol = 0; + + for (int i = num_bits; i > 0; i--) { + rd->range >>= 1; + symbol <<= 1; + if (rd->code >= rd->range) { + rd->code -= rd->range; + symbol |= 1; + } + if (rd->range <= 0x00FFFFFFU) { + rd->range <<= 8; + rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp); + } + } + + return symbol; +} + +static unsigned +lz_rd_decode_bit(struct lz_range_decoder *rd, int *bm) +{ + unsigned symbol; + const uint32_t bound = (rd->range >> BIT_MODEL_TOTAL_BITS) * *bm; + + if(rd->code < bound) { + rd->range = bound; + *bm += (BIT_MODEL_TOTAL - *bm) >> BIT_MODEL_MOVE_BITS; + symbol = 0; + } + else { + rd->range -= bound; + rd->code -= bound; + *bm -= *bm >> BIT_MODEL_MOVE_BITS; + symbol = 1; + } + + if (rd->range <= 0x00FFFFFFU) { + rd->range <<= 8; + rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp); + } + return symbol; +} + +static unsigned +lz_rd_decode_tree(struct lz_range_decoder *rd, int *bm, int num_bits) +{ + unsigned symbol = 1; + + for (int i = 0; i < num_bits; i++) + symbol = (symbol << 1) | lz_rd_decode_bit(rd, &bm[symbol]); + + return symbol - (1 << num_bits); +} + +static unsigned +lz_rd_decode_tree_reversed(struct lz_range_decoder *rd, int *bm, int num_bits) +{ + unsigned symbol = lz_rd_decode_tree(rd, bm, num_bits); + unsigned reversed_symbol = 0; + + for (int i = 0; i < num_bits; i++) { + reversed_symbol = (reversed_symbol << 1) | (symbol & 1); + symbol >>= 1; + } + + return reversed_symbol; +} + +static unsigned +lz_rd_decode_matched(struct lz_range_decoder *rd, int *bm, int match_byte) +{ + unsigned symbol = 1; + + for (int i = 7; i >= 0; i--) { + const unsigned match_bit = (match_byte >> i) & 1; + const unsigned bit = lz_rd_decode_bit(rd, + &bm[symbol + (match_bit << 8) + 0x100]); + symbol = (symbol << 1) | bit; + if (match_bit != bit) { + while (symbol < 0x100) { + symbol = (symbol << 1) | + lz_rd_decode_bit(rd, &bm[symbol]); + } + break; + } + } + return symbol & 0xFF; +} + +static unsigned +lz_rd_decode_len(struct lz_range_decoder *rd, struct lz_len_model *lm, + int pos_state) +{ + if (lz_rd_decode_bit(rd, &lm->choice1) == 0) + return lz_rd_decode_tree(rd, lm->bm_low[pos_state], LOW_BITS); + + if (lz_rd_decode_bit(rd, &lm->choice2) == 0) { + return LOW_SYMBOLS + + lz_rd_decode_tree(rd, lm->bm_mid[pos_state], MID_BITS); + } + + return LOW_SYMBOLS + MID_SYMBOLS + + lz_rd_decode_tree(rd, lm->bm_high, HIGH_BITS); +} + +struct lz_decoder { + FILE *fin, *fout; + off_t pos, ppos, spos, dict_size; + bool wrapped; + uint32_t crc; + uint8_t *obuf; + struct lz_range_decoder rdec; +}; + +static int +lz_flush(struct lz_decoder *lz) +{ + off_t offs = lz->pos - lz->spos; + if (offs <= 0) + return -1; + + size_t size = (size_t)offs; + lz_crc_update(&lz->crc, lz->obuf + lz->spos, size); + if (fwrite(lz->obuf + lz->spos, 1, size, lz->fout) != size) + return -1; + + lz->wrapped = lz->pos >= lz->dict_size; + if (lz->wrapped) { + lz->ppos += lz->pos; + lz->pos = 0; + } + lz->spos = lz->pos; + return 0; +} + +static void +lz_destroy(struct lz_decoder *lz) +{ + if (lz->fin) + fclose(lz->fin); + if (lz->fout) + fclose(lz->fout); + free(lz->obuf); +} + +static int +lz_create(struct lz_decoder *lz, int fin, int fdout, int dict_size) +{ + memset(lz, 0, sizeof(*lz)); + + lz->fin = fdopen(dup(fin), "r"); + if (lz->fin == NULL) + goto out; + + lz->fout = fdopen(dup(fdout), "w"); + if (lz->fout == NULL) + goto out; + + lz->pos = lz->ppos = lz->spos = 0; + lz->crc = ~0; + lz->dict_size = dict_size; + lz->wrapped = false; + + lz->obuf = malloc(dict_size); + if (lz->obuf == NULL) + goto out; + + if (lz_rd_create(&lz->rdec, lz->fin) == -1) + goto out; + return 0; +out: + lz_destroy(lz); + return -1; +} + +static uint8_t +lz_peek(const struct lz_decoder *lz, unsigned ahead) +{ + off_t diff = lz->pos - ahead - 1; + + if (diff >= 0) + return lz->obuf[diff]; + + if (lz->wrapped) + return lz->obuf[lz->dict_size + diff]; + + return 0; +} + +static void +lz_put(struct lz_decoder *lz, uint8_t b) +{ + lz->obuf[lz->pos++] = b; + if (lz->dict_size == lz->pos) + lz_flush(lz); +} + +static off_t +lz_get_data_position(const struct lz_decoder *lz) +{ + return lz->ppos + lz->pos; +} + +static unsigned +lz_get_crc(const struct lz_decoder *lz) +{ + return lz->crc ^ 0xffffffffU; +} + +static void +lz_bm_init(int *a, size_t l) +{ + for (size_t i = 0; i < l; i++) + a[i] = BIT_MODEL_INIT; +} + +#define LZ_BM_INIT(a) lz_bm_init(a, nitems(a)) +#define LZ_BM_INIT2(a) do { \ + size_t l = nitems(a[0]); \ + for (size_t i = 0; i < nitems(a); i++) \ + lz_bm_init(a[i], l); \ +} while (/*CONSTCOND*/0) + +#define LZ_MODEL_INIT(a) do { \ + a.choice1 = BIT_MODEL_INIT; \ + a.choice2 = BIT_MODEL_INIT; \ + LZ_BM_INIT2(a.bm_low); \ + LZ_BM_INIT2(a.bm_mid); \ + LZ_BM_INIT(a.bm_high); \ +} while (/*CONSTCOND*/0) + +static bool +lz_decode_member(struct lz_decoder *lz) +{ + int bm_literal[1 << LITERAL_CONTEXT_BITS][0x300]; + int bm_match[LZ_STATES][POS_STATES]; + int bm_rep[4][LZ_STATES]; + int bm_len[LZ_STATES][POS_STATES]; + int bm_dis_slot[LZ_STATES][1 << DIS_SLOT_BITS]; + int bm_dis[MODELED_DISTANCES - DIS_MODEL_END + 1]; + int bm_align[DIS_ALIGN_SIZE]; + + LZ_BM_INIT2(bm_literal); + LZ_BM_INIT2(bm_match); + LZ_BM_INIT2(bm_rep); + LZ_BM_INIT2(bm_len); + LZ_BM_INIT2(bm_dis_slot); + LZ_BM_INIT(bm_dis); + LZ_BM_INIT(bm_align); + + struct lz_len_model match_len_model; + struct lz_len_model rep_len_model; + + LZ_MODEL_INIT(match_len_model); + LZ_MODEL_INIT(rep_len_model); + + struct lz_range_decoder *rd = &lz->rdec; + unsigned rep[4] = { 0 }; + + + int state = 0; + + while (!feof(lz->fin) && !ferror(lz->fin)) { + const int pos_state = lz_get_data_position(lz) & POS_STATE_MASK; + // bit 1 + if (lz_rd_decode_bit(rd, &bm_match[state][pos_state]) == 0) { + const uint8_t prev_byte = lz_peek(lz, 0); + const int literal_state = + prev_byte >> (8 - LITERAL_CONTEXT_BITS); + int *bm = bm_literal[literal_state]; + if (lz_st_is_char(state)) + lz_put(lz, lz_rd_decode_tree(rd, bm, 8)); + else { + int peek = lz_peek(lz, rep[0]); + lz_put(lz, lz_rd_decode_matched(rd, bm, peek)); + } + state = lz_st_get_char(state); + continue; + } + int len; + // bit 2 + if (lz_rd_decode_bit(rd, &bm_rep[0][state]) != 0) { + // bit 3 + if (lz_rd_decode_bit(rd, &bm_rep[1][state]) == 0) { + // bit 4 + if (lz_rd_decode_bit(rd, + &bm_len[state][pos_state]) == 0) + { + state = lz_st_get_short_rep(state); + lz_put(lz, lz_peek(lz, rep[0])); + continue; + } + } else { + unsigned distance; + // bit 4 + if (lz_rd_decode_bit(rd, &bm_rep[2][state]) + == 0) + distance = rep[1]; + else { + // bit 5 + if (lz_rd_decode_bit(rd, + &bm_rep[3][state]) == 0) + distance = rep[2]; + else { + distance = rep[3]; + rep[3] = rep[2]; + } + rep[2] = rep[1]; + } + rep[1] = rep[0]; + rep[0] = distance; + } + state = lz_st_get_rep(state); + len = MIN_MATCH_LEN + + lz_rd_decode_len(rd, &rep_len_model, pos_state); + } else { + rep[3] = rep[2]; rep[2] = rep[1]; rep[1] = rep[0]; + len = MIN_MATCH_LEN + + lz_rd_decode_len(rd, &match_len_model, pos_state); + const int len_state = + MIN(len - MIN_MATCH_LEN, STATES - 1); + rep[0] = lz_rd_decode_tree(rd, bm_dis_slot[len_state], + DIS_SLOT_BITS); + if (rep[0] >= DIS_MODEL_START) { + const unsigned dis_slot = rep[0]; + const int direct_bits = (dis_slot >> 1) - 1; + rep[0] = (2 | (dis_slot & 1)) << direct_bits; + if (dis_slot < DIS_MODEL_END) + rep[0] += lz_rd_decode_tree_reversed(rd, + &bm_dis[rep[0] - dis_slot], + direct_bits); + else { + rep[0] += lz_rd_decode(rd, direct_bits + - DIS_ALIGN_BITS) << DIS_ALIGN_BITS; + rep[0] += lz_rd_decode_tree_reversed(rd, + bm_align, DIS_ALIGN_BITS); + if (rep[0] == 0xFFFFFFFFU) { + lz_flush(lz); + return len == MIN_MATCH_LEN; + } + } + } + state = lz_st_get_match(state); + if (rep[0] >= lz->dict_size || + (rep[0] >= lz->pos && !lz->wrapped)) { + lz_flush(lz); + return false; + } + } + for (int i = 0; i < len; i++) + lz_put(lz, lz_peek(lz, rep[0])); + } + lz_flush(lz); + return false; +} + +/* + * 0-3 CRC32 of the uncompressed data + * 4-11 size of the uncompressed data + * 12-19 member size including header and trailer + */ +#define TRAILER_SIZE 20 + + +static off_t +lz_decode(int fin, int fdout, unsigned dict_size, off_t *insize) +{ + struct lz_decoder lz; + off_t rv = -1; + + if (lz_create(&lz, fin, fdout, dict_size) == -1) + return -1; + + if (!lz_decode_member(&lz)) + goto out; + + uint8_t trailer[TRAILER_SIZE]; + + for(size_t i = 0; i < nitems(trailer); i++) + trailer[i] = (uint8_t)getc(lz.fin); + + unsigned crc = 0; + for (int i = 3; i >= 0; --i) { + crc <<= 8; + crc += trailer[i]; + } + + int64_t data_size = 0; + for (int i = 11; i >= 4; --i) { + data_size <<= 8; + data_size += trailer[i]; + } + + if (crc != lz_get_crc(&lz) || data_size != lz_get_data_position(&lz)) + goto out; + + rv = 0; + for (int i = 19; i >= 12; --i) { + rv <<= 8; + rv += trailer[i]; + } + if (insize) + *insize = rv; +#if 0 + /* Does not work with pipes */ + rv = ftello(lz.fout); +#else + rv = data_size; +#endif +out: + lz_destroy(&lz); + return rv; +} + + +/* + * 0-3 magic + * 4 version + * 5 coded dict_size + */ +#define HDR_SIZE 6 +#define MIN_DICTIONARY_SIZE (1 << 12) +#define MAX_DICTIONARY_SIZE (1 << 29) + +static const char hdrmagic[] = { 'L', 'Z', 'I', 'P', 1 }; + +static unsigned +lz_get_dict_size(unsigned char c) +{ + unsigned dict_size = 1 << (c & 0x1f); + dict_size -= (dict_size >> 2) * ( (c >> 5) & 0x7); + if (dict_size < MIN_DICTIONARY_SIZE || dict_size > MAX_DICTIONARY_SIZE) + return 0; + return dict_size; +} + +static off_t +unlz(int fin, int fout, char *pre, size_t prelen, off_t *bytes_in) +{ + if (lz_crc[0] == 0) + lz_crc_init(); + + char header[HDR_SIZE]; + + if (prelen > sizeof(header)) + return -1; + if (pre && prelen) + memcpy(header, pre, prelen); + + ssize_t nr = read(fin, header + prelen, sizeof(header) - prelen); + switch (nr) { + case -1: + return -1; + case 0: + return prelen ? -1 : 0; + default: + if ((size_t)nr != sizeof(header) - prelen) + return -1; + break; + } + + if (memcmp(header, hdrmagic, sizeof(hdrmagic)) != 0) + return -1; + + unsigned dict_size = lz_get_dict_size(header[5]); + if (dict_size == 0) + return -1; + + return lz_decode(fin, fout, dict_size, bytes_in); +} Modified: stable/11/usr.bin/gzip/unxz.c ============================================================================== --- stable/11/usr.bin/gzip/unxz.c Mon Jan 21 06:14:26 2019 (r343250) +++ stable/11/usr.bin/gzip/unxz.c Mon Jan 21 06:52:35 2019 (r343251) @@ -1,4 +1,4 @@ -/* $NetBSD: unxz.c,v 1.7 2017/08/04 07:27:08 mrg Exp $ */ +/* $NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause-NetBSD @@ -156,3 +156,322 @@ unxz(int i, int o, char *pre, size_t prelen, off_t *by } } } + +#include <stdbool.h> + +/* + * Copied various bits and pieces from xz support code or brute force + * replacements. + */ + +#define my_min(A,B) ((A)<(B)?(A):(B)) + +// Some systems have suboptimal BUFSIZ. Use a bit bigger value on them. +// We also need that IO_BUFFER_SIZE is a multiple of 8 (sizeof(uint64_t)) +#if BUFSIZ <= 1024 +# define IO_BUFFER_SIZE 8192 +#else +# define IO_BUFFER_SIZE (BUFSIZ & ~7U) +#endif + +/// is_sparse() accesses the buffer as uint64_t for maximum speed. +/// Use an union to make sure that the buffer is properly aligned. +typedef union { + uint8_t u8[IO_BUFFER_SIZE]; + uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)]; + uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)]; +} io_buf; + + +static bool +io_pread(int fd, io_buf *buf, size_t size, off_t pos) +{ + // Using lseek() and read() is more portable than pread() and + // for us it is as good as real pread(). + if (lseek(fd, pos, SEEK_SET) != pos) { + return true; + } + + const size_t amount = read(fd, buf, size); + if (amount == SIZE_MAX) + return true; + + if (amount != size) { + return true; + } + + return false; +} + +/* + * Most of the following is copied (mostly verbatim) from the xz + * distribution, from file src/xz/list.c + */ + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file list.c +/// \brief Listing information about .xz files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + + +/// Information about a .xz file +typedef struct { + /// Combined Index of all Streams in the file + lzma_index *idx; + + /// Total amount of Stream Padding + uint64_t stream_padding; + + /// Highest memory usage so far + uint64_t memusage_max; + + /// True if all Blocks so far have Compressed Size and + /// Uncompressed Size fields + bool all_have_sizes; + + /// Oldest XZ Utils version that will decompress the file + uint32_t min_version; + +} xz_file_info; + +#define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 } + + +/// \brief Parse the Index(es) from the given .xz file +/// +/// \param xfi Pointer to structure where the decoded information +/// is stored. +/// \param pair Input file +/// +/// \return On success, false is returned. On error, true is returned. +/// +// TODO: This function is pretty big. liblzma should have a function that +// takes a callback function to parse the Index(es) from a .xz file to make +// it easy for applications. +static bool +parse_indexes(xz_file_info *xfi, int src_fd) +{ + struct stat st; + + fstat(src_fd, &st); + if (st.st_size <= 0) { + return true; + } + + if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) { + return true; + } + + io_buf buf; + lzma_stream_flags header_flags; + lzma_stream_flags footer_flags; + lzma_ret ret; + + // lzma_stream for the Index decoder + lzma_stream strm = LZMA_STREAM_INIT; + + // All Indexes decoded so far + lzma_index *combined_index = NULL; + + // The Index currently being decoded + lzma_index *this_index = NULL; + + // Current position in the file. We parse the file backwards so + // initialize it to point to the end of the file. + off_t pos = st.st_size; + + // Each loop iteration decodes one Index. + do { + // Check that there is enough data left to contain at least + // the Stream Header and Stream Footer. This check cannot + // fail in the first pass of this loop. + if (pos < 2 * LZMA_STREAM_HEADER_SIZE) { + goto error; + } + + pos -= LZMA_STREAM_HEADER_SIZE; + lzma_vli stream_padding = 0; + + // Locate the Stream Footer. There may be Stream Padding which + // we must skip when reading backwards. + while (true) { + if (pos < LZMA_STREAM_HEADER_SIZE) { + goto error; + } + + if (io_pread(src_fd, &buf, + LZMA_STREAM_HEADER_SIZE, pos)) + goto error; + + // Stream Padding is always a multiple of four bytes. + int i = 2; + if (buf.u32[i] != 0) + break; + *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201901210652.x0L6qZI3045672>