Date: Wed, 19 Apr 2006 02:00:15 -0500 From: Yoshihiro Ota <ota@j.email.ne.jp> To: freebsd-hackers@freebsd.org Subject: [patch] mkuzip to compress without stat.st_size Message-ID: <20060419020015.d3e23e5b.ota@j.email.ne.jp>
next in thread | raw e-mail | index | archive | help
mkuzip and GEOM_UZIP have been quite useful for storing files that I rarely change myself, such as /usr/src and /usr/ports. I now keep 6.1-RC1-src.uzip, 6.1-BETA4-src.zip and so on. If all of these were extracted, I would run out of i-nodes. One problem with mkuzip is that it can only handle regular files, not devices or input read through a pipe. The solution suggested here is for the user to provide an alternative size for the input. If st_size is available from stat(2), this patch uses it. However, if it is not available, the patch uses the user-supplied size. The size of the input is important in the cloop format because the format stores an index entry for each block at the beginning of the file. As a result, if you don't know the size of a file, you cannot pre-allocate the space for this index. When EOF is reached before the user-supplied size has been consumed, the patch creates and writes one compressed block whose contents are all 0's. All remaining index entries point to that block. Therefore, the cost of the extra block is rather small; the overhead is roughly (user-size - actual-input-size) / block-size x 64-bit-index-size + the size of one compressed zero-filled block. A further enhancement to this patch would be to use lseek(SEEK_END) to get the actual size when possible. I am not fully sure whether I picked the appropriate types for the sizes; if someone could check them, that would be nice. I hope other people find this useful. I found that other people had the same issue on this list a couple of years ago, although I couldn't find the e-mails again. 
Regards, Hiro --- mkuzip.c.orig Sat Apr 15 17:10:12 2006 +++ mkuzip.c Tue Apr 18 06:14:53 2006 @@ -24,6 +24,7 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <inttypes.h> #define CLSTSIZE 16384 #define DEFAULT_SUFX ".uzip" @@ -33,6 +34,7 @@ "m=geom_uzip\n(kldstat -m $m 2>&-||kldload $m)>&-&&" "mount_cd9660 /dev/`mdconfig -af $0`.uzip $1\nexit $?\n"; +static uint32_t get_size(char *); static char *readblock(int, char *, u_int32_t); static void usage(void); static void *safe_malloc(size_t); @@ -43,12 +45,16 @@ int main(int argc, char **argv) { char *iname, *oname, *obuf, *ibuf; + char *p; uint64_t *toc; - int fdr, fdw, i, opt, verbose, tmp; + int fdr, fdw, opt, verbose, tmp; struct iovec iov[2]; - struct stat sb; uLongf destlen; uint64_t offset; + off_t i; + off_t *inputsz; /* we try file size first; otherwise user supplied */ + off_t filesz; /* file size */ + off_t usersz; /* user supplied size */ struct cloop_header { char magic[CLOOP_MAGIC_LEN]; /* cloop magic */ uint32_t blksz; /* block size */ @@ -61,7 +67,7 @@ oname = NULL; verbose = 0; - while((opt = getopt(argc, argv, "o:s:v")) != -1) { + while((opt = getopt(argc, argv, "o:s:S:v")) != -1) { switch(opt) { case 'o': oname = optarg; @@ -86,6 +92,27 @@ hdr.blksz = tmp; break; + case 'S': + usersz = (off_t)strtoumax(optarg, &p, 0); + if(p == NULL || *p == '\0') + break; + switch(*p) + { + case 't': case 'T': + usersz <<= 10; + case 'g': case 'G': + usersz <<= 10; + case 'm': case 'M': + usersz <<= 10; + case 'k': case 'K': + usersz <<= 10; + case 'b': case 'B': + break; + default: + err(1, "Unknown suffix on -S argument"); + } + break; + case 'v': verbose = 1; break; @@ -122,12 +149,21 @@ signal(SIGXFSZ, exit); atexit(cleanup); - if (stat(iname, &sb) != 0) { - err(1, "%s", iname); + + filesz = get_size(iname); + if(filesz > 0) + inputsz = &filesz; + else + inputsz = &usersz; + + if(*inputsz < 0) { + err(1, "input file size is not avaiable;\n" + "try -S input_size option with large enough 
size"); /* Not reached */ } - hdr.nblocks = sb.st_size / hdr.blksz; - if ((sb.st_size % hdr.blksz) != 0) { + + hdr.nblocks = *inputsz / hdr.blksz; + if ((*inputsz % hdr.blksz) != 0) { if (verbose != 0) fprintf(stderr, "file size is not multiple " "of %d, padding data\n", hdr.blksz); @@ -160,21 +196,23 @@ if (verbose != 0) fprintf(stderr, "data size %ju bytes, number of clusters " - "%u, index length %zu bytes\n", sb.st_size, + "%u, index length %zu bytes\n", *inputsz, hdr.nblocks, iov[1].iov_len); - for(i = 0; i == 0 || ibuf != NULL; i++) { - ibuf = readblock(fdr, ibuf, hdr.blksz); - if (ibuf != NULL) { - destlen = compressBound(hdr.blksz); - if (compress2(obuf, &destlen, ibuf, hdr.blksz, - Z_BEST_COMPRESSION) != Z_OK) { - errx(1, "can't compress data: compress2() " - "failed"); + for(i = 0; i <= hdr.nblocks; i++) { + p = readblock(fdr, ibuf, hdr.blksz); + + /* compress 0'ed data even if no data is read */ + destlen = compressBound(hdr.blksz); + if (compress2(obuf, &destlen, ibuf, hdr.blksz, + Z_BEST_COMPRESSION) != Z_OK) { + errx(1, "can't compress data: compress2() failed"); /* Not reached */ - } + } + + if (p != NULL) { if (verbose != 0) - fprintf(stderr, "cluster #%d, in %u bytes, " + fprintf(stderr, "cluster #%ju, in %u bytes, " "out %lu bytes\n", i, hdr.blksz, destlen); } else { destlen = DEV_BSIZE - (offset % DEV_BSIZE); @@ -189,14 +227,27 @@ /* Not reached */ } toc[i] = htobe64(offset); + if(p == NULL) { + /* -S size was too large; the rest points to zero filled block */ + for(; i <= hdr.nblocks; i++) + toc[i] = htobe64(offset); + break; + } offset += destlen; } + if(p != NULL && i > hdr.nblocks) { /* check if we can read more */ + if(readblock(fdr, ibuf, hdr.blksz) != NULL) { + /* file has grown or -S was too smal */ + err(1, "uziped file was not large enought"); + /* Not reached */ + } + } close(fdr); if (verbose != 0) fprintf(stderr, "compressed data to %ju bytes, saved %lld " - "bytes, %.2f%% decrease.\n", offset, (long long)(sb.st_size - offset), - 
100.0 * (long long)(sb.st_size - offset) / (float)sb.st_size); + "bytes, %.2f%% decrease.\n", offset, (long long)(*inputsz - offset), + 100.0 * (long long)(*inputsz - offset) / (float)*inputsz); /* Convert to big endian */ hdr.blksz = htonl(hdr.blksz); @@ -213,6 +264,20 @@ exit(0); } +static uint32_t +get_size(char *filename) +{ + struct stat sb; + if (stat(filename, &sb) != 0) { + err(1, "%s", filename); + /* Not reached */ + } + if(sb.st_size > 0) /* we got the actual size */ + return sb.st_size; + + return 0; +} + static char * readblock(int fd, char *ibuf, u_int32_t clstsize) { @@ -234,7 +299,9 @@ usage(void) { - fprintf(stderr, "usage: mkuzip [-v] [-o outfile] [-s cluster_size] infile\n"); + fprintf(stderr, + "usage: mkuzip [-v] [-o outfile] " + "[-s cluster_size] [-S input_size] infile\n"); exit(1); }
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20060419020015.d3e23e5b.ota>