Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 19 Apr 2006 02:00:15 -0500
From:      Yoshihiro Ota <ota@j.email.ne.jp>
To:        freebsd-hackers@freebsd.org
Subject:   [patch] mkuzip to compress without stat.st_size
Message-ID:  <20060419020015.d3e23e5b.ota@j.email.ne.jp>

next in thread | raw e-mail | index | archive | help
mkuzip and GEOM_UZIP have been quite useful for storing files
that I rarely change myself, such as /usr/src and /usr/ports.
I now keep 6.1-RC1-src.uzip, 6.1-BETA4-src.zip and so on.
If all of these were extracted, I would run out of i-nodes.

One problem with mkuzip is that it can only handle regular files,
not devices or input read through a pipe.  The solution suggested here
is for the user to provide an alternative size for the input.  If st_size
is available from stat(2), this patch uses it.  However, if it is not
available, it uses the user-supplied size.  The size of the input
is important in the cloop format because it needs to store index
entries for each block at the beginning of the file.  As a result, if
you don't know the size of a file, you cannot pre-allocate
the space for this index.

When EOF is reached, it creates and writes one compressed block
whose contents are all 0's.  All remaining index entries point to
that block.  Therefore, the cost of the extra block is rather small.

(user-size - actual-input-size)/block-size x 64bit-index-size 
	+ compressed-0'ed-block

A further enhancement to this patch would be to use lseek(SEEK_END)
to get the actual size when possible.  I am not fully sure
whether I picked the appropriate types for the sizes; if someone
could check them, that would be nice.

I hope other people find this useful.  I found that other people
had the same issue on this list a couple of years ago, although
I couldn't find the e-mails again.

Regards,
Hiro


--- mkuzip.c.orig	Sat Apr 15 17:10:12 2006
+++ mkuzip.c	Tue Apr 18 06:14:53 2006
@@ -24,6 +24,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <inttypes.h>
 
 #define CLSTSIZE	16384
 #define DEFAULT_SUFX	".uzip"
@@ -33,6 +34,7 @@
     "m=geom_uzip\n(kldstat -m $m 2>&-||kldload $m)>&-&&"
     "mount_cd9660 /dev/`mdconfig -af $0`.uzip $1\nexit $?\n";
 
+static uint32_t get_size(char *);
 static char *readblock(int, char *, u_int32_t);
 static void usage(void);
 static void *safe_malloc(size_t);
@@ -43,12 +45,16 @@
 int main(int argc, char **argv)
 {
 	char *iname, *oname, *obuf, *ibuf;
+	char *p;
 	uint64_t *toc;
-	int fdr, fdw, i, opt, verbose, tmp;
+	int fdr, fdw, opt, verbose, tmp;
 	struct iovec iov[2];
-	struct stat sb;
 	uLongf destlen;
 	uint64_t offset;
+	off_t i;
+	off_t *inputsz; /* we try file size first; otherwise user supplied */
+	off_t filesz; /* file size */
+	off_t usersz; /* user supplied size */
 	struct cloop_header {
 		char magic[CLOOP_MAGIC_LEN];    /* cloop magic */
 		uint32_t blksz;                 /* block size */
@@ -61,7 +67,7 @@
 	oname = NULL;
 	verbose = 0;
 
-	while((opt = getopt(argc, argv, "o:s:v")) != -1) {
+	while((opt = getopt(argc, argv, "o:s:S:v")) != -1) {
 		switch(opt) {
 		case 'o':
 			oname = optarg;
@@ -86,6 +92,27 @@
 			hdr.blksz = tmp;
 			break;
 
+		case 'S':
+			usersz = (off_t)strtoumax(optarg, &p, 0);
+			if(p == NULL || *p == '\0')
+				break;
+			switch(*p)
+			{
+				case 't': case 'T':
+					usersz <<= 10;
+				case 'g': case 'G':
+					usersz <<= 10;
+				case 'm': case 'M':
+					usersz <<= 10;
+				case 'k': case 'K':
+					usersz <<= 10;
+				case 'b': case 'B':
+					break;
+				default:
+					err(1, "Unknown suffix on -S argument");
+			}
+			break;
+
 		case 'v':
 			verbose = 1;
 			break;
@@ -122,12 +149,21 @@
 	signal(SIGXFSZ, exit);
 	atexit(cleanup);
 
-	if (stat(iname, &sb) != 0) {
-		err(1, "%s", iname);
+
+	filesz = get_size(iname);
+	if(filesz > 0)
+		inputsz = &filesz;
+	else
+		inputsz = &usersz;
+
+	if(*inputsz < 0) {
+		err(1, "input file size is not avaiable;\n"
+			"try -S input_size option with large enough size");
 		/* Not reached */
 	}
-	hdr.nblocks = sb.st_size / hdr.blksz;
-	if ((sb.st_size % hdr.blksz) != 0) {
+
+	hdr.nblocks = *inputsz / hdr.blksz;
+	if ((*inputsz % hdr.blksz) != 0) {
 		if (verbose != 0)
 			fprintf(stderr, "file size is not multiple "
 			"of %d, padding data\n", hdr.blksz);
@@ -160,21 +196,23 @@
 
 	if (verbose != 0)
 		fprintf(stderr, "data size %ju bytes, number of clusters "
-		    "%u, index length %zu bytes\n", sb.st_size,
+		    "%u, index length %zu bytes\n", *inputsz,
 		    hdr.nblocks, iov[1].iov_len);
 
-	for(i = 0; i == 0 || ibuf != NULL; i++) {
-		ibuf = readblock(fdr, ibuf, hdr.blksz);
-		if (ibuf != NULL) {
-			destlen = compressBound(hdr.blksz);
-			if (compress2(obuf, &destlen, ibuf, hdr.blksz,
-			    Z_BEST_COMPRESSION) != Z_OK) {
-				errx(1, "can't compress data: compress2() "
-				    "failed");
+	for(i = 0; i <= hdr.nblocks; i++) {
+		p = readblock(fdr, ibuf, hdr.blksz);
+
+		/* compress 0'ed data even if no data is read */
+		destlen = compressBound(hdr.blksz);
+		if (compress2(obuf, &destlen, ibuf, hdr.blksz,
+			Z_BEST_COMPRESSION) != Z_OK) {
+				errx(1, "can't compress data: compress2() failed");
 				/* Not reached */
-			}
+		}
+
+		if (p != NULL) {
 			if (verbose != 0)
-				fprintf(stderr, "cluster #%d, in %u bytes, "
+				fprintf(stderr, "cluster #%ju, in %u bytes, "
 				    "out %lu bytes\n", i, hdr.blksz, destlen);
 		} else {
 			destlen = DEV_BSIZE - (offset % DEV_BSIZE);
@@ -189,14 +227,27 @@
 			/* Not reached */
 		}
 		toc[i] = htobe64(offset);
+		if(p == NULL) {
+			/* -S size was too large; the rest points to zero filled block */
+			for(; i <= hdr.nblocks; i++)
+				toc[i] = htobe64(offset);
+			break;
+		}
 		offset += destlen;
 	}
+	if(p != NULL && i > hdr.nblocks) { /* check if we can read more */
+		if(readblock(fdr, ibuf, hdr.blksz) != NULL) {
+			/* file has grown or -S was too smal */
+			err(1, "uziped file was not large enought");
+			/* Not reached */
+		}
+	}
 	close(fdr);
 
 	if (verbose != 0)
 		fprintf(stderr, "compressed data to %ju bytes, saved %lld "
-		    "bytes, %.2f%% decrease.\n", offset, (long long)(sb.st_size - offset),
-		    100.0 * (long long)(sb.st_size - offset) / (float)sb.st_size);
+		    "bytes, %.2f%% decrease.\n", offset, (long long)(*inputsz - offset),
+		    100.0 * (long long)(*inputsz - offset) / (float)*inputsz);
 
 	/* Convert to big endian */
 	hdr.blksz = htonl(hdr.blksz);
@@ -213,6 +264,20 @@
 	exit(0);
 }
 
+static uint32_t
+get_size(char *filename)
+{
+	struct stat sb;
+	if (stat(filename, &sb) != 0) {
+		err(1, "%s", filename);
+		/* Not reached */
+	}
+	if(sb.st_size > 0) /* we got the actual size */
+		return sb.st_size;
+
+	return 0;
+}
+
 static char *
 readblock(int fd, char *ibuf, u_int32_t clstsize)
 {
@@ -234,7 +299,9 @@
 usage(void)
 {
 
-	fprintf(stderr, "usage: mkuzip [-v] [-o outfile] [-s cluster_size] infile\n");
+	fprintf(stderr,
+		"usage: mkuzip [-v] [-o outfile] "
+		"[-s cluster_size] [-S input_size] infile\n");
 	exit(1);
 }
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20060419020015.d3e23e5b.ota>