From owner-freebsd-hackers Thu Apr 3 06:09:41 1997
Return-Path:
Received: (from root@localhost) by freefall.freebsd.org (8.8.5/8.8.5) id GAA24199 for hackers-outgoing; Thu, 3 Apr 1997 06:09:41 -0800 (PST)
Received: from lattice.latte.it (line08.globalnet.it [194.185.53.40]) by freefall.freebsd.org (8.8.5/8.8.5) with ESMTP id GAA24192 for ; Thu, 3 Apr 1997 06:09:32 -0800 (PST)
Received: from localhost (ssigala@localhost) by lattice.latte.it (8.8.4/8.8.4) with SMTP id QAA00647 for ; Thu, 3 Apr 1997 16:08:42 +0200 (CEST)
X-Authentication-Warning: lattice.latte.it: ssigala owned process doing -bs
Date: Thu, 3 Apr 1997 16:08:41 +0200 (CEST)
From: S Sigala
X-Sender: ssigala@lattice.latte.it
To: freebsd-hackers@freebsd.org
Subject: Making holes in files with lseek()
Message-ID:
MIME-Version: 1.0
Content-Type: TEXT/PLAIN; charset=US-ASCII
Sender: owner-hackers@freebsd.org
X-Loop: FreeBSD.org
Precedence: bulk

Hello,

I have just written this little program that replaces sequences of NUL bytes with holes (using lseek()). This seems to work, but I would like to know the minimum length of a NUL-byte sequence for which a hole is better (requires less space on disk) than the sequence itself. In other words, how much disk space is wasted by a hole? Does every lseek() call (a seek beyond the end of the file) create a hole?

Thanks in advance.

Regards, -sandro -- CUT HERE ------------------------------------------------------------------ #define USEFUL_LENGTH 16 #include #include #include #include #include #include int lflag = USEFUL_LENGTH; void usage __P((void)); void setfile __P((char *path, struct stat *fs)); void process_file __P((char *filename)); int main(argc, argv) int argc; char *argv[]; { int c; while ((c = getopt(argc, argv, "l:")) != -1) switch (c) { case 'l': if ((lflag = atoi(optarg)) <= 0) errx(1, "invalid -l value"); break; case '?': default: usage(); /* NOTREACHED */ } argc -= optind; argv += optind; if (argc < 1) usage(); while (*argv) process_file(*argv++); return 0; } void usage() { fprintf(stderr, "usage: holeify [-l length] filename ...\n"); exit(1); } void setfile(path, fs) char *path; struct stat *fs; { static struct timeval tv[2]; /* Set file access and modification times. */ TIMESPEC_TO_TIMEVAL(&tv[0], &fs->st_atimespec); TIMESPEC_TO_TIMEVAL(&tv[1], &fs->st_mtimespec); utimes(path, tv); fs->st_mode &= S_ISUID | S_ISGID | S_ISVTX | S_IRWXU | S_IRWXG | S_IRWXO; /* Set owner and group. */ if (chown(path, fs->st_uid, fs->st_gid)) fs->st_mode &= ~(S_ISUID | S_ISGID); /* Set mode. */ chmod(path, fs->st_mode); /* Set flags. */ chflags(path, fs->st_flags); } void process_file(filename) char *filename; { unsigned char ibuf[BUFSIZ], obuf[BUFSIZ]; char tempfile[12]; struct stat fstat_s; int ifd, ofd, isize, osize; int i, num0; ifd = open(filename, O_RDONLY); ofd = mkstemp(strcpy(tempfile, "temp.XXXX")); num0 = osize = 0; while ((isize = read(ifd, ibuf, BUFSIZ)) > 0) for (i = 0; i < isize; i++) { if (ibuf[i] == '\0') num0++; else { if (num0 > 0) { if (osize > 0) { /* Flush the output buffer. */ write(ofd, obuf, osize); osize = 0; } if (num0 >= lflag) { /* Create a hole. */ lseek(ofd, num0, SEEK_CUR); num0 = 0; } else { /* Fill with zeros. 
*/ for (; num0; num0--) { obuf[osize++] = '\0'; if (osize == BUFSIZ) { write(ofd, obuf, BUFSIZ); osize = 0; } } } } obuf[osize++] = ibuf[i]; /* Flush the output buffer if full. */ if (osize == BUFSIZ) { write(ofd, obuf, BUFSIZ); osize = 0; } } } if (osize > 0) write(ofd, obuf, osize); if (num0 > 0) { /* * Create a hole if required. */ if (num0 > 1) { if (num0 - 1 >= lflag) { /* Large enough: create a hole. */ lseek(ofd, num0 - 1, SEEK_CUR); } else { /* Fill with zeros. */ for (; num0 - 1; num0--) write(ofd, "", 1); } } /* * Put a null byte at the end to make the hole. */ write(ofd, "", 1); } fstat(ifd, &fstat_s); close(ifd); close(ofd); /* * Restore old file attributes and rename to old file name. */ setfile(tempfile, &fstat_s); rename(tempfile, filename); }