Date: Sun, 05 Aug 2007 22:10:17 -0700 From: perryh@pluto.rain.com To: freebsd-hackers@freebsd.org Subject: improvement to split(1) Message-ID: <46b6ad39.ruamQGdheK6wwA1G%perryh@pluto.rain.com>
next in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format. --=_46b6ad39.lPXpShBtL+5LwoMbStDVYy58D0fOB8TQSR/Yb1eAclZJgXew Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit Content-Disposition: inline In the case where the output files from split(1) are of a specified size (in bytes) and the size of the input is known, it is possible to compute the minimum required suffix_length rather than requiring it to be specified or accepting the default (2). The attached diffs add a -B switch, which requests that the suffix_length be computed automatically and otherwise behaves the same as -b. --=_46b6ad39.lPXpShBtL+5LwoMbStDVYy58D0fOB8TQSR/Yb1eAclZJgXew Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="split.1.diff" *** split.1.orig Sun Jan 16 23:44:29 2005 --- split.1 Sat Aug 4 22:46:19 2007 *************** *** 30,38 **** .\" SUCH DAMAGE. .\" .\" @(#)split.1 8.3 (Berkeley) 4/16/94 ! .\" $FreeBSD: src/usr.bin/split/split.1,v 1.15 2005/01/17 07:44:29 ru Exp $ .\" ! .Dd July 12, 2004 .Dt SPLIT 1 .Os .Sh NAME --- 30,38 ---- .\" SUCH DAMAGE. .\" .\" @(#)split.1 8.3 (Berkeley) 4/16/94 ! .\" $FreeBSD: src/usr.bin/split/split.1,v 1.15+ 2005/01/17 07:44:29 ru Exp $ .\" ! .Dd August 4, 2007 .Dt SPLIT 1 .Os .Sh NAME *************** *** 41,47 **** .Sh SYNOPSIS .Nm .Op Fl a Ar suffix_length ! .Op Fl b Ar byte_count[k|m] .Op Fl l Ar line_count .Op Fl p Ar pattern .Op Ar file Op Ar name --- 41,47 ---- .Sh SYNOPSIS .Nm .Op Fl a Ar suffix_length ! .Op Fl {b|B} Ar byte_count[k|m] .Op Fl l Ar line_count .Op Fl p Ar pattern .Op Ar file Op Ar name *************** *** 79,84 **** --- 79,92 ---- is appended to the number, the file is split into .Ar byte_count megabyte pieces. + .It Fl B + Like + .Fl b, + and compute the + .Ar suffix_length + based on the + .Ar byte_count + and the file size. 
.It Fl l Create smaller files .Ar n --=_46b6ad39.lPXpShBtL+5LwoMbStDVYy58D0fOB8TQSR/Yb1eAclZJgXew Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="split.c.diff" *** split.c.orig Sun Jul 11 07:44:23 2004 --- split.c Sat Aug 4 23:01:08 2007 *************** *** 32,38 **** */ #include <sys/cdefs.h> ! __FBSDID("$FreeBSD: src/usr.bin/split/split.c,v 1.15 2004/07/11 14:44:23 tjr Exp $"); #ifndef lint static const char copyright[] = --- 32,38 ---- */ #include <sys/cdefs.h> ! __FBSDID("$FreeBSD: src/usr.bin/split/split.c,v 1.15+ 2004/07/11 14:44:23 tjr Exp $"); #ifndef lint static const char copyright[] = *************** *** 61,66 **** --- 61,69 ---- #include <regex.h> #include <sysexits.h> + #include <sys/types.h> + #include <sys/stat.h> + #define DEFLINE 1000 /* Default num lines per file. */ off_t bytecnt; /* Byte count to split on. */ *************** *** 70,77 **** char bfr[MAXBSIZE]; /* I/O buffer. */ char fname[MAXPATHLEN]; /* File name prefix. */ regex_t rgx; ! int pflag; ! long sufflen = 2; /* File name suffix length. */ void newfile(void); void split1(void); --- 73,80 ---- char bfr[MAXBSIZE]; /* I/O buffer. */ char fname[MAXPATHLEN]; /* File name prefix. */ regex_t rgx; ! int pflag, Bflag; ! long sufflen = -2; /* File name suffix length, -2 => unspecified. */ void newfile(void); void split1(void); *************** *** 85,94 **** long scale; int ch; char *ep, *p; setlocale(LC_ALL, ""); ! while ((ch = getopt(argc, argv, "-0123456789a:b:l:p:")) != -1) switch (ch) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': --- 88,98 ---- long scale; int ch; char *ep, *p; + struct stat istat; setlocale(LC_ALL, ""); ! 
while ((ch = getopt(argc, argv, "-0123456789a:b:l:p:B:")) != -1) switch (ch) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': *************** *** 114,123 **** --- 118,134 ---- ifd = 0; break; case 'a': /* Suffix length */ + if (Bflag) + errx(EX_USAGE, "-a is incompatible with -B"); if ((sufflen = strtol(optarg, &ep, 10)) <= 0 || *ep) errx(EX_USAGE, "%s: illegal suffix length", optarg); break; + case 'B': /* Byte count & compute sufflen. */ + if (sufflen != -2) + errx(EX_USAGE, "-B is incompatible with -a"); + Bflag = 1; + /* fall through */ case 'b': /* Byte count. */ errno = 0; if ((bytecnti = strtoimax(optarg, &ep, 10)) <= 0 || *************** *** 153,164 **** --- 164,185 ---- argv += optind; argc -= optind; + if (sufflen == -2) + sufflen = 2; + if (*argv != NULL) if (ifd == -1) { /* Input file. */ if (strcmp(*argv, "-") == 0) ifd = STDIN_FILENO; else if ((ifd = open(*argv, O_RDONLY, 0)) < 0) err(EX_NOINPUT, "%s", *argv); + else if (Bflag && fstat(ifd, &istat) == 0 && + istat.st_size > 0) { + off_t nfiles = + (istat.st_size + bytecnt - 1) / bytecnt; + for (sufflen = 1; nfiles > 26; nfiles /= 26) + ++sufflen; + } ++argv; } if (*argv != NULL) /* File name prefix. */ *************** *** 349,355 **** usage(void) { (void)fprintf(stderr, ! "usage: split [-a sufflen] [-b byte_count] [-l line_count] [-p pattern]\n"); (void)fprintf(stderr, " [file [prefix]]\n"); exit(EX_USAGE); --- 370,376 ---- usage(void) { (void)fprintf(stderr, ! "usage: split [-a sufflen] [-{b|B} byte_count] [-l line_count] [-p pattern]\n"); (void)fprintf(stderr, " [file [prefix]]\n"); exit(EX_USAGE); --=_46b6ad39.lPXpShBtL+5LwoMbStDVYy58D0fOB8TQSR/Yb1eAclZJgXew--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?46b6ad39.ruamQGdheK6wwA1G%perryh>