Date: Sat, 26 Jan 2002 17:04:22 +1100 From: "Tim J. Robbins" <tim@robbins.dropbear.id.au> To: Mike Barcroft <mike@FreeBSD.ORG> Cc: freebsd-standards@FreeBSD.ORG Subject: Re: split(1) -a option patch Message-ID: <20020126170422.A8594@descent.robbins.dropbear.id.au> In-Reply-To: <20020125132739.B92720@espresso.q9media.com>; from mike@FreeBSD.ORG on Fri, Jan 25, 2002 at 01:27:39PM -0500 References: <20020124210956.A13091@descent.robbins.dropbear.id.au> <20020125132739.B92720@espresso.q9media.com>
next in thread | previous in thread | raw e-mail | index | archive | help
--gBBFr7Ir9EOA20Yy Content-Type: text/plain; charset=us-ascii Content-Disposition: inline On Fri, Jan 25, 2002 at 01:27:39PM -0500, Mike Barcroft wrote: > Could you please update your patch to -CURRENT sources. There is > at least one change to split.c that hasn't been MFC'd. Done. I've corrected the style problems (I hope) and the warnings, added overflow checking when calculating `maxfiles' and fixed usage(). There were also some things in the manual page that needed updating. > > - fpnt[0] = fnum / 26 + 'a'; > > - fpnt[1] = fnum % 26 + 'a'; > > + for (tfnum = fnum, i = sufflen; i != 0; i--, tfnum /= 26) > > + fpnt[i - 1] = tfnum % 26 + 'a'; > > + fpnt[sufflen] = '\0'; > > Can you explain the purpose of this change? I don't think I > completely understand it. Instead of generating a two-character suffix, I use a loop to do it more generally. The idea is: Save a copy of the file sequence number (tfnum = fnum) to mess around with. Walk backwards through where the suffix should go, dividing tfnum by 26 and setting the character to the remainder + 'a'. Is this simpler? tfnum = fnum; i = sufflen - 1; do { fpnt[i] = tfnum % 26 + 'a'; tfnum /= 26; } while (--i >= 0); Patch against -CURRENT is attached. Tim --gBBFr7Ir9EOA20Yy Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="split.diff" Index: split/split.1 =================================================================== RCS file: /home/ncvs/src/usr.bin/split/split.1,v retrieving revision 1.6 diff -u -r1.6 split.1 --- split/split.1 2001/07/15 08:01:34 1.6 +++ split/split.1 2002/01/26 05:55:45 @@ -40,6 +40,7 @@ .Nd split a file into pieces .Sh SYNOPSIS .Nm +.Op Fl a Ar suffix_length .Op Fl b Ar byte_count[k|m] .Op Fl l Ar line_count .Op Fl p Ar pattern @@ -54,6 +55,12 @@ .Pp The options are as follows: .Bl -tag -width Ds +.It Fl a +Use +.Ar suffix_length +letters to form the suffix of the file name. If +.Fl a +is not specified, two letters are used as the suffix. 
.It Fl b Create smaller files .Ar byte_count @@ -89,22 +96,18 @@ If a second additional argument is specified, it is used as a prefix for the names of the files into which the file is split. In this case, each file into which the file is split is named by the -prefix followed by a lexically ordered suffix in the range of -.Dq Li aa-zz . +prefix followed by a lexically ordered suffix using +.Ar suffix_length +characters in the range +.Dq Li a-z . .Pp If the .Ar name argument is not specified, the file is split into lexically ordered -files named in the range of -.Dq Li xaa-zzz . +files named with prefixes in the range of +.Dq Li x-z +and with suffixes as above. .Sh BUGS -For historical reasons, if you specify -.Ar name , -.Nm -can only create 676 separate -files. -The default naming convention allows 2028 separate files. -.Pp The maximum line length for matching patterns is 65536. .Sh SEE ALSO .Xr re_format 7 Index: split/split.c =================================================================== RCS file: /home/ncvs/src/usr.bin/split/split.c,v retrieving revision 1.8 diff -u -r1.8 split.c --- split/split.c 2001/12/12 23:09:07 1.8 +++ split/split.c 2002/01/26 05:55:46 @@ -67,6 +67,7 @@ char fname[MAXPATHLEN]; /* File name prefix. */ regex_t rgx; int pflag; +long sufflen = 2; /* File name suffix length. */ void newfile __P((void)); void split1 __P((void)); @@ -81,7 +82,7 @@ int ch; char *ep, *p; - while ((ch = getopt(argc, argv, "-0123456789b:l:p:")) != -1) + while ((ch = getopt(argc, argv, "-0123456789a:b:l:p:")) != -1) switch (ch) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -106,6 +107,11 @@ usage(); ifd = 0; break; + case 'a': /* Suffix length */ + if ((sufflen = strtol(optarg, &ep, 10)) <= 0 || *ep) + err(EX_USAGE, + "%s: illegal suffix length", optarg); + break; case 'b': /* Byte count. 
*/ if ((bytecnt = strtoq(optarg, &ep, 10)) <= 0 || (*ep != '\0' && *ep != 'k' && *ep != 'm')) @@ -145,6 +151,8 @@ if (*argv != NULL) usage(); + if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname)) + err(EX_USAGE, "suffix is too long"); if (pflag && (numlines != 0 || bytecnt != 0)) usage(); @@ -273,6 +281,7 @@ void newfile() { + long i, maxfiles, tfnum; static long fnum; static int defname; static char *fpnt; @@ -288,19 +297,25 @@ } ofd = fileno(stdout); } + + /* maxfiles = 26^sufflen, but don't use libm. */ + for (maxfiles = 1, i = 0; i < sufflen; i++) + if ((maxfiles *= 26) <= 0) + errx(EX_USAGE, "suffix is too long (max %ld)", i); + /* * Hack to increase max files; original code wandered through - * magic characters. Maximum files is 3 * 26 * 26 == 2028 + * magic characters. */ -#define MAXFILES 676 - if (fnum == MAXFILES) { + if (fnum == maxfiles) { if (!defname || fname[0] == 'z') errx(EX_DATAERR, "too many files"); ++fname[0]; fnum = 0; } - fpnt[0] = fnum / 26 + 'a'; - fpnt[1] = fnum % 26 + 'a'; + for (tfnum = fnum, i = sufflen; i != 0; i--, tfnum /= 26) + fpnt[i - 1] = tfnum % 26 + 'a'; + fpnt[sufflen] = '\0'; ++fnum; if (!freopen(fname, "w", stdout)) err(EX_IOERR, "%s", fname); @@ -311,6 +326,7 @@ usage() { (void)fprintf(stderr, -"usage: split [-b byte_count] [-l line_count] [-p pattern] [file [prefix]]\n"); +"usage: split [-a sufflen] [-b byte_count] [-l line_count] [-p pattern]\n"); + (void)fprintf(stderr, "\t\t[file [prefix]]\n"); exit(EX_USAGE); } --gBBFr7Ir9EOA20Yy-- To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-standards" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20020126170422.A8594>