From owner-freebsd-standards Fri Feb 8 4:36:29 2002 Delivered-To: freebsd-standards@freebsd.org Received: from descent.robbins.dropbear.id.au (087.c.003.mel.iprimus.net.au [210.50.33.87]) by hub.freebsd.org (Postfix) with ESMTP id 54C0637B421 for ; Fri, 8 Feb 2002 04:36:14 -0800 (PST) Received: (from tim@localhost) by descent.robbins.dropbear.id.au (8.11.6/8.11.6) id g18CbmM41490 for freebsd-standards@FreeBSD.ORG; Fri, 8 Feb 2002 23:37:48 +1100 (EST) (envelope-from tim) Date: Fri, 8 Feb 2002 23:37:47 +1100 From: Tim Robbins To: freebsd-standards@FreeBSD.ORG Subject: tr -C option patch Message-ID: <20020208233747.A41428@descent.robbins.dropbear.id.au> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.2.5.1i Sender: owner-freebsd-standards@FreeBSD.ORG Precedence: bulk List-ID: List-Archive: (Web Archive) List-Help: (List Instructions) List-Subscribe: List-Unsubscribe: X-Loop: FreeBSD.ORG This patch adds the -C option to tr, as well as fixing PR bin/34663, tr -c "" causing bogus results. Unless I'm mistaken, the "in ascending collation sequence, as defined by the current setting of LC_COLLATE" procedure isn't necessary. Although the standard says "An empty string used for string1 or string2 produces undefined results", the current behaviour with -c and an empty string1 is clearly not what was intended. 4.3BSD's tr behaviour is evidence of this. Equivalence classes are not correctly implemented by tr yet. From str.c: * English doesn't have any equivalence classes, so for now * we just syntax check and grab the character. ... this patch doesn't address this problem. 
Index: tr/tr.1 =================================================================== RCS file: /home/ncvs/src/usr.bin/tr/tr.1,v retrieving revision 1.14 diff -u -r1.14 tr.1 --- tr/tr.1 2001/08/15 09:09:44 1.14 +++ tr/tr.1 2002/02/08 12:09:48 @@ -43,19 +43,23 @@ .Nd translate characters .Sh SYNOPSIS .Nm -.Op Fl csu +.Op Fl C | c +.Op Fl su .Ar string1 string2 .Nm -.Op Fl cu .Fl d +.Op Fl C | c +.Op Fl u .Ar string1 .Nm -.Op Fl cu .Fl s +.Op Fl C | c +.Op Fl u .Ar string1 .Nm -.Op Fl cu .Fl ds +.Op Fl C | c +.Op Fl u .Ar string1 string2 .Sh DESCRIPTION The @@ -65,18 +69,20 @@ .Pp The following options are available: .Bl -tag -width Ds +.It Fl C +Complement the set of characters in +.Ar string1 +according to the current locale. .It Fl c -Complements the set of characters in +Complement the set of values in .Ar string1 , -that is ``-c ab'' includes every character except for ``a'' and ``b''. +that is ``-c ab'' includes every value except for ``a'' and ``b''. .It Fl d -The -.Fl d -option causes characters to be deleted from the input. +Delete characters in +.Ar string1 +from the input. .It Fl s -The -.Fl s -option squeezes multiple occurrences of the characters listed in the last +Squeeze multiple occurrences of the characters listed in the last operand (either .Ar string1 or @@ -84,9 +90,7 @@ in the input into a single instance of the character. This occurs after all deletion and translation is completed. .It Fl u -The -.Fl u -option guarantees that any output is unbuffered. +Guarantee that any output is unbuffered. .El .Pp In the first synopsis form, the characters in @@ -283,20 +287,22 @@ .Sh STANDARDS The .Nm -utility is expected to be -.St -p1003.2 -compatible. -It should be noted that the feature wherein the last character of +utility is expected to be compliant with the +.St -p1003.1-2001 +specification. 
+.Pp +The feature wherein the last character of .Ar string2 is duplicated if .Ar string2 has less characters than .Ar string1 -is permitted by POSIX but is not required. -Shell scripts attempting to be portable to other POSIX systems should use +is permitted by +but is not required. +Shell scripts attempting to be portable to other +systems should use the ``[#*]'' convention instead of relying on this behavior. +.Pp The .Fl u -option is an extension to the -.St -p1003.2 -standard. +option is an extension to the standard. Index: tr/tr.c =================================================================== RCS file: /home/ncvs/src/usr.bin/tr/tr.c,v retrieving revision 1.9 diff -u -r1.9 tr.c --- tr/tr.c 2001/12/11 23:36:25 1.9 +++ tr/tr.c 2002/02/08 12:09:53 @@ -47,6 +47,7 @@ #include <sys/types.h> +#include <ctype.h> #include <err.h> #include <locale.h> #include <stdio.h> @@ -56,6 +57,12 @@ #include "extern.h" +/* + * For -C option: determine whether a byte is a valid character in the + * current character set as defined by LC_CTYPE. + */ +#define ISCHAR(c) (iscntrl(c) || isprint(c)) + static int string1[NCHARS] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* ASCII */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, @@ -94,7 +101,7 @@ STR s1 = { STRING1, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL }; STR s2 = { STRING2, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL }; -static void setup __P((int *, char *, STR *, int)); +static void setup __P((int *, char *, STR *, int, int)); static void usage __P((void)); int @@ -103,13 +110,16 @@ char **argv; { int ch, cnt, lastch, *p; - int cflag, dflag, sflag, isstring2; + int Cflag, cflag, dflag, sflag, isstring2; (void) setlocale(LC_CTYPE, ""); - cflag = dflag = sflag = 0; - while ((ch = getopt(argc, argv, "cdsu")) != -1) + Cflag = cflag = dflag = sflag = 0; + while ((ch = getopt(argc, argv, "Ccdsu")) != -1) switch((char)ch) { + case 'C': + Cflag = 1; + break; case 'c': cflag = 1; break; @@ -143,7 +153,7 @@ } /* - * tr -ds [-c] string1 string2 + * tr -ds [-C|-c] string1 string2 
* Delete all characters (or complemented characters) in string1. * Squeeze all characters in string2. */ @@ -151,8 +161,8 @@ if (!isstring2) usage(); - setup(string1, argv[0], &s1, cflag); - setup(string2, argv[1], &s2, 0); + setup(string1, argv[0], &s1, cflag, Cflag); + setup(string2, argv[1], &s2, 0, 0); for (lastch = OOBCH; (ch = getchar()) != EOF;) if (!string1[ch] && (!string2[ch] || lastch != ch)) { @@ -163,14 +173,14 @@ } /* - * tr -d [-c] string1 + * tr -d [-C|-c] string1 * Delete all characters (or complemented characters) in string1. */ if (dflag) { if (isstring2) usage(); - setup(string1, argv[0], &s1, cflag); + setup(string1, argv[0], &s1, cflag, Cflag); while ((ch = getchar()) != EOF) if (!string1[ch]) @@ -179,11 +189,11 @@ } /* - * tr -s [-c] string1 + * tr -s [-C|-c] string1 * Squeeze all characters (or complemented characters) in string1. */ if (sflag && !isstring2) { - setup(string1, argv[0], &s1, cflag); + setup(string1, argv[0], &s1, cflag, Cflag); for (lastch = OOBCH; (ch = getchar()) != EOF;) if (!string1[ch] || lastch != ch) { @@ -194,7 +204,7 @@ } /* - * tr [-cs] string1 string2 + * tr [-C|-c] [-s] string1 string2 * Replace all characters (or complemented characters) in string1 with * the character in the same position in string2. If the -s option is * specified, squeeze all the characters in string2. @@ -205,7 +215,7 @@ s1.str = argv[0]; s2.str = argv[1]; - if (cflag) + if (cflag || Cflag) for (cnt = NCHARS, p = string1; cnt--;) *p++ = OOBCH; @@ -213,6 +223,7 @@ errx(1, "empty string2"); /* If string2 runs out of characters, use the last one specified. */ + ch = s2.lastch; if (sflag) while (next(&s1)) { string1[s1.lastch] = ch = s2.lastch; @@ -225,9 +236,10 @@ (void)next(&s2); } - if (cflag) + if (cflag || Cflag) for (cnt = 0, p = string1; cnt < NCHARS; ++p, ++cnt) - *p = *p == OOBCH ? ch : cnt; + *p = (*p == OOBCH && (!Cflag || ISCHAR(cnt))) ? 
+ ch : cnt; if (sflag) for (lastch = OOBCH; (ch = getchar()) != EOF;) { @@ -244,30 +256,31 @@ } static void -setup(string, arg, str, cflag) +setup(string, arg, str, cflag, Cflag) int *string; char *arg; STR *str; int cflag; + int Cflag; { - int cnt, *p; + int cnt; str->str = arg; bzero(string, NCHARS * sizeof(int)); while (next(str)) string[str->lastch] = 1; - if (cflag) - for (p = string, cnt = NCHARS; cnt--; ++p) - *p = !*p; + if (cflag || Cflag) + for (cnt = 0; cnt < NCHARS; cnt++) + string[cnt] = !string[cnt] && (!Cflag || ISCHAR(cnt)); } static void usage() { (void)fprintf(stderr, "%s\n%s\n%s\n%s\n", - "usage: tr [-csu] string1 string2", - " tr [-cu] -d string1", - " tr [-cu] -s string1", - " tr [-cu] -ds string1 string2"); + "usage: tr [-C|-c] [-su] string1 string2", + " tr -d [-C|-c] [-u] string1", + " tr -s [-C|-c] [-u] string1", + " tr -ds [-C|-c] [-u] string1 string2"); exit(1); } To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-standards" in the body of the message