Skip site navigation (1) | Skip section navigation (2)
Date:      Fri, 8 Feb 2002 23:37:47 +1100
From:      Tim Robbins <tim@robbins.dropbear.id.au>
To:        freebsd-standards@FreeBSD.ORG
Subject:   tr -C option patch
Message-ID:  <20020208233747.A41428@descent.robbins.dropbear.id.au>

next in thread | raw e-mail | index | archive | help
This patch adds the -C option to tr and fixes PR bin/34663, in which
tr -c "" produces bogus results.

Unless I'm mistaken, the step the standard describes as "in ascending
collation sequence, as defined by the current setting of LC_COLLATE" isn't
necessary here.

Although the standard says "An empty string used for string1 or string2
produces undefined results", the current behaviour with -c and an empty
string1 is clearly not what was intended. 4.3BSD's tr behaviour is evidence
of this.

Equivalence classes are not correctly implemented by tr yet. From str.c:
 * English doesn't have any equivalence classes, so for now
 * we just syntax check and grab the character.
This patch doesn't address that problem.

Index: tr/tr.1
===================================================================
RCS file: /home/ncvs/src/usr.bin/tr/tr.1,v
retrieving revision 1.14
diff -u -r1.14 tr.1
--- tr/tr.1	2001/08/15 09:09:44	1.14
+++ tr/tr.1	2002/02/08 12:09:48
@@ -43,19 +43,23 @@
 .Nd translate characters
 .Sh SYNOPSIS
 .Nm
-.Op Fl csu
+.Op Fl C | c
+.Op Fl su
 .Ar string1 string2
 .Nm
-.Op Fl cu
 .Fl d
+.Op Fl C | c
+.Op Fl u
 .Ar string1
 .Nm
-.Op Fl cu
 .Fl s
+.Op Fl C | c
+.Op Fl u
 .Ar string1
 .Nm
-.Op Fl cu
 .Fl ds
+.Op Fl C | c
+.Op Fl u
 .Ar string1 string2
 .Sh DESCRIPTION
 The
@@ -65,18 +69,20 @@
 .Pp
 The following options are available:
 .Bl -tag -width Ds
+.It Fl C
+Complement the set of characters in
+.Ar string1
+according to the current locale.
 .It Fl c
-Complements the set of characters in
+Complement the set of values in
 .Ar string1 ,
-that is ``-c ab'' includes every character except for ``a'' and ``b''.
+that is ``-c ab'' includes every value except for ``a'' and ``b''.
 .It Fl d
-The
-.Fl d
-option causes characters to be deleted from the input.
+Delete characters in
+.Ar string1
+from the input.
 .It Fl s
-The
-.Fl s
-option squeezes multiple occurrences of the characters listed in the last
+Squeeze multiple occurrences of the characters listed in the last
 operand (either
 .Ar string1
 or
@@ -84,9 +90,7 @@
 in the input into a single instance of the character.
 This occurs after all deletion and translation is completed.
 .It Fl u
-The
-.Fl u
-option guarantees that any output is unbuffered.
+Guarantee that any output is unbuffered.
 .El
 .Pp
 In the first synopsis form, the characters in
@@ -283,20 +287,22 @@
 .Sh STANDARDS
 The
 .Nm
-utility is expected to be
-.St -p1003.2
-compatible.
-It should be noted that the feature wherein the last character of
+utility is expected to be compliant with the
+.St -p1003.1-2001
+specification.
+.Pp
+The feature wherein the last character of
 .Ar string2
 is duplicated if
 .Ar string2
 has less characters than
 .Ar string1
-is permitted by POSIX but is not required.
-Shell scripts attempting to be portable to other POSIX systems should use
+is permitted by
+but is not required.
+Shell scripts attempting to be portable to other
+systems should use
 the ``[#*]'' convention instead of relying on this behavior.
+.Pp
 The
 .Fl u
-option is an extension to the
-.St -p1003.2
-standard.
+option is an extension to the standard.
Index: tr/tr.c
===================================================================
RCS file: /home/ncvs/src/usr.bin/tr/tr.c,v
retrieving revision 1.9
diff -u -r1.9 tr.c
--- tr/tr.c	2001/12/11 23:36:25	1.9
+++ tr/tr.c	2002/02/08 12:09:53
@@ -47,6 +47,7 @@
 
 #include <sys/types.h>
 
+#include <ctype.h>
 #include <err.h>
 #include <locale.h>
 #include <stdio.h>
@@ -56,6 +57,12 @@
 
 #include "extern.h"
 
+/*
+ * For -C option: determine whether a byte is a valid character in the
+ * current character set as defined by LC_CTYPE.
+ */
+#define ISCHAR(c) (iscntrl(c) || isprint(c))
+
 static int string1[NCHARS] = {
 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,		/* ASCII */
 	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
@@ -94,7 +101,7 @@
 STR s1 = { STRING1, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL };
 STR s2 = { STRING2, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL };
 
-static void setup __P((int *, char *, STR *, int));
+static void setup __P((int *, char *, STR *, int, int));
 static void usage __P((void));
 
 int
@@ -103,13 +110,16 @@
 	char **argv;
 {
 	int ch, cnt, lastch, *p;
-	int cflag, dflag, sflag, isstring2;
+	int Cflag, cflag, dflag, sflag, isstring2;
 
 	(void) setlocale(LC_CTYPE, "");
 
-	cflag = dflag = sflag = 0;
-	while ((ch = getopt(argc, argv, "cdsu")) != -1)
+	Cflag = cflag = dflag = sflag = 0;
+	while ((ch = getopt(argc, argv, "Ccdsu")) != -1)
 		switch((char)ch) {
+		case 'C':
+			Cflag = 1;
+			break;
 		case 'c':
 			cflag = 1;
 			break;
@@ -143,7 +153,7 @@
 	}
 
 	/*
-	 * tr -ds [-c] string1 string2
+	 * tr -ds [-C|-c] string1 string2
 	 * Delete all characters (or complemented characters) in string1.
 	 * Squeeze all characters in string2.
 	 */
@@ -151,8 +161,8 @@
 		if (!isstring2)
 			usage();
 
-		setup(string1, argv[0], &s1, cflag);
-		setup(string2, argv[1], &s2, 0);
+		setup(string1, argv[0], &s1, cflag, Cflag);
+		setup(string2, argv[1], &s2, 0, 0);
 
 		for (lastch = OOBCH; (ch = getchar()) != EOF;)
 			if (!string1[ch] && (!string2[ch] || lastch != ch)) {
@@ -163,14 +173,14 @@
 	}
 
 	/*
-	 * tr -d [-c] string1
+	 * tr -d [-C|-c] string1
 	 * Delete all characters (or complemented characters) in string1.
 	 */
 	if (dflag) {
 		if (isstring2)
 			usage();
 
-		setup(string1, argv[0], &s1, cflag);
+		setup(string1, argv[0], &s1, cflag, Cflag);
 
 		while ((ch = getchar()) != EOF)
 			if (!string1[ch])
@@ -179,11 +189,11 @@
 	}
 
 	/*
-	 * tr -s [-c] string1
+	 * tr -s [-C|-c] string1
 	 * Squeeze all characters (or complemented characters) in string1.
 	 */
 	if (sflag && !isstring2) {
-		setup(string1, argv[0], &s1, cflag);
+		setup(string1, argv[0], &s1, cflag, Cflag);
 
 		for (lastch = OOBCH; (ch = getchar()) != EOF;)
 			if (!string1[ch] || lastch != ch) {
@@ -194,7 +204,7 @@
 	}
 
 	/*
-	 * tr [-cs] string1 string2
+	 * tr [-C|-c] [-s] string1 string2
 	 * Replace all characters (or complemented characters) in string1 with
 	 * the character in the same position in string2.  If the -s option is
 	 * specified, squeeze all the characters in string2.
@@ -205,7 +215,7 @@
 	s1.str = argv[0];
 	s2.str = argv[1];
 
-	if (cflag)
+	if (cflag || Cflag)
 		for (cnt = NCHARS, p = string1; cnt--;)
 			*p++ = OOBCH;
 
@@ -213,6 +223,7 @@
 		errx(1, "empty string2");
 
 	/* If string2 runs out of characters, use the last one specified. */
+	ch = s2.lastch;
 	if (sflag)
 		while (next(&s1)) {
 			string1[s1.lastch] = ch = s2.lastch;
@@ -225,9 +236,10 @@
 			(void)next(&s2);
 		}
 
-	if (cflag)
+	if (cflag || Cflag)
 		for (cnt = 0, p = string1; cnt < NCHARS; ++p, ++cnt)
-			*p = *p == OOBCH ? ch : cnt;
+			*p = (*p == OOBCH && (!Cflag || ISCHAR(cnt))) ?
+			    ch : cnt;
 
 	if (sflag)
 		for (lastch = OOBCH; (ch = getchar()) != EOF;) {
@@ -244,30 +256,31 @@
 }
 
 static void
-setup(string, arg, str, cflag)
+setup(string, arg, str, cflag, Cflag)
 	int *string;
 	char *arg;
 	STR *str;
 	int cflag;
+	int Cflag;
 {
-	int cnt, *p;
+	int cnt;
 
 	str->str = arg;
 	bzero(string, NCHARS * sizeof(int));
 	while (next(str))
 		string[str->lastch] = 1;
-	if (cflag)
-		for (p = string, cnt = NCHARS; cnt--; ++p)
-			*p = !*p;
+	if (cflag || Cflag)
+		for (cnt = 0; cnt < NCHARS; cnt++)
+			string[cnt] = !string[cnt] && (!Cflag || ISCHAR(cnt));
 }
 
 static void
 usage()
 {
 	(void)fprintf(stderr, "%s\n%s\n%s\n%s\n",
-		"usage: tr [-csu] string1 string2",
-		"       tr [-cu] -d string1",
-		"       tr [-cu] -s string1",
-		"       tr [-cu] -ds string1 string2");
+		"usage: tr [-C|-c] [-su] string1 string2",
+		"       tr -d [-C|-c] [-u] string1",
+		"       tr -s [-C|-c] [-u] string1",
+		"       tr -ds [-C|-c] [-u] string1 string2");
 	exit(1);
 }

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-standards" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20020208233747.A41428>