Date: Fri, 8 Feb 2002 23:37:47 +1100 From: Tim Robbins <tim@robbins.dropbear.id.au> To: freebsd-standards@FreeBSD.ORG Subject: tr -C option patch Message-ID: <20020208233747.A41428@descent.robbins.dropbear.id.au>
next in thread | raw e-mail | index | archive | help
This patch adds the -C option to tr and fixes PR bin/34663, in which
tr -c "" produces bogus results.
Unless I'm mistaken, it isn't necessary to implement the standard's
requirement that the complemented set be ordered "in ascending collation
sequence, as defined by the current setting of LC_COLLATE".
Although the standard says "An empty string used for string1 or string2
produces undefined results", the current behaviour with -c and an empty
string1 is clearly not what was intended. 4.3BSD's tr behaviour is evidence
of this.
Equivalence classes are not yet correctly implemented by tr. From str.c:
* English doesn't have any equivalence classes, so for now
* we just syntax check and grab the character.
This patch does not address that problem.
Index: tr/tr.1
===================================================================
RCS file: /home/ncvs/src/usr.bin/tr/tr.1,v
retrieving revision 1.14
diff -u -r1.14 tr.1
--- tr/tr.1 2001/08/15 09:09:44 1.14
+++ tr/tr.1 2002/02/08 12:09:48
@@ -43,19 +43,23 @@
.Nd translate characters
.Sh SYNOPSIS
.Nm
-.Op Fl csu
+.Op Fl C | c
+.Op Fl su
.Ar string1 string2
.Nm
-.Op Fl cu
.Fl d
+.Op Fl C | c
+.Op Fl u
.Ar string1
.Nm
-.Op Fl cu
.Fl s
+.Op Fl C | c
+.Op Fl u
.Ar string1
.Nm
-.Op Fl cu
.Fl ds
+.Op Fl C | c
+.Op Fl u
.Ar string1 string2
.Sh DESCRIPTION
The
@@ -65,18 +69,20 @@
.Pp
The following options are available:
.Bl -tag -width Ds
+.It Fl C
+Complement the set of characters in
+.Ar string1
+according to the current locale.
.It Fl c
-Complements the set of characters in
+Complement the set of values in
.Ar string1 ,
-that is ``-c ab'' includes every character except for ``a'' and ``b''.
+that is ``-c ab'' includes every value except for ``a'' and ``b''.
.It Fl d
-The
-.Fl d
-option causes characters to be deleted from the input.
+Delete characters in
+.Ar string1
+from the input.
.It Fl s
-The
-.Fl s
-option squeezes multiple occurrences of the characters listed in the last
+Squeeze multiple occurrences of the characters listed in the last
operand (either
.Ar string1
or
@@ -84,9 +90,7 @@
in the input into a single instance of the character.
This occurs after all deletion and translation is completed.
.It Fl u
-The
-.Fl u
-option guarantees that any output is unbuffered.
+Guarantee that any output is unbuffered.
.El
.Pp
In the first synopsis form, the characters in
@@ -283,20 +287,22 @@
.Sh STANDARDS
The
.Nm
-utility is expected to be
-.St -p1003.2
-compatible.
-It should be noted that the feature wherein the last character of
+utility is expected to be compliant with the
+.St -p1003.1-2001
+specification.
+.Pp
+The feature wherein the last character of
.Ar string2
is duplicated if
.Ar string2
has less characters than
.Ar string1
-is permitted by POSIX but is not required.
-Shell scripts attempting to be portable to other POSIX systems should use
+is permitted by
+but is not required.
+Shell scripts attempting to be portable to other
+systems should use
the ``[#*]'' convention instead of relying on this behavior.
+.Pp
The
.Fl u
-option is an extension to the
-.St -p1003.2
-standard.
+option is an extension to the standard.
Index: tr/tr.c
===================================================================
RCS file: /home/ncvs/src/usr.bin/tr/tr.c,v
retrieving revision 1.9
diff -u -r1.9 tr.c
--- tr/tr.c 2001/12/11 23:36:25 1.9
+++ tr/tr.c 2002/02/08 12:09:53
@@ -47,6 +47,7 @@
#include <sys/types.h>
+#include <ctype.h>
#include <err.h>
#include <locale.h>
#include <stdio.h>
@@ -56,6 +57,12 @@
#include "extern.h"
+/*
+ * For -C option: determine whether a byte is a valid character in the
+ * current character set as defined by LC_CTYPE.
+ */
+#define ISCHAR(c) (iscntrl(c) || isprint(c))
+
static int string1[NCHARS] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* ASCII */
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
@@ -94,7 +101,7 @@
STR s1 = { STRING1, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL };
STR s2 = { STRING2, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL };
-static void setup __P((int *, char *, STR *, int));
+static void setup __P((int *, char *, STR *, int, int));
static void usage __P((void));
int
@@ -103,13 +110,16 @@
char **argv;
{
int ch, cnt, lastch, *p;
- int cflag, dflag, sflag, isstring2;
+ int Cflag, cflag, dflag, sflag, isstring2;
(void) setlocale(LC_CTYPE, "");
- cflag = dflag = sflag = 0;
- while ((ch = getopt(argc, argv, "cdsu")) != -1)
+ Cflag = cflag = dflag = sflag = 0;
+ while ((ch = getopt(argc, argv, "Ccdsu")) != -1)
switch((char)ch) {
+ case 'C':
+ Cflag = 1;
+ break;
case 'c':
cflag = 1;
break;
@@ -143,7 +153,7 @@
}
/*
- * tr -ds [-c] string1 string2
+ * tr -ds [-C|-c] string1 string2
* Delete all characters (or complemented characters) in string1.
* Squeeze all characters in string2.
*/
@@ -151,8 +161,8 @@
if (!isstring2)
usage();
- setup(string1, argv[0], &s1, cflag);
- setup(string2, argv[1], &s2, 0);
+ setup(string1, argv[0], &s1, cflag, Cflag);
+ setup(string2, argv[1], &s2, 0, 0);
for (lastch = OOBCH; (ch = getchar()) != EOF;)
if (!string1[ch] && (!string2[ch] || lastch != ch)) {
@@ -163,14 +173,14 @@
}
/*
- * tr -d [-c] string1
+ * tr -d [-C|-c] string1
* Delete all characters (or complemented characters) in string1.
*/
if (dflag) {
if (isstring2)
usage();
- setup(string1, argv[0], &s1, cflag);
+ setup(string1, argv[0], &s1, cflag, Cflag);
while ((ch = getchar()) != EOF)
if (!string1[ch])
@@ -179,11 +189,11 @@
}
/*
- * tr -s [-c] string1
+ * tr -s [-C|-c] string1
* Squeeze all characters (or complemented characters) in string1.
*/
if (sflag && !isstring2) {
- setup(string1, argv[0], &s1, cflag);
+ setup(string1, argv[0], &s1, cflag, Cflag);
for (lastch = OOBCH; (ch = getchar()) != EOF;)
if (!string1[ch] || lastch != ch) {
@@ -194,7 +204,7 @@
}
/*
- * tr [-cs] string1 string2
+ * tr [-C|-c] [-s] string1 string2
* Replace all characters (or complemented characters) in string1 with
* the character in the same position in string2. If the -s option is
* specified, squeeze all the characters in string2.
@@ -205,7 +215,7 @@
s1.str = argv[0];
s2.str = argv[1];
- if (cflag)
+ if (cflag || Cflag)
for (cnt = NCHARS, p = string1; cnt--;)
*p++ = OOBCH;
@@ -213,6 +223,7 @@
errx(1, "empty string2");
/* If string2 runs out of characters, use the last one specified. */
+ ch = s2.lastch;
if (sflag)
while (next(&s1)) {
string1[s1.lastch] = ch = s2.lastch;
@@ -225,9 +236,10 @@
(void)next(&s2);
}
- if (cflag)
+ if (cflag || Cflag)
for (cnt = 0, p = string1; cnt < NCHARS; ++p, ++cnt)
- *p = *p == OOBCH ? ch : cnt;
+ *p = (*p == OOBCH && (!Cflag || ISCHAR(cnt))) ?
+ ch : cnt;
if (sflag)
for (lastch = OOBCH; (ch = getchar()) != EOF;) {
@@ -244,30 +256,31 @@
}
static void
-setup(string, arg, str, cflag)
+setup(string, arg, str, cflag, Cflag)
int *string;
char *arg;
STR *str;
int cflag;
+ int Cflag;
{
- int cnt, *p;
+ int cnt;
str->str = arg;
bzero(string, NCHARS * sizeof(int));
while (next(str))
string[str->lastch] = 1;
- if (cflag)
- for (p = string, cnt = NCHARS; cnt--; ++p)
- *p = !*p;
+ if (cflag || Cflag)
+ for (cnt = 0; cnt < NCHARS; cnt++)
+ string[cnt] = !string[cnt] && (!Cflag || ISCHAR(cnt));
}
static void
usage()
{
(void)fprintf(stderr, "%s\n%s\n%s\n%s\n",
- "usage: tr [-csu] string1 string2",
- " tr [-cu] -d string1",
- " tr [-cu] -s string1",
- " tr [-cu] -ds string1 string2");
+ "usage: tr [-C|-c] [-su] string1 string2",
+ " tr -d [-C|-c] [-u] string1",
+ " tr -s [-C|-c] [-u] string1",
+ " tr -ds [-C|-c] [-u] string1 string2");
exit(1);
}
To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-standards" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20020208233747.A41428>
