From owner-freebsd-i18n@FreeBSD.ORG Thu Jul 31 19:37:05 2003 Return-Path: Delivered-To: freebsd-i18n@freebsd.org Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id 3661737B401; Thu, 31 Jul 2003 19:37:05 -0700 (PDT) Received: from nagual.pp.ru (pobrecita.freebsd.ru [194.87.13.42]) by mx1.FreeBSD.org (Postfix) with ESMTP id 366ED43F75; Thu, 31 Jul 2003 19:37:04 -0700 (PDT) (envelope-from ache@pobrecita.freebsd.ru) Received: from pobrecita.freebsd.ru (ache@localhost [127.0.0.1]) by nagual.pp.ru (8.12.9/8.12.9) with ESMTP id h712b3Yk023739; Fri, 1 Aug 2003 06:37:03 +0400 (MSD) (envelope-from ache@pobrecita.freebsd.ru) Received: (from ache@localhost) by pobrecita.freebsd.ru (8.12.9/8.12.9/Submit) id h712b3vN023738; Fri, 1 Aug 2003 06:37:03 +0400 (MSD) Date: Fri, 1 Aug 2003 06:37:03 +0400 From: Andrey Chernov To: current@freebsd.org Message-ID: <20030801023703.GA23702@nagual.pp.ru> References: <20030801004408.GA22054@nagual.pp.ru> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20030801004408.GA22054@nagual.pp.ru> User-Agent: Mutt/1.5.4i cc: i18n@freebsd.org Subject: Revised version (was Re: Serious 'tr' bug, patch for review included) X-BeenThere: freebsd-i18n@freebsd.org X-Mailman-Version: 2.1.1 Precedence: list List-Id: FreeBSD Internationalization Effort List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 01 Aug 2003 02:37:05 -0000 On Fri, Aug 01, 2003 at 04:44:08 +0400, Andrey Chernov wrote: > This patch address two problems. Revised patch version with accurate skipping. Surprisingly, the code is reduced. 
Only in .: CVS diff -u ./extern.h /usr/src/usr.bin/tr/extern.h --- ./extern.h Fri Jun 14 19:56:52 2002 +++ /usr/src/usr.bin/tr/extern.h Fri Aug 1 04:19:36 2003 @@ -40,7 +40,8 @@ typedef struct { enum { STRING1, STRING2 } which; - enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE, SET } state; + enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE, + SET, SET_UPPER, SET_LOWER } state; int cnt; /* character count */ int lastch; /* last character */ int equiv[NCHARS]; /* equivalence set */ @@ -49,3 +50,5 @@ } STR; int next(STR *); +int charcoll(const void *, const void *); + diff -u ./str.c /usr/src/usr.bin/tr/str.c --- ./str.c Fri Jul 5 13:28:13 2002 +++ /usr/src/usr.bin/tr/str.c Fri Aug 1 04:22:11 2003 @@ -106,6 +106,8 @@ } return (1); case SET: + case SET_UPPER: + case SET_LOWER: if ((s->lastch = s->set[s->cnt++]) == OOBCH) { s->state = NORMAL; return (next(s)); @@ -194,7 +196,7 @@ { int cnt, (*func)(int); CLASS *cp, tmp; - int *p; + int *p, n; tmp.name = s->str; if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) / @@ -208,10 +210,18 @@ if ((func)(cnt)) *p++ = cnt; *p = OOBCH; + n = p - cp->set; s->cnt = 0; - s->state = SET; s->set = cp->set; + if (strcmp(s->str, "upper") == 0) + s->state = SET_UPPER; + else if (strcmp(s->str, "lower") == 0) { + s->state = SET_LOWER; + } else + s->state = SET; + if ((s->state == SET_LOWER || s->state == SET_UPPER) && n > 1) + mergesort(s->set, n, sizeof(*(s->set)), charcoll); } static int diff -u ./tr.c /usr/src/usr.bin/tr/tr.c --- ./tr.c Thu Sep 5 03:29:07 2002 +++ /usr/src/usr.bin/tr/tr.c Fri Aug 1 06:30:24 2003 @@ -101,8 +101,9 @@ STR s1 = { STRING1, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL }; STR s2 = { STRING2, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL }; -static int charcoll(const void *, const void *); static void setup(int *, char *, STR *, int, int); +static void process_upper(int); +static void process_lower(int); static void usage(void); int @@ -224,20 +225,47 @@ if (!next(&s2)) errx(1, "empty string2"); - ch = s2.lastch; 
/* If string2 runs out of characters, use the last one specified. */ - if (sflag) - while (next(&s1)) { - string1[s1.lastch] = ch = s2.lastch; - string2[ch] = 1; - (void)next(&s2); - } - else - while (next(&s1)) { - string1[s1.lastch] = ch = s2.lastch; - (void)next(&s2); + while (next(&s1)) { + again: + if (s1.state == SET_LOWER && + s2.state == SET_UPPER && + s1.cnt == 1 && s2.cnt == 1) { + do { + if (!next(&s1)) { + process_upper(sflag); + goto endloop; + } + } while (s1.state == SET_LOWER && s1.cnt > 1); + do { + if (!next(&s2)) + break; + } while (s2.state == SET_UPPER && s2.cnt > 1); + process_upper(sflag); + goto again; + } else if (s1.state == SET_UPPER && + s2.state == SET_LOWER && + s1.cnt == 1 && s2.cnt == 1) { + do { + if (!next(&s1)) { + process_lower(sflag); + goto endloop; + } + } while (s1.state == SET_UPPER && s1.cnt > 1); + do { + if (!next(&s2)) + break; + } while (s2.state == SET_LOWER && s2.cnt > 1); + process_lower(sflag); + goto again; + } else { + string1[s1.lastch] = s2.lastch; + if (sflag) + string2[s2.lastch] = 1; } - + (void)next(&s2); + } +endloop: if (cflag || Cflag) { s2.str = argv[1]; s2.state = NORMAL; @@ -294,15 +322,59 @@ string[cnt] = !string[cnt] && ISCHAR(cnt); } -static int +int charcoll(const void *a, const void *b) { - char sa[2], sb[2]; + static char sa[2], sb[2]; + int r; sa[0] = *(const int *)a; sb[0] = *(const int *)b; - sa[1] = sb[1] = '\0'; - return (strcoll(sa, sb)); + r = strcoll(sa, sb); + if (r == 0) + r = *(const int *)a - *(const int *)b; + return (r); +} + + +/* + * For -s result will contain only those characters defined + * as the second characters in each of the toupper or tolower + * pairs. 
+ */ + +static void +process_upper(int sflag) +{ + int cnt, ch; + + for (cnt = 0; cnt < NCHARS; cnt++) { + ch = string1[cnt]; + if (ch == OOBCH) /* [Cc]flag */ + ch = cnt; + if (islower(ch)) { + string1[cnt] = ch = toupper(ch); + if (sflag && isupper(ch)) + string2[ch] = 1; + } + } +} + +static void +process_lower(int sflag) +{ + int cnt, ch; + + for (cnt = 0; cnt < NCHARS; cnt++) { + ch = string1[cnt]; + if (ch == OOBCH) /* [Cc]flag */ + ch = cnt; + if (isupper(ch)) { + string1[cnt] = ch = tolower(ch); + if (sflag && islower(ch)) + string2[ch] = 1; + } + } } static void