Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 30 Jul 2021 23:34:17 GMT
From:      Warner Losh <imp@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org
Subject:   git: da30af61b5af - stable/13 - awk: revert to upstream behavior for ranges for gawk compatibility
Message-ID:  <202107302334.16UNYHmV084243@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch stable/13 has been updated by imp:

URL: https://cgit.FreeBSD.org/src/commit/?id=da30af61b5af3353a0e2156fc027dd2fe38cc753

commit da30af61b5af3353a0e2156fc027dd2fe38cc753
Author:     Warner Losh <imp@FreeBSD.org>
AuthorDate: 2021-07-22 02:22:43 +0000
Commit:     Warner Losh <imp@FreeBSD.org>
CommitDate: 2021-07-30 23:02:12 +0000

    awk: revert to upstream behavior for ranges for gawk compatibility
    
    In 2005, FreeBSD changed one-true-awk to honor the locale's collating
    order. This was billed as a temporary patch. It was also compatible with
    the then-current behavior of gawk. That temporary patch has lasted 16
    years now.
    
    However, IEEE Std 1003.1-2008 changed the behavior of ranges in regular
    expressions outside of the "C" and "POSIX" locales to be undefined.
    
    Starting in 2011, gawk 4.0 stopped using the locale for the range
    regular expressions and used the traditional behavior only. The
    maintainer had grown weary of answering why '[A-Z]' would sometimes
    match lower-case expressions. The details are explained here:
    https://www.gnu.org/software/gawk/manual/html_node/Ranges-and-Locales.html
    
    To restore compatibility with other implementations of awk, revert this
    patch. FreeBSD is the odd system out. It also has the nice side effect
    of eliminating the last of our differences with upstream one-true-awk.
    
    Reviewed by:            cy, rgrimes
    MFC After:              2 weeks
    Sponsored by:           Netflix
    Differential Revision:  https://reviews.freebsd.org/D31114
    
    (cherry picked from commit 628bd30ab5a492839b84dbf2aaf25848dae3866a)
---
 contrib/one-true-awk/b.c    | 22 ++++------------------
 contrib/one-true-awk/main.c |  1 -
 contrib/one-true-awk/run.c  |  2 +-
 3 files changed, 5 insertions(+), 20 deletions(-)

diff --git a/contrib/one-true-awk/b.c b/contrib/one-true-awk/b.c
index dd64f09647b0..01119470de05 100644
--- a/contrib/one-true-awk/b.c
+++ b/contrib/one-true-awk/b.c
@@ -361,20 +361,9 @@ int quoted(const uschar **pp)	/* pick up next thing after a \\ */
 	return c;
 }
 
-static int collate_range_cmp(int a, int b)
-{
-	static char s[2][2];
-
-	if ((uschar)a == (uschar)b)
-		return 0;
-	s[0][0] = a;
-	s[1][0] = b;
-	return (strcoll(s[0], s[1]));
-}
-
 char *cclenter(const char *argp)	/* add a character class */
 {
-	int i, c, c2, j;
+	int i, c, c2;
 	const uschar *op, *p = (const uschar *) argp;
 	uschar *bp;
 	static uschar *buf = NULL;
@@ -393,18 +382,15 @@ char *cclenter(const char *argp)	/* add a character class */
 				c2 = *p++;
 				if (c2 == '\\')
 					c2 = quoted(&p);
-				if (collate_range_cmp(c, c2) > 0) {
+				if (c > c2) {	/* empty; ignore */
 					bp--;
 					i--;
 					continue;
 				}
-				for (j = 0; j < NCHARS; j++) {
-					if ((collate_range_cmp(c, j) > 0) ||
-					    collate_range_cmp(j, c2) > 0)
-						continue;
+				while (c < c2) {
 					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
 						FATAL("out of space for character class [%.10s...] 2", p);
-					*bp++ = j;
+					*bp++ = ++c;
 					i++;
 				}
 				continue;
diff --git a/contrib/one-true-awk/main.c b/contrib/one-true-awk/main.c
index cfe3fafaf7fe..95eeba00d9ca 100644
--- a/contrib/one-true-awk/main.c
+++ b/contrib/one-true-awk/main.c
@@ -117,7 +117,6 @@ int main(int argc, char *argv[])
 	char *fn, *vn;
 
 	setlocale(LC_CTYPE, "");
-	setlocale(LC_COLLATE, "");
 	setlocale(LC_NUMERIC, "C"); /* for parsing cmdline & prog */
 	cmdname = argv[0];
 	if (argc == 1) {
diff --git a/contrib/one-true-awk/run.c b/contrib/one-true-awk/run.c
index 9752cd773630..3342eb274e3a 100644
--- a/contrib/one-true-awk/run.c
+++ b/contrib/one-true-awk/run.c
@@ -665,7 +665,7 @@ Cell *relop(Node **a, int n)	/* a[0 < a[1], etc. */
 		j = x->fval - y->fval;
 		i = j<0? -1: (j>0? 1: 0);
 	} else {
-		i = strcoll(getsval(x), getsval(y));
+		i = strcmp(getsval(x), getsval(y));
 	}
 	tempfree(x);
 	tempfree(y);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202107302334.16UNYHmV084243>