Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 10 Jul 2016 03:49:38 +0000 (UTC)
From:      "Andrey A. Chernov" <ache@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r302512 - in head/lib/libc: gen locale regex stdio
Message-ID:  <201607100349.u6A3ncUC050491@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: ache
Date: Sun Jul 10 03:49:38 2016
New Revision: 302512
URL: https://svnweb.freebsd.org/changeset/base/302512

Log:
  Remove broken support for collation in [a-z] type ranges.
  Only the first 256 wide chars are currently considered; all others are
  just dropped from the range. A proper implementation requires a reverse
  table database lookup, since the objects are really big, up to the max
  UTF-8 range (1114112 code points), so scanning in the same way as was
  done for 256 chars would slow things down.
  
  POSIX does not require collation for [a-z] type ranges and does not
  prohibit it for non-POSIX locales. POSIX requires collation for ranges
  only in the POSIX (or C) locale, which is equal to ASCII and binary for
  other chars, so we already have it.
  
  No other *BSD implements collation for [a-z] type ranges.
  
  Restore ABI compatibility with the now-unused __collate_range_cmp(),
  which is visible from outside (it will be removed later).

Modified:
  head/lib/libc/gen/fnmatch.c
  head/lib/libc/gen/glob.c
  head/lib/libc/locale/collate.h
  head/lib/libc/locale/collcmp.c
  head/lib/libc/regex/regcomp.c
  head/lib/libc/stdio/vfscanf.c

Modified: head/lib/libc/gen/fnmatch.c
==============================================================================
--- head/lib/libc/gen/fnmatch.c	Sun Jul 10 02:46:20 2016	(r302511)
+++ head/lib/libc/gen/fnmatch.c	Sun Jul 10 03:49:38 2016	(r302512)
@@ -63,8 +63,6 @@ __FBSDID("$FreeBSD$");
 #include <wchar.h>
 #include <wctype.h>
 
-#include "collate.h"
-
 #define	EOS	'\0'
 
 #define RANGE_MATCH     1
@@ -238,8 +236,6 @@ rangematch(const char *pattern, wchar_t 
 	wchar_t c, c2;
 	size_t pclen;
 	const char *origpat;
-	struct xlocale_collate *table =
-		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
 
 	/*
 	 * A bracket expression starting with an unquoted circumflex
@@ -294,11 +290,7 @@ rangematch(const char *pattern, wchar_t 
 			if (flags & FNM_CASEFOLD)
 				c2 = towlower(c2);
 
-			if (table->__collate_load_error ?
-			    c <= test && test <= c2 :
-			       __wcollate_range_cmp(table, c, test) <= 0
-			    && __wcollate_range_cmp(table, test, c2) <= 0
-			   )
+			if (c <= test && test <= c2)
 				ok = 1;
 		} else if (c == test)
 			ok = 1;

Modified: head/lib/libc/gen/glob.c
==============================================================================
--- head/lib/libc/gen/glob.c	Sun Jul 10 02:46:20 2016	(r302511)
+++ head/lib/libc/gen/glob.c	Sun Jul 10 03:49:38 2016	(r302512)
@@ -92,8 +92,6 @@ __FBSDID("$FreeBSD$");
 #include <unistd.h>
 #include <wchar.h>
 
-#include "collate.h"
-
 /*
  * glob(3) expansion limits. Stop the expansion if any of these limits
  * is reached. This caps the runtime in the face of DoS attacks. See
@@ -804,8 +802,6 @@ match(Char *name, Char *pat, Char *paten
 {
 	int ok, negate_range;
 	Char c, k;
-	struct xlocale_collate *table =
-		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
 
 	while (pat < patend) {
 		c = *pat++;
@@ -830,11 +826,7 @@ match(Char *name, Char *pat, Char *paten
 				++pat;
 			while (((c = *pat++) & M_MASK) != M_END)
 				if ((*pat & M_MASK) == M_RNG) {
-					if (table->__collate_load_error ?
-					    CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1]) :
-					       __wcollate_range_cmp(table, CHAR(c), CHAR(k)) <= 0
-					    && __wcollate_range_cmp(table, CHAR(k), CHAR(pat[1])) <= 0
-					   )
+					if (CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1]))
 						ok = 1;
 					pat += 2;
 				} else if (c == k)

Modified: head/lib/libc/locale/collate.h
==============================================================================
--- head/lib/libc/locale/collate.h	Sun Jul 10 02:46:20 2016	(r302511)
+++ head/lib/libc/locale/collate.h	Sun Jul 10 03:49:38 2016	(r302512)
@@ -128,8 +128,7 @@ int	__collate_load_tables(const char *);
 int	__collate_equiv_value(locale_t, const wchar_t *, size_t);
 void	_collate_lookup(struct xlocale_collate *,const wchar_t *, int *, int *,
 	int, const int **);
-int	__collate_range_cmp(struct xlocale_collate *, char, char);
-int	__wcollate_range_cmp(struct xlocale_collate *, wchar_t, wchar_t);
+int	__collate_range_cmp(int, int);
 size_t	_collate_wxfrm(struct xlocale_collate *, const wchar_t *, wchar_t *,
 	size_t);
 size_t	_collate_sxfrm(struct xlocale_collate *, const wchar_t *, char *,

Modified: head/lib/libc/locale/collcmp.c
==============================================================================
--- head/lib/libc/locale/collcmp.c	Sun Jul 10 02:46:20 2016	(r302511)
+++ head/lib/libc/locale/collcmp.c	Sun Jul 10 03:49:38 2016	(r302512)
@@ -33,15 +33,13 @@
 __FBSDID("$FreeBSD$");
 
 #include <string.h>
-#include <wchar.h>
-#include <xlocale.h>
 #include "collate.h"
 
 /*
  * Compare two characters using collate
  */
 
-int __collate_range_cmp(struct xlocale_collate *table, char c1, char c2)
+int __collate_range_cmp(int c1, int c2)
 {
 	char s1[2], s2[2];
 
@@ -49,20 +47,5 @@ int __collate_range_cmp(struct xlocale_c
 	s1[1] = '\0';
 	s2[0] = c2;
 	s2[1] = '\0';
-	struct _xlocale l = {{0}};
-	l.components[XLC_COLLATE] = (struct xlocale_component *)table;
-	return (strcoll_l(s1, s2, &l));
-}
-
-int __wcollate_range_cmp(struct xlocale_collate *table, wchar_t c1, wchar_t c2)
-{
-	wchar_t s1[2], s2[2];
-
-	s1[0] = c1;
-	s1[1] = L'\0';
-	s2[0] = c2;
-	s2[1] = L'\0';
-	struct _xlocale l = {{0}};
-	l.components[XLC_COLLATE] = (struct xlocale_component *)table;
-	return (wcscoll_l(s1, s2, &l));
+	return (strcoll(s1, s2));
 }

Modified: head/lib/libc/regex/regcomp.c
==============================================================================
--- head/lib/libc/regex/regcomp.c	Sun Jul 10 02:46:20 2016	(r302511)
+++ head/lib/libc/regex/regcomp.c	Sun Jul 10 03:49:38 2016	(r302512)
@@ -51,12 +51,9 @@ __FBSDID("$FreeBSD$");
 #include <limits.h>
 #include <stdlib.h>
 #include <regex.h>
-#include <runetype.h>
 #include <wchar.h>
 #include <wctype.h>
 
-#include "collate.h"
-
 #include "utils.h"
 #include "regex2.h"
 
@@ -767,9 +764,6 @@ p_b_term(struct parse *p, cset *cs)
 {
 	char c;
 	wint_t start, finish;
-	wint_t i;
-	struct xlocale_collate *table =
-		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
 
 	/* classify what we've got */
 	switch ((MORE()) ? PEEK() : '\0') {
@@ -817,18 +811,8 @@ p_b_term(struct parse *p, cset *cs)
 		if (start == finish)
 			CHadd(p, cs, start);
 		else {
-			if (table->__collate_load_error) {
-				(void)REQUIRE((uch)start <= (uch)finish, REG_ERANGE);
-				CHaddrange(p, cs, start, finish);
-			} else {
-				(void)REQUIRE(__wcollate_range_cmp(table, start, finish) <= 0, REG_ERANGE);
-				for (i = 0; i <= UCHAR_MAX; i++) {
-					if (   __wcollate_range_cmp(table, start, i) <= 0
-					    && __wcollate_range_cmp(table, i, finish) <= 0
-					   )
-						CHadd(p, cs, i);
-				}
-			}
+			(void)REQUIRE(start <= finish, REG_ERANGE);
+			CHaddrange(p, cs, start, finish);
 		}
 		break;
 	}

Modified: head/lib/libc/stdio/vfscanf.c
==============================================================================
--- head/lib/libc/stdio/vfscanf.c	Sun Jul 10 02:46:20 2016	(r302511)
+++ head/lib/libc/stdio/vfscanf.c	Sun Jul 10 03:49:38 2016	(r302512)
@@ -53,7 +53,6 @@ __FBSDID("$FreeBSD$");
 #include <wctype.h>
 #include "un-namespace.h"
 
-#include "collate.h"
 #include "libc_private.h"
 #include "local.h"
 #include "xlocale_private.h"
@@ -816,9 +815,7 @@ match_failure:
 static const u_char *
 __sccl(char *tab, const u_char *fmt)
 {
-	int c, n, v, i;
-	struct xlocale_collate *table =
-		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
+	int c, n, v;
 
 	/* first `clear' the whole table */
 	c = *fmt++;		/* first char hat => negated scanset */
@@ -871,29 +868,15 @@ doswitch:
 			 * we just stored in the table (c).
 			 */
 			n = *fmt;
-			if (n == ']'
-			    || (table->__collate_load_error ? n < c :
-				__wcollate_range_cmp(table, n, c) < 0
-			       )
-			   ) {
+			if (n == ']' || n < c) {
 				c = '-';
 				break;	/* resume the for(;;) */
 			}
 			fmt++;
-			/* fill in the range */
-			if (table->__collate_load_error) {
-				do {
-					tab[++c] = v;
-				} while (c < n);
-			} else {
-				for (i = 0; i < 256; i ++)
-					if (__wcollate_range_cmp(table, c, i) < 0 &&
-					    __wcollate_range_cmp(table, i, n) <= 0
-					   )
-						tab[i] = v;
-			}
+			do {		/* fill in the range */
+				tab[++c] = v;
+			} while (c < n);
 #if 1	/* XXX another disgusting compatibility hack */
-			c = n;
 			/*
 			 * Alas, the V7 Unix scanf also treats formats
 			 * such as [a-c-e] as `the letters a through e'.



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201607100349.u6A3ncUC050491>