Date: Sun, 10 Jul 2016 03:49:38 +0000 (UTC) From: "Andrey A. Chernov" <ache@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r302512 - in head/lib/libc: gen locale regex stdio Message-ID: <201607100349.u6A3ncUC050491@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: ache Date: Sun Jul 10 03:49:38 2016 New Revision: 302512 URL: https://svnweb.freebsd.org/changeset/base/302512 Log: Remove broken support for collation in [a-z] type ranges. Only the first 256 wide chars are currently considered; all others are just dropped from the range. A proper implementation requires a reverse-table database lookup, since the objects are really big, up to the UTF-8 maximum (1114112 code points), so the same scanning that was done for 256 chars would slow things down. POSIX does not require collation for [a-z] type ranges and does not prohibit it for non-POSIX locales. POSIX requires collation for ranges only in the POSIX (or C) locale, which is equal to ASCII and binary for other chars, so we already have it. No other *BSD implements collation for [a-z] type ranges. Restore ABI compatibility with the now-unused __collate_range_cmp(), which is visible from outside (will be removed later). Modified: head/lib/libc/gen/fnmatch.c head/lib/libc/gen/glob.c head/lib/libc/locale/collate.h head/lib/libc/locale/collcmp.c head/lib/libc/regex/regcomp.c head/lib/libc/stdio/vfscanf.c Modified: head/lib/libc/gen/fnmatch.c ============================================================================== --- head/lib/libc/gen/fnmatch.c Sun Jul 10 02:46:20 2016 (r302511) +++ head/lib/libc/gen/fnmatch.c Sun Jul 10 03:49:38 2016 (r302512) @@ -63,8 +63,6 @@ __FBSDID("$FreeBSD$"); #include <wchar.h> #include <wctype.h> -#include "collate.h" - #define EOS '\0' #define RANGE_MATCH 1 @@ -238,8 +236,6 @@ rangematch(const char *pattern, wchar_t wchar_t c, c2; size_t pclen; const char *origpat; - struct xlocale_collate *table = - (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; /* * A bracket expression starting with an unquoted circumflex @@ -294,11 +290,7 @@ rangematch(const char *pattern, wchar_t if (flags & FNM_CASEFOLD) c2 = towlower(c2); - if (table->__collate_load_error ? 
- c <= test && test <= c2 : - __wcollate_range_cmp(table, c, test) <= 0 - && __wcollate_range_cmp(table, test, c2) <= 0 - ) + if (c <= test && test <= c2) ok = 1; } else if (c == test) ok = 1; Modified: head/lib/libc/gen/glob.c ============================================================================== --- head/lib/libc/gen/glob.c Sun Jul 10 02:46:20 2016 (r302511) +++ head/lib/libc/gen/glob.c Sun Jul 10 03:49:38 2016 (r302512) @@ -92,8 +92,6 @@ __FBSDID("$FreeBSD$"); #include <unistd.h> #include <wchar.h> -#include "collate.h" - /* * glob(3) expansion limits. Stop the expansion if any of these limits * is reached. This caps the runtime in the face of DoS attacks. See @@ -804,8 +802,6 @@ match(Char *name, Char *pat, Char *paten { int ok, negate_range; Char c, k; - struct xlocale_collate *table = - (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; while (pat < patend) { c = *pat++; @@ -830,11 +826,7 @@ match(Char *name, Char *pat, Char *paten ++pat; while (((c = *pat++) & M_MASK) != M_END) if ((*pat & M_MASK) == M_RNG) { - if (table->__collate_load_error ? 
- CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1]) : - __wcollate_range_cmp(table, CHAR(c), CHAR(k)) <= 0 - && __wcollate_range_cmp(table, CHAR(k), CHAR(pat[1])) <= 0 - ) + if (CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1])) ok = 1; pat += 2; } else if (c == k) Modified: head/lib/libc/locale/collate.h ============================================================================== --- head/lib/libc/locale/collate.h Sun Jul 10 02:46:20 2016 (r302511) +++ head/lib/libc/locale/collate.h Sun Jul 10 03:49:38 2016 (r302512) @@ -128,8 +128,7 @@ int __collate_load_tables(const char *); int __collate_equiv_value(locale_t, const wchar_t *, size_t); void _collate_lookup(struct xlocale_collate *,const wchar_t *, int *, int *, int, const int **); -int __collate_range_cmp(struct xlocale_collate *, char, char); -int __wcollate_range_cmp(struct xlocale_collate *, wchar_t, wchar_t); +int __collate_range_cmp(int, int); size_t _collate_wxfrm(struct xlocale_collate *, const wchar_t *, wchar_t *, size_t); size_t _collate_sxfrm(struct xlocale_collate *, const wchar_t *, char *, Modified: head/lib/libc/locale/collcmp.c ============================================================================== --- head/lib/libc/locale/collcmp.c Sun Jul 10 02:46:20 2016 (r302511) +++ head/lib/libc/locale/collcmp.c Sun Jul 10 03:49:38 2016 (r302512) @@ -33,15 +33,13 @@ __FBSDID("$FreeBSD$"); #include <string.h> -#include <wchar.h> -#include <xlocale.h> #include "collate.h" /* * Compare two characters using collate */ -int __collate_range_cmp(struct xlocale_collate *table, char c1, char c2) +int __collate_range_cmp(int c1, int c2) { char s1[2], s2[2]; @@ -49,20 +47,5 @@ int __collate_range_cmp(struct xlocale_c s1[1] = '\0'; s2[0] = c2; s2[1] = '\0'; - struct _xlocale l = {{0}}; - l.components[XLC_COLLATE] = (struct xlocale_component *)table; - return (strcoll_l(s1, s2, &l)); -} - -int __wcollate_range_cmp(struct xlocale_collate *table, wchar_t c1, wchar_t c2) -{ - wchar_t s1[2], s2[2]; - - s1[0] = c1; - s1[1] 
= L'\0'; - s2[0] = c2; - s2[1] = L'\0'; - struct _xlocale l = {{0}}; - l.components[XLC_COLLATE] = (struct xlocale_component *)table; - return (wcscoll_l(s1, s2, &l)); + return (strcoll(s1, s2)); } Modified: head/lib/libc/regex/regcomp.c ============================================================================== --- head/lib/libc/regex/regcomp.c Sun Jul 10 02:46:20 2016 (r302511) +++ head/lib/libc/regex/regcomp.c Sun Jul 10 03:49:38 2016 (r302512) @@ -51,12 +51,9 @@ __FBSDID("$FreeBSD$"); #include <limits.h> #include <stdlib.h> #include <regex.h> -#include <runetype.h> #include <wchar.h> #include <wctype.h> -#include "collate.h" - #include "utils.h" #include "regex2.h" @@ -767,9 +764,6 @@ p_b_term(struct parse *p, cset *cs) { char c; wint_t start, finish; - wint_t i; - struct xlocale_collate *table = - (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; /* classify what we've got */ switch ((MORE()) ? PEEK() : '\0') { @@ -817,18 +811,8 @@ p_b_term(struct parse *p, cset *cs) if (start == finish) CHadd(p, cs, start); else { - if (table->__collate_load_error) { - (void)REQUIRE((uch)start <= (uch)finish, REG_ERANGE); - CHaddrange(p, cs, start, finish); - } else { - (void)REQUIRE(__wcollate_range_cmp(table, start, finish) <= 0, REG_ERANGE); - for (i = 0; i <= UCHAR_MAX; i++) { - if ( __wcollate_range_cmp(table, start, i) <= 0 - && __wcollate_range_cmp(table, i, finish) <= 0 - ) - CHadd(p, cs, i); - } - } + (void)REQUIRE(start <= finish, REG_ERANGE); + CHaddrange(p, cs, start, finish); } break; } Modified: head/lib/libc/stdio/vfscanf.c ============================================================================== --- head/lib/libc/stdio/vfscanf.c Sun Jul 10 02:46:20 2016 (r302511) +++ head/lib/libc/stdio/vfscanf.c Sun Jul 10 03:49:38 2016 (r302512) @@ -53,7 +53,6 @@ __FBSDID("$FreeBSD$"); #include <wctype.h> #include "un-namespace.h" -#include "collate.h" #include "libc_private.h" #include "local.h" #include "xlocale_private.h" @@ -816,9 +815,7 @@ 
match_failure: static const u_char * __sccl(char *tab, const u_char *fmt) { - int c, n, v, i; - struct xlocale_collate *table = - (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; + int c, n, v; /* first `clear' the whole table */ c = *fmt++; /* first char hat => negated scanset */ @@ -871,29 +868,15 @@ doswitch: * we just stored in the table (c). */ n = *fmt; - if (n == ']' - || (table->__collate_load_error ? n < c : - __wcollate_range_cmp(table, n, c) < 0 - ) - ) { + if (n == ']' || n < c) { c = '-'; break; /* resume the for(;;) */ } fmt++; - /* fill in the range */ - if (table->__collate_load_error) { - do { - tab[++c] = v; - } while (c < n); - } else { - for (i = 0; i < 256; i ++) - if (__wcollate_range_cmp(table, c, i) < 0 && - __wcollate_range_cmp(table, i, n) <= 0 - ) - tab[i] = v; - } + do { /* fill in the range */ + tab[++c] = v; + } while (c < n); #if 1 /* XXX another disgusting compatibility hack */ - c = n; /* * Alas, the V7 Unix scanf also treats formats * such as [a-c-e] as `the letters a through e'.
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201607100349.u6A3ncUC050491>