From owner-svn-src-head@freebsd.org Sun Jul 17 09:40:00 2016 Return-Path: Delivered-To: svn-src-head@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 7BCF9B9AC86; Sun, 17 Jul 2016 09:40:00 +0000 (UTC) (envelope-from ache@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 3A71C1315; Sun, 17 Jul 2016 09:40:00 +0000 (UTC) (envelope-from ache@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id u6H9dxOO034666; Sun, 17 Jul 2016 09:39:59 GMT (envelope-from ache@FreeBSD.org) Received: (from ache@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id u6H9dxtm034665; Sun, 17 Jul 2016 09:39:59 GMT (envelope-from ache@FreeBSD.org) Message-Id: <201607170939.u6H9dxtm034665@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: ache set sender to ache@FreeBSD.org using -f From: "Andrey A. Chernov" Date: Sun, 17 Jul 2016 09:39:59 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r302943 - head/lib/libc/gen X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.22 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 17 Jul 2016 09:40:00 -0000 Author: ache Date: Sun Jul 17 09:39:59 2016 New Revision: 302943 URL: https://svnweb.freebsd.org/changeset/base/302943 Log: 1) This file is full of direct char <-> wchar_t assignments without conversion; cut them down. This hack still remains: * 2. 
Illegal byte sequences in filenames are handled by treating them as * single-byte characters with a values of such bytes of the sequence * cast to wchar_t. 2) Reword the comment in the hack above to reflect the implementation. 3) Protect signed wchar_t from sign extension when a signed char is assigned to it in the hack above. 4) The corresponding backward hack in g_Ctoc() was not implemented, so all paths with illegal byte sequences were skipped as a result; implement it now. 5) globtilde() forgot to convert the expanded user home dir from multibyte to wchar. 6) Protect globtilde() from long expansion truncation. 7) Results were not sorted according to collation as POSIX requires. Modified: head/lib/libc/gen/glob.c Modified: head/lib/libc/gen/glob.c ============================================================================== --- head/lib/libc/gen/glob.c Sun Jul 17 08:31:21 2016 (r302942) +++ head/lib/libc/gen/glob.c Sun Jul 17 09:39:59 2016 (r302943) @@ -71,7 +71,7 @@ __FBSDID("$FreeBSD$"); * 1. Patterns with illegal byte sequences match nothing - even if * GLOB_NOCHECK is specified. * 2. Illegal byte sequences in filenames are handled by treating them as - * single-byte characters with a value of the first byte of the sequence + * single-byte characters with a values of such bytes of the sequence * cast to wchar_t. * 3. State-dependent encodings are not currently supported. */ @@ -113,23 +113,21 @@ struct glob_limit { size_t l_string_cnt; }; -#define DOLLAR '$' -#define DOT '.' -#define EOS '\0' -#define LBRACKET '[' -#define NOT '!' -#define QUESTION '?' -#define QUOTE '\\' -#define RANGE '-' -#define RBRACKET ']' -#define SEP '/' -#define STAR '*' -#define TILDE '~' -#define UNDERSCORE '_' -#define LBRACE '{' -#define RBRACE '}' -#define SLASH '/' -#define COMMA ',' +#define DOT L'.' +#define EOS L'\0' +#define LBRACKET L'[' +#define NOT L'!' +#define QUESTION L'?' 
+#define QUOTE L'\\' +#define RANGE L'-' +#define RBRACKET L']' +#define SEP L'/' +#define STAR L'*' +#define TILDE L'~' +#define LBRACE L'{' +#define RBRACE L'}' +#define SLASH L'/' +#define COMMA L',' #ifndef DEBUG @@ -154,12 +152,12 @@ typedef char Char; #define CHAR(c) ((Char)((c)&M_CHAR)) #define META(c) ((Char)((c)|M_QUOTE)) -#define M_ALL META('*') -#define M_END META(']') -#define M_NOT META('!') -#define M_ONE META('?') -#define M_RNG META('-') -#define M_SET META('[') +#define M_ALL META(L'*') +#define M_END META(L']') +#define M_NOT META(L'!') +#define M_ONE META(L'?') +#define M_RNG META(L'-') +#define M_SET META(L'[') #define ismeta(c) (((c)&M_QUOTE) != 0) @@ -233,8 +231,8 @@ glob(const char * __restrict pattern, in /* Protect the quoted characters. */ memset(&mbs, 0, sizeof(mbs)); while (bufend - bufnext >= MB_CUR_MAX) { - if (*patnext == QUOTE) { - if (*++patnext == EOS) { + if (*patnext == '\\') { + if (*++patnext == '\0') { *bufnext++ = QUOTE | M_PROTECT; continue; } @@ -401,9 +399,15 @@ static const Char * globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob) { struct passwd *pwd; - char *h; + char *h, *sc; const Char *p; Char *b, *eb; + wchar_t wc; + wchar_t wbuf[MAXPATHLEN]; + wchar_t *wbufend, *dc; + size_t clen; + mbstate_t mbs; + int too_long; if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE)) return (pattern); @@ -412,13 +416,17 @@ globtilde(const Char *pattern, Char *pat * Copy up to the end of the string or / */ eb = &patbuf[patbuf_len - 1]; - for (p = pattern + 1, h = (char *) patbuf; - h < (char *)eb && *p && *p != SLASH; *h++ = *p++) + for (p = pattern + 1, b = patbuf; + b < eb && *p != EOS && *p != SLASH; *b++ = *p++) continue; - *h = EOS; + if (*p != EOS && *p != SLASH) + return (pattern); + + *b = EOS; + h = NULL; - if (((char *) patbuf)[0] == EOS) { + if (patbuf[0] == EOS) { /* * handle a plain ~ or ~/ by expanding $HOME first (iff * we're not running setuid or setgid) and then trying @@ -438,20 
+446,55 @@ globtilde(const Char *pattern, Char *pat /* * Expand a ~user */ - if ((pwd = getpwnam((char*) patbuf)) == NULL) + if (g_Ctoc(patbuf, (char *)wbuf, sizeof(wbuf)) || + (pwd = getpwnam((char *)wbuf)) == NULL) return (pattern); else h = pwd->pw_dir; } /* Copy the home directory */ - for (b = patbuf; b < eb && *h; *b++ = *h++) + dc = wbuf; + sc = h; + wbufend = wbuf + MAXPATHLEN - 1; + too_long = 1; + memset(&mbs, 0, sizeof(mbs)); + while (dc <= wbufend) { + clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs); + if (clen == (size_t)-1 || clen == (size_t)-2) { + /* XXX See initial comment #2. */ + wc = (unsigned char)*sc; + clen = 1; + memset(&mbs, 0, sizeof(mbs)); + } + if ((*dc++ = wc) == EOS) { + too_long = 0; + break; + } + sc += clen; + } + if (too_long) + return (pattern); + + dc = wbuf; + for (b = patbuf; b < eb && *dc != EOS; *b++ = *dc++) continue; + if (*dc != EOS) + return (pattern); /* Append the rest of the pattern */ - while (b < eb && (*b++ = *p++) != EOS) - continue; - *b = EOS; + if (*p != EOS) { + too_long = 1; + while (b <= eb) { + if ((*b++ = *p++) == EOS) { + too_long = 0; + break; + } + } + if (too_long) + return (pattern); + } else + *b = EOS; return (patbuf); } @@ -553,7 +596,7 @@ glob0(const Char *pattern, glob_t *pglob static int compare(const void *p, const void *q) { - return (strcmp(*(char **)p, *(char **)q)); + return (strcoll(*(char **)p, *(char **)q)); } static int @@ -699,7 +742,7 @@ glob3(Char *pathbuf, Char *pathend, Char } /* Initial DOT must be matched literally. */ - if (dp->d_name[0] == DOT && *pattern != DOT) + if (dp->d_name[0] == '.' && *pattern != DOT) continue; memset(&mbs, 0, sizeof(mbs)); dc = pathend; @@ -707,7 +750,8 @@ glob3(Char *pathbuf, Char *pathend, Char while (dc < pathend_last) { clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs); if (clen == (size_t)-1 || clen == (size_t)-2) { - wc = *sc; + /* XXX See initial comment #2. 
*/ + wc = (unsigned char)*sc; clen = 1; memset(&mbs, 0, sizeof(mbs)); } @@ -831,10 +875,12 @@ match(Char *name, Char *pat, Char *paten while (((c = *pat++) & M_MASK) != M_END) if ((*pat & M_MASK) == M_RNG) { if (table->__collate_load_error ? - CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1]) : - __wcollate_range_cmp(CHAR(c), CHAR(k)) <= 0 - && __wcollate_range_cmp(CHAR(k), CHAR(pat[1])) <= 0 - ) + CHAR(c) <= CHAR(k) && + CHAR(k) <= CHAR(pat[1]) : + __wcollate_range_cmp(CHAR(c), + CHAR(k)) <= 0 && + __wcollate_range_cmp(CHAR(k), + CHAR(pat[1])) <= 0) ok = 1; pat += 2; } else if (c == k) @@ -873,7 +919,7 @@ g_opendir(Char *str, glob_t *pglob) { char buf[MAXPATHLEN]; - if (!*str) + if (*str == EOS) strcpy(buf, "."); else { if (g_Ctoc(str, buf, sizeof(buf))) @@ -934,9 +980,13 @@ g_Ctoc(const Char *str, char *buf, size_ memset(&mbs, 0, sizeof(mbs)); while (len >= MB_CUR_MAX) { clen = wcrtomb(buf, *str, &mbs); - if (clen == (size_t)-1) - return (1); - if (*str == L'\0') + if (clen == (size_t)-1) { + /* XXX See initial comment #2. */ + *buf = (char)*str; + clen = 1; + memset(&mbs, 0, sizeof(mbs)); + } + if (*buf == '\0') return (0); str++; buf += clen;