From owner-svn-src-projects@freebsd.org Sat Aug 8 18:22:19 2015 Return-Path: Delivered-To: svn-src-projects@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 695869B5D10 for ; Sat, 8 Aug 2015 18:22:19 +0000 (UTC) (envelope-from bapt@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 59386988; Sat, 8 Aug 2015 18:22:19 +0000 (UTC) (envelope-from bapt@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.70]) by repo.freebsd.org (8.14.9/8.14.9) with ESMTP id t78IMJn5085111; Sat, 8 Aug 2015 18:22:19 GMT (envelope-from bapt@FreeBSD.org) Received: (from bapt@localhost) by repo.freebsd.org (8.14.9/8.14.9/Submit) id t78IMFfS085095; Sat, 8 Aug 2015 18:22:15 GMT (envelope-from bapt@FreeBSD.org) Message-Id: <201508081822.t78IMFfS085095@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: bapt set sender to bapt@FreeBSD.org using -f From: Baptiste Daroussin Date: Sat, 8 Aug 2015 18:22:15 +0000 (UTC) To: src-committers@freebsd.org, svn-src-projects@freebsd.org Subject: svn commit: r286459 - projects/collation/lib/libc/locale X-SVN-Group: projects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-projects@freebsd.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: "SVN commit messages for the src " projects" tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 08 Aug 2015 18:22:19 -0000 Author: bapt Date: Sat Aug 8 18:22:14 2015 New Revision: 286459 URL: https://svnweb.freebsd.org/changeset/base/286459 Log: Revamp CTYPE support (from Illumos & Dragonfly) Obtained from: Dragonfly Deleted: projects/collation/lib/libc/locale/ascii.c Modified: projects/collation/lib/libc/locale/Makefile.inc projects/collation/lib/libc/locale/big5.c projects/collation/lib/libc/locale/collate.c projects/collation/lib/libc/locale/collate.h projects/collation/lib/libc/locale/euc.c projects/collation/lib/libc/locale/gb18030.c projects/collation/lib/libc/locale/gb2312.c projects/collation/lib/libc/locale/gbk.c projects/collation/lib/libc/locale/mblocal.h projects/collation/lib/libc/locale/mbsnrtowcs.c projects/collation/lib/libc/locale/mskanji.c projects/collation/lib/libc/locale/none.c projects/collation/lib/libc/locale/rune.c projects/collation/lib/libc/locale/setrunelocale.c projects/collation/lib/libc/locale/utf8.c projects/collation/lib/libc/locale/wcsnrtombs.c Modified: projects/collation/lib/libc/locale/Makefile.inc ============================================================================== --- projects/collation/lib/libc/locale/Makefile.inc Sat Aug 8 18:14:59 2015 (r286458) +++ projects/collation/lib/libc/locale/Makefile.inc Sat Aug 8 18:22:14 2015 (r286459) @@ -4,7 +4,7 @@ # locale sources .PATH: ${LIBC_SRCTOP}/${LIBC_ARCH}/locale ${LIBC_SRCTOP}/locale -SRCS+= ascii.c big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c \ +SRCS+= big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c \ gb18030.c gb2312.c gbk.c ctype.c isctype.c iswctype.c \ ldpart.c lmessages.c lmonetary.c lnumeric.c localeconv.c mblen.c \ mbrlen.c \ Modified: projects/collation/lib/libc/locale/big5.c ============================================================================== --- projects/collation/lib/libc/locale/big5.c Sat Aug 8 18:14:59 2015 (r286458) +++ projects/collation/lib/libc/locale/big5.c Sat Aug 8 18:22:14 2015 (r286459) @@ -1,4 +1,6 @@ /*- + * Copyright 2013 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. @@ -19,11 +21,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -61,6 +59,12 @@ static size_t _BIG5_mbrtowc(wchar_t * __ static int _BIG5_mbsinit(const mbstate_t *); static size_t _BIG5_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); +static size_t _BIG5_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _BIG5_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); typedef struct { wchar_t ch; @@ -72,6 +76,8 @@ _BIG5_init(struct xlocale_ctype *l, _Run l->__mbrtowc = _BIG5_mbrtowc; l->__wcrtomb = _BIG5_wcrtomb; + l->__mbsnrtowcs = _BIG5_mbsnrtowcs; + l->__wcsnrtombs = _BIG5_wcsnrtombs; l->__mbsinit = _BIG5_mbsinit; l->runes = rl; l->__mb_cur_max = 2; @@ -147,7 +153,7 @@ _BIG5_mbrtowc(wchar_t * __restrict pwc, wc = (wc << 8) | (*s++ & 0xff); if (pwc != NULL) *pwc = wc; - return (2); + return (2); } else { if (pwc != NULL) *pwc = wc; @@ -178,3 +184,17 @@ _BIG5_wcrtomb(char * __restrict s, wchar *s = wc & 0xff; return (1); } + +static size_t +_BIG5_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src, + size_t nms, size_t len, mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _BIG5_mbrtowc)); +} + +static size_t +_BIG5_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, + size_t nwc, size_t len, mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _BIG5_wcrtomb)); +} Modified: projects/collation/lib/libc/locale/collate.c ============================================================================== --- projects/collation/lib/libc/locale/collate.c Sat Aug 8 18:14:59 2015 (r286458) +++ projects/collation/lib/libc/locale/collate.c Sat Aug 8 18:22:14 2015 (r286459) @@ -1,4 +1,5 @@ /*- + * Copyright 2014 Garrett D'Amore * Copright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 1995 Alex Tatmanjants * at Electronni Visti IA, Kiev, Ukraine. Modified: projects/collation/lib/libc/locale/collate.h ============================================================================== --- projects/collation/lib/libc/locale/collate.h Sat Aug 8 18:14:59 2015 (r286458) +++ projects/collation/lib/libc/locale/collate.h Sat Aug 8 18:22:14 2015 (r286459) @@ -1,4 +1,5 @@ /*- + * Copyright 2010 Nexenta Systmes, Inc. All rights reserved. * Copyright (c) 1995 Alex Tatmanjants * at Electronni Visti IA, Kiev, Ukraine. * All rights reserved. Modified: projects/collation/lib/libc/locale/euc.c ============================================================================== --- projects/collation/lib/libc/locale/euc.c Sat Aug 8 18:14:59 2015 (r286458) +++ projects/collation/lib/libc/locale/euc.c Sat Aug 8 18:22:14 2015 (r286459) @@ -1,4 +1,6 @@ /*- + * Copyright 2013 Garrett D'Amore + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. @@ -19,11 +21,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -56,17 +54,56 @@ __FBSDID("$FreeBSD$"); extern int __mb_sb_limit; -static size_t _EUC_mbrtowc(wchar_t * __restrict, const char * __restrict, +static size_t _EUC_mbrtowc_impl(wchar_t * __restrict, const char * __restrict, + size_t, mbstate_t * __restrict, uint8_t, uint8_t, uint8_t, uint8_t); +static size_t _EUC_wcrtomb_impl(char * __restrict, wchar_t, + mbstate_t * __restrict, uint8_t, uint8_t, uint8_t, uint8_t); + +static size_t _EUC_CN_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); -static int _EUC_mbsinit(const mbstate_t *); -static size_t _EUC_wcrtomb(char * __restrict, wchar_t, +static size_t _EUC_JP_mbrtowc(wchar_t * __restrict, const char * __restrict, + size_t, mbstate_t * __restrict); +static size_t _EUC_KR_mbrtowc(wchar_t * __restrict, const char * __restrict, + size_t, mbstate_t * __restrict); +static size_t _EUC_TW_mbrtowc(wchar_t * __restrict, const char * __restrict, + size_t, mbstate_t * __restrict); + +static size_t _EUC_CN_wcrtomb(char * __restrict, wchar_t, + mbstate_t * __restrict); +static size_t _EUC_JP_wcrtomb(char * __restrict, wchar_t, + mbstate_t * __restrict); +static size_t _EUC_KR_wcrtomb(char * __restrict, wchar_t, + mbstate_t * __restrict); +static size_t _EUC_TW_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); -typedef struct { - int count[4]; - wchar_t bits[4]; - wchar_t mask; -} _EucInfo; +static size_t _EUC_CN_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _EUC_JP_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _EUC_KR_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _EUC_TW_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); + +static size_t _EUC_CN_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _EUC_JP_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _EUC_KR_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _EUC_TW_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); + +static int _EUC_mbsinit(const mbstate_t *); typedef struct { wchar_t ch; @@ -74,94 +111,218 @@ typedef struct { int want; } _EucState; +static int +_EUC_mbsinit(const mbstate_t *ps) +{ + + return (ps == NULL || ((const _EucState *)ps)->want == 0); +} + +/* + * EUC-CN uses CS0, CS1 and CS2 (4 bytes). + */ int -_EUC_init(struct xlocale_ctype *l, _RuneLocale *rl) +_EUC_CN_init(struct xlocale_ctype *l, _RuneLocale *rl) { - _EucInfo *ei; - int x, new__mb_cur_max; - char *v, *e; + l->__mbrtowc = _EUC_CN_mbrtowc; + l->__wcrtomb = _EUC_CN_wcrtomb; + l->__mbsnrtowcs = _EUC_CN_mbsnrtowcs; + l->__wcsnrtombs = _EUC_CN_wcsnrtombs; + l->__mbsinit = _EUC_mbsinit; - if (rl->__variable == NULL) - return (EFTYPE); + l->runes = rl; + l->__mb_cur_max = 4; + l->__mb_sb_limit = 256; + return (0); +} - v = (char *)rl->__variable; +static size_t +_EUC_CN_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, + size_t n, mbstate_t * __restrict ps) +{ + return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0)); +} - while (*v == ' ' || *v == '\t') - ++v; +static size_t +_EUC_CN_mbsnrtowcs(wchar_t * __restrict dst, + const char ** __restrict src, + size_t nms, size_t len, mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_CN_mbrtowc)); +} - if ((ei = malloc(sizeof(_EucInfo))) == NULL) - return (errno == 0 ? ENOMEM : errno); +static size_t +_EUC_CN_wcrtomb(char * __restrict s, wchar_t wc, + mbstate_t * __restrict ps) +{ + return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0)); +} - new__mb_cur_max = 0; - for (x = 0; x < 4; ++x) { - ei->count[x] = (int)strtol(v, &e, 0); - if (v == e || !(v = e)) { - free(ei); - return (EFTYPE); - } - if (new__mb_cur_max < ei->count[x]) - new__mb_cur_max = ei->count[x]; - while (*v == ' ' || *v == '\t') - ++v; - ei->bits[x] = (int)strtol(v, &e, 0); - if (v == e || !(v = e)) { - free(ei); - return (EFTYPE); - } - while (*v == ' ' || *v == '\t') - ++v; - } - ei->mask = (int)strtol(v, &e, 0); - if (v == e || !(v = e)) { - free(ei); - return (EFTYPE); - } - rl->__variable = ei; - rl->__variable_len = sizeof(_EucInfo); - l->runes = rl; - l->__mb_cur_max = new__mb_cur_max; - l->__mbrtowc = _EUC_mbrtowc; - l->__wcrtomb = _EUC_wcrtomb; +static size_t +_EUC_CN_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, + size_t nwc, size_t len, mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_CN_wcrtomb)); +} + +/* + * EUC-KR uses only CS0 and CS1. + */ +int +_EUC_KR_init(struct xlocale_ctype *l, _RuneLocale *rl) +{ + l->__mbrtowc = _EUC_KR_mbrtowc; + l->__wcrtomb = _EUC_KR_wcrtomb; + l->__mbsnrtowcs = _EUC_KR_mbsnrtowcs; + l->__wcsnrtombs = _EUC_KR_wcsnrtombs; l->__mbsinit = _EUC_mbsinit; - l->__mb_sb_limit = 256; + + l->runes = rl; + l->__mb_cur_max = 2; + l->__mb_sb_limit = 128; return (0); } -static int -_EUC_mbsinit(const mbstate_t *ps) +static size_t +_EUC_KR_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, + size_t n, mbstate_t * __restrict ps) { + return (_EUC_mbrtowc_impl(pwc, s, n, ps, 0, 0, 0, 0)); +} - return (ps == NULL || ((const _EucState *)ps)->want == 0); +static size_t +_EUC_KR_mbsnrtowcs(wchar_t * __restrict dst, + const char ** __restrict src, + size_t nms, size_t len, mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_KR_mbrtowc)); } -#define CEI ((_EucInfo *)(_CurrentRuneLocale->__variable)) +static size_t +_EUC_KR_wcrtomb(char * __restrict s, wchar_t wc, + mbstate_t * __restrict ps) +{ + return (_EUC_wcrtomb_impl(s, wc, ps, 0, 0, 0, 0)); +} -#define _SS2 0x008e -#define _SS3 0x008f +static size_t +_EUC_KR_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, + size_t nwc, size_t len, mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_KR_wcrtomb)); +} -#define GR_BITS 0x80808080 /* XXX: to be fixed */ +/* + * EUC-JP uses CS0, CS1, CS2, and CS3. + */ +int +_EUC_JP_init(struct xlocale_ctype *l, _RuneLocale *rl) +{ + l->__mbrtowc = _EUC_JP_mbrtowc; + l->__wcrtomb = _EUC_JP_wcrtomb; + l->__mbsnrtowcs = _EUC_JP_mbsnrtowcs; + l->__wcsnrtombs = _EUC_JP_wcsnrtombs; + l->__mbsinit = _EUC_mbsinit; -static __inline int -_euc_set(u_int c) + l->runes = rl; + l->__mb_cur_max = 3; + l->__mb_sb_limit = 196; + return (0); +} + +static size_t +_EUC_JP_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, + size_t n, mbstate_t * __restrict ps) { + return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 2, SS3, 3)); +} - c &= 0xff; - return ((c & 0x80) ? c == _SS3 ? 3 : c == _SS2 ? 2 : 1 : 0); +static size_t +_EUC_JP_mbsnrtowcs(wchar_t * __restrict dst, + const char ** __restrict src, + size_t nms, size_t len, mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_JP_mbrtowc)); } static size_t -_EUC_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, +_EUC_JP_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) { + return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 2, SS3, 3)); +} + +static size_t +_EUC_JP_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, + size_t nwc, size_t len, mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_JP_wcrtomb)); +} + +/* + * EUC-TW uses CS0, CS1, and CS2. + */ +int +_EUC_TW_init(struct xlocale_ctype *l, _RuneLocale *rl) +{ + l->__mbrtowc = _EUC_TW_mbrtowc; + l->__wcrtomb = _EUC_TW_wcrtomb; + l->__mbsnrtowcs = _EUC_TW_mbsnrtowcs; + l->__wcsnrtombs = _EUC_TW_wcsnrtombs; + l->__mbsinit = _EUC_mbsinit; + + l->runes = rl; + l->__mb_cur_max = 4; + l->__mb_sb_limit = 256; + return (0); +} + +static size_t +_EUC_TW_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, + size_t n, mbstate_t * __restrict ps) +{ + return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0)); +} + +static size_t +_EUC_TW_mbsnrtowcs(wchar_t * __restrict dst, + const char ** __restrict src, + size_t nms, size_t len, mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_TW_mbrtowc)); +} + +static size_t +_EUC_TW_wcrtomb(char * __restrict s, wchar_t wc, + mbstate_t * __restrict ps) +{ + return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0)); +} + +static size_t +_EUC_TW_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, + size_t nwc, size_t len, mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_TW_wcrtomb)); +} + +/* + * Common EUC code. + */ + +static size_t +_EUC_mbrtowc_impl(wchar_t * __restrict pwc, const char * __restrict s, + size_t n, mbstate_t * __restrict ps, + uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width) +{ _EucState *es; - int i, set, want; + int i, want; wchar_t wc; - const char *os; + unsigned char ch; es = (_EucState *)ps; - if (es->want < 0 || es->want > MB_CUR_MAX || es->set < 0 || - es->set > 3) { + if (es->want < 0 || es->want > MB_CUR_MAX) { errno = EINVAL; return ((size_t)-1); } @@ -176,58 +337,59 @@ _EUC_mbrtowc(wchar_t * __restrict pwc, c /* Incomplete multibyte sequence */ return ((size_t)-2); - os = s; - if (es->want == 0) { - want = CEI->count[set = _euc_set(*s)]; - if (set == 2 || set == 3) { - --want; - if (--n == 0) { - /* Incomplete multibyte sequence */ - es->set = set; - es->want = want; - es->ch = 0; - return ((size_t)-2); - } - ++s; - if (*s == '\0') { - errno = EILSEQ; - return ((size_t)-1); - } + /* Fast path for plain ASCII (CS0) */ + if (((ch = (unsigned char)*s) & 0x80) == 0) { + if (pwc != NULL) + *pwc = ch; + return (ch != '\0' ? 1 : 0); } - wc = (unsigned char)*s++; + + if (ch >= 0xa1) { + /* CS1 */ + want = 2; + } else if (ch == cs2) { + want = cs2width; + } else if (ch == cs3) { + want = cs3width; + } else { + errno = EILSEQ; + return ((size_t)-1); + } + + + es->want = want; + es->ch = 0; } else { - set = es->set; want = es->want; wc = es->ch; } - for (i = (es->want == 0) ? 1 : 0; i < MIN(want, n); i++) { - if (*s == '\0') { - errno = EILSEQ; - return ((size_t)-1); - } - wc = (wc << 8) | (unsigned char)*s++; + + for (i = 0; i < MIN(want, n); i++) { + wc <<= 8; + wc |= *s; + s++; } if (i < want) { /* Incomplete multibyte sequence */ - es->set = set; es->want = want - i; es->ch = wc; return ((size_t)-2); } - wc = (wc & ~CEI->mask) | CEI->bits[set]; if (pwc != NULL) *pwc = wc; es->want = 0; - return (wc == L'\0' ? 0 : s - os); + return (wc == L'\0' ? 0 : want); } static size_t -_EUC_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) +_EUC_wcrtomb_impl(char * __restrict s, wchar_t wc, + mbstate_t * __restrict ps, + uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width) { _EucState *es; - wchar_t m, nm; int i, len; + wchar_t nm; es = (_EucState *)ps; @@ -240,34 +402,52 @@ _EUC_wcrtomb(char * __restrict s, wchar_ /* Reset to initial shift state (no-op) */ return (1); - m = wc & CEI->mask; - nm = wc & ~m; + if ((wc & ~0x7f) == 0) { + /* Fast path for plain ASCII (CS0) */ + *s = (char)wc; + return (1); + } - if (m == CEI->bits[1]) { -CodeSet1: - /* Codeset 1: The first byte must have 0x80 in it. */ - i = len = CEI->count[1]; - while (i-- > 0) - *s++ = (nm >> (i << 3)) | 0x80; + /* Determine the "length" */ + if ((unsigned)wc > 0xffffff) { + len = 4; + } else if ((unsigned)wc > 0xffff) { + len = 3; + } else if ((unsigned)wc > 0xff) { + len = 2; } else { - if (m == CEI->bits[0]) - i = len = CEI->count[0]; - else if (m == CEI->bits[2]) { - i = len = CEI->count[2]; - *s++ = _SS2; - --i; - /* SS2 designates G2 into GR */ - nm |= GR_BITS; - } else if (m == CEI->bits[3]) { - i = len = CEI->count[3]; - *s++ = _SS3; - --i; - /* SS3 designates G3 into GR */ - nm |= GR_BITS; - } else - goto CodeSet1; /* Bletch */ - while (i-- > 0) - *s++ = (nm >> (i << 3)) & 0xff; + len = 1; + } + + if (len > MB_CUR_MAX) { + errno = EILSEQ; + return ((size_t)-1); + } + + /* This first check excludes CS1, which is implicitly valid. */ + if ((wc < 0xa100) || (wc > 0xffff)) { + /* Check for valid CS2 or CS3 */ + nm = (wc >> ((len - 1) * 8)); + if (nm == cs2) { + if (len != cs2width) { + errno = EILSEQ; + return ((size_t)-1); + } + } else if (nm == cs3) { + if (len != cs3width) { + errno = EILSEQ; + return ((size_t)-1); + } + } else { + errno = EILSEQ; + return ((size_t)-1); + } + } + + /* Stash the bytes, least significant last */ + for (i = len - 1; i >= 0; i--) { + s[i] = (wc & 0xff); + wc >>= 8; } return (len); } Modified: projects/collation/lib/libc/locale/gb18030.c ============================================================================== --- projects/collation/lib/libc/locale/gb18030.c Sat Aug 8 18:14:59 2015 (r286458) +++ projects/collation/lib/libc/locale/gb18030.c Sat Aug 8 18:22:14 2015 (r286459) @@ -1,4 +1,6 @@ /*- + * Copyright 2013 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002-2004 Tim J. Robbins * All rights reserved. * @@ -28,6 +30,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ + /* * PRC National Standard GB 18030-2000 encoding of Chinese text. * @@ -49,6 +52,13 @@ static size_t _GB18030_mbrtowc(wchar_t * static int _GB18030_mbsinit(const mbstate_t *); static size_t _GB18030_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); +static size_t _GB18030_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _GB18030_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); + typedef struct { int count; @@ -62,6 +72,8 @@ _GB18030_init(struct xlocale_ctype *l, _ l->__mbrtowc = _GB18030_mbrtowc; l->__wcrtomb = _GB18030_wcrtomb; l->__mbsinit = _GB18030_mbsinit; + l->__mbsnrtowcs = _GB18030_mbsnrtowcs; + l->__wcsnrtombs = _GB18030_wcsnrtombs; l->runes = rl; l->__mb_cur_max = 4; l->__mb_sb_limit = 128; @@ -222,3 +234,19 @@ ilseq: errno = EILSEQ; return ((size_t)-1); } + +static size_t +_GB18030_mbsnrtowcs(wchar_t * __restrict dst, + const char ** __restrict src, size_t nms, size_t len, + mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _GB18030_mbrtowc)); +} + +static size_t +_GB18030_wcsnrtombs(char * __restrict dst, + const wchar_t ** __restrict src, size_t nwc, size_t len, + mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _GB18030_wcrtomb)); +} Modified: projects/collation/lib/libc/locale/gb2312.c ============================================================================== --- projects/collation/lib/libc/locale/gb2312.c Sat Aug 8 18:14:59 2015 (r286458) +++ projects/collation/lib/libc/locale/gb2312.c Sat Aug 8 18:22:14 2015 (r286459) @@ -1,4 +1,6 @@ /*- + * Copyright 2013 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2004 Tim J. Robbins. All rights reserved. * Copyright (c) 2003 David Xu * All rights reserved. @@ -45,6 +47,13 @@ static size_t _GB2312_mbrtowc(wchar_t * static int _GB2312_mbsinit(const mbstate_t *); static size_t _GB2312_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); +static size_t _GB2312_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _GB2312_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); + typedef struct { int count; @@ -59,6 +68,8 @@ _GB2312_init(struct xlocale_ctype *l, _R l->__mbrtowc = _GB2312_mbrtowc; l->__wcrtomb = _GB2312_wcrtomb; l->__mbsinit = _GB2312_mbsinit; + l->__mbsnrtowcs = _GB2312_mbsnrtowcs; + l->__wcsnrtombs = _GB2312_wcsnrtombs; l->__mb_cur_max = 2; l->__mb_sb_limit = 128; return (0); @@ -71,7 +82,7 @@ _GB2312_mbsinit(const mbstate_t *ps) return (ps == NULL || ((const _GB2312State *)ps)->count == 0); } -static __inline int +static int _GB2312_check(const char *str, size_t n) { const u_char *s = (const u_char *)str; @@ -90,7 +101,7 @@ _GB2312_check(const char *str, size_t n) } else if (s[0] & 0x80) { /* Invalid multibyte sequence */ return (-1); - } + } return (1); } @@ -158,3 +169,19 @@ _GB2312_wcrtomb(char * __restrict s, wch *s = wc & 0xff; return (1); } + +static size_t +_GB2312_mbsnrtowcs(wchar_t * __restrict dst, + const char ** __restrict src, size_t nms, size_t len, + mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _GB2312_mbrtowc)); +} + +static size_t +_GB2312_wcsnrtombs(char * __restrict dst, + const wchar_t ** __restrict src, size_t nwc, size_t len, + mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _GB2312_wcrtomb)); +} Modified: projects/collation/lib/libc/locale/gbk.c ============================================================================== --- projects/collation/lib/libc/locale/gbk.c Sat Aug 8 18:14:59 2015 (r286458) +++ projects/collation/lib/libc/locale/gbk.c Sat Aug 8 18:22:14 2015 (r286459) @@ -1,4 +1,6 @@ /*- + * Copyright 2013 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. @@ -54,6 +56,12 @@ static size_t _GBK_mbrtowc(wchar_t * __r static int _GBK_mbsinit(const mbstate_t *); static size_t _GBK_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); +static size_t _GBK_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _GBK_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); typedef struct { wchar_t ch; @@ -66,6 +74,8 @@ _GBK_init(struct xlocale_ctype *l, _Rune l->__mbrtowc = _GBK_mbrtowc; l->__wcrtomb = _GBK_wcrtomb; l->__mbsinit = _GBK_mbsinit; + l->__mbsnrtowcs = _GBK_mbsnrtowcs; + l->__wcsnrtombs = _GBK_wcsnrtombs; l->runes = rl; l->__mb_cur_max = 2; l->__mb_sb_limit = 128; @@ -79,7 +89,7 @@ _GBK_mbsinit(const mbstate_t *ps) return (ps == NULL || ((const _GBKState *)ps)->ch == 0); } -static __inline int +static int _gbk_check(u_int c) { @@ -140,7 +150,7 @@ _GBK_mbrtowc(wchar_t * __restrict pwc, c wc = (wc << 8) | (*s++ & 0xff); if (pwc != NULL) *pwc = wc; - return (2); + return (2); } else { if (pwc != NULL) *pwc = wc; @@ -171,3 +181,17 @@ _GBK_wcrtomb(char * __restrict s, wchar_ *s = wc & 0xff; return (1); } + +static size_t +_GBK_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src, + size_t nms, size_t len, mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _GBK_mbrtowc)); +} + +static size_t +_GBK_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, + size_t nwc, size_t len, mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _GBK_wcrtomb)); +} Modified: projects/collation/lib/libc/locale/mblocal.h ============================================================================== --- projects/collation/lib/libc/locale/mblocal.h Sat Aug 8 18:14:59 2015 (r286458) +++ projects/collation/lib/libc/locale/mblocal.h Sat Aug 8 18:22:14 2015 (r286459) @@ -1,4 +1,6 @@ /*- + * Copyright 2013 Garrett D'Amore + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2004 Tim J. Robbins. * All rights reserved. * @@ -37,6 +39,8 @@ #include #include "xlocale_private.h" +#define SS2 0x008e +#define SS3 0x008f /* * Conversion function pointers for current encoding. @@ -62,18 +66,24 @@ extern struct xlocale_ctype __xlocale_gl * Rune initialization function prototypes. */ int _none_init(struct xlocale_ctype *, _RuneLocale *); -int _ascii_init(struct xlocale_ctype *, _RuneLocale *); int _UTF8_init(struct xlocale_ctype *, _RuneLocale *); -int _EUC_init(struct xlocale_ctype *, _RuneLocale *); +int _EUC_CN_init(struct xlocale_ctype *, _RuneLocale *); +int _EUC_JP_init(struct xlocale_ctype *, _RuneLocale *); +int _EUC_KR_init(struct xlocale_ctype *, _RuneLocale *); +int _EUC_TW_init(struct xlocale_ctype *, _RuneLocale *); int _GB18030_init(struct xlocale_ctype *, _RuneLocale *); int _GB2312_init(struct xlocale_ctype *, _RuneLocale *); int _GBK_init(struct xlocale_ctype *, _RuneLocale *); int _BIG5_init(struct xlocale_ctype *, _RuneLocale *); int _MSKanji_init(struct xlocale_ctype *, _RuneLocale *); -extern size_t __mbsnrtowcs_std(wchar_t * __restrict, const char ** __restrict, - size_t, size_t, mbstate_t * __restrict); -extern size_t __wcsnrtombs_std(char * __restrict, const wchar_t ** __restrict, - size_t, size_t, mbstate_t * __restrict); +typedef size_t (*mbrtowc_pfn_t)(wchar_t * __restrict, + const char * __restrict, size_t, mbstate_t * __restrict); +typedef size_t (*wcrtomb_pfn_t)(char * __restrict, wchar_t, + mbstate_t * __restrict); +size_t __mbsnrtowcs_std(wchar_t * __restrict, const char ** __restrict, + size_t, size_t, mbstate_t * __restrict, mbrtowc_pfn_t); +size_t __wcsnrtombs_std(char * __restrict, const wchar_t ** __restrict, + size_t, size_t, mbstate_t * __restrict, wcrtomb_pfn_t); #endif /* _MBLOCAL_H_ */ Modified: projects/collation/lib/libc/locale/mbsnrtowcs.c ============================================================================== --- projects/collation/lib/libc/locale/mbsnrtowcs.c Sat Aug 8 18:14:59 2015 (r286458) +++ projects/collation/lib/libc/locale/mbsnrtowcs.c Sat Aug 8 18:22:14 2015 (r286459) @@ -1,4 +1,6 @@ /*- + * Copyright 2013 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002-2004 Tim J. Robbins. * * Copyright (c) 2011 The FreeBSD Foundation @@ -56,20 +58,20 @@ mbsnrtowcs(wchar_t * __restrict dst, con size_t __mbsnrtowcs_std(wchar_t * __restrict dst, const char ** __restrict src, - size_t nms, size_t len, mbstate_t * __restrict ps) + size_t nms, size_t len, mbstate_t * __restrict ps, + mbrtowc_pfn_t pmbrtowc) { const char *s; size_t nchr; wchar_t wc; size_t nb; - struct xlocale_ctype *ct = XLOCALE_CTYPE(__get_locale()); s = *src; nchr = 0; if (dst == NULL) { for (;;) { - if ((nb = ct->__mbrtowc(&wc, s, nms, ps)) == (size_t)-1) + if ((nb = pmbrtowc(&wc, s, nms, ps)) == (size_t)-1) /* Invalid sequence - mbrtowc() sets errno. */ return ((size_t)-1); else if (nb == 0 || nb == (size_t)-2) @@ -82,7 +84,7 @@ __mbsnrtowcs_std(wchar_t * __restrict ds } while (len-- > 0) { - if ((nb = ct->__mbrtowc(dst, s, nms, ps)) == (size_t)-1) { + if ((nb = pmbrtowc(dst, s, nms, ps)) == (size_t)-1) { *src = s; return ((size_t)-1); } else if (nb == (size_t)-2) { Modified: projects/collation/lib/libc/locale/mskanji.c ============================================================================== --- projects/collation/lib/libc/locale/mskanji.c Sat Aug 8 18:14:59 2015 (r286458) +++ projects/collation/lib/libc/locale/mskanji.c Sat Aug 8 18:22:14 2015 (r286459) @@ -1,4 +1,6 @@ /* + * Copyright 2013 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. * * ja_JP.SJIS locale table for BSD4.4/rune @@ -28,14 +30,14 @@ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) @@ -59,6 +61,12 @@ static size_t _MSKanji_mbrtowc(wchar_t * static int _MSKanji_mbsinit(const mbstate_t *); static size_t _MSKanji_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); +static size_t _MSKanji_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _MSKanji_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); typedef struct { wchar_t ch; @@ -70,6 +78,8 @@ _MSKanji_init(struct xlocale_ctype *l, _ l->__mbrtowc = _MSKanji_mbrtowc; l->__wcrtomb = _MSKanji_wcrtomb; + l->__mbsnrtowcs = _MSKanji_mbsnrtowcs; + l->__wcsnrtombs = _MSKanji_wcsnrtombs; l->__mbsinit = _MSKanji_mbsinit; l->runes = rl; l->__mb_cur_max = 2; @@ -163,3 +173,19 @@ _MSKanji_wcrtomb(char * __restrict s, wc *s++ = wc >> (i << 3); return (len); } + +static size_t +_MSKanji_mbsnrtowcs(wchar_t * __restrict dst, + const char ** __restrict src, size_t nms, + size_t len, mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _MSKanji_mbrtowc)); +} + +static size_t +_MSKanji_wcsnrtombs(char * __restrict dst, + const wchar_t ** __restrict src, size_t nwc, + size_t len, mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _MSKanji_wcrtomb)); +} Modified: projects/collation/lib/libc/locale/none.c ============================================================================== --- projects/collation/lib/libc/locale/none.c Sat Aug 8 18:14:59 2015 (r286458) +++ projects/collation/lib/libc/locale/none.c Sat Aug 8 18:22:14 2015 (r286459) @@ -1,4 +1,6 @@ /*- + * Copyright 2013 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***