From owner-svn-src-head@FreeBSD.ORG Mon Jun 3 17:17:59 2013 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by hub.freebsd.org (Postfix) with ESMTP id 16681B62; Mon, 3 Jun 2013 17:17:59 +0000 (UTC) (envelope-from ed@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id 07FAF1B82; Mon, 3 Jun 2013 17:17:59 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.7/8.14.7) with ESMTP id r53HHxsP086209; Mon, 3 Jun 2013 17:17:59 GMT (envelope-from ed@svn.freebsd.org) Received: (from ed@localhost) by svn.freebsd.org (8.14.7/8.14.5/Submit) id r53HHvGY086194; Mon, 3 Jun 2013 17:17:57 GMT (envelope-from ed@svn.freebsd.org) Message-Id: <201306031717.r53HHvGY086194@svn.freebsd.org> From: Ed Schouten Date: Mon, 3 Jun 2013 17:17:57 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r251314 - in head: lib/libc/locale tools/regression/lib/libc/locale X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 03 Jun 2013 17:17:59 -0000 Author: ed Date: Mon Jun 3 17:17:56 2013 New Revision: 251314 URL: http://svnweb.freebsd.org/changeset/base/251314 Log: Add libiconv based versions of *c16*() and *c32*(). I initially thought wchar_t was locale independent, but this seems to be only the case on Linux. This means that we cannot depend on the *wc*() routines to implement *c16*() and *c32*(). Instead, use the Citrus libiconv that is part of libc. I'll see if there is anything I can do to make the existing functions somewhat useful in case the system is built without libiconv in the nearby future. If not, I'll simply remove the broken implementations. Reviewed by: jilles, gabor Added: head/lib/libc/locale/c16rtomb_iconv.c (contents, props changed) head/lib/libc/locale/c32rtomb_iconv.c (contents, props changed) head/lib/libc/locale/cXXrtomb_iconv.h (contents, props changed) head/lib/libc/locale/mbrtoc16_iconv.c (contents, props changed) head/lib/libc/locale/mbrtoc32_iconv.c (contents, props changed) head/lib/libc/locale/mbrtocXX_iconv.h (contents, props changed) Modified: head/lib/libc/locale/Makefile.inc head/tools/regression/lib/libc/locale/test-c16rtomb.c head/tools/regression/lib/libc/locale/test-mbrtoc16.c Modified: head/lib/libc/locale/Makefile.inc ============================================================================== --- head/lib/libc/locale/Makefile.inc Mon Jun 3 17:13:37 2013 (r251313) +++ head/lib/libc/locale/Makefile.inc Mon Jun 3 17:17:56 2013 (r251314) @@ -4,11 +4,11 @@ # locale sources .PATH: ${.CURDIR}/${LIBC_ARCH}/locale ${.CURDIR}/locale -SRCS+= ascii.c big5.c btowc.c c16rtomb.c c32rtomb.c collate.c collcmp.c euc.c \ - fix_grouping.c gb18030.c gb2312.c gbk.c ctype.c isctype.c iswctype.c \ +SRCS+= ascii.c big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c \ + gb18030.c gb2312.c gbk.c ctype.c isctype.c iswctype.c \ ldpart.c lmessages.c lmonetary.c lnumeric.c localeconv.c mblen.c \ mbrlen.c \ - mbrtoc16.c mbrtoc32.c mbrtowc.c mbsinit.c mbsnrtowcs.c \ + mbrtowc.c mbsinit.c mbsnrtowcs.c \ mbsrtowcs.c mbtowc.c mbstowcs.c \ mskanji.c nextwctype.c nl_langinfo.c nomacros.c none.c rpmatch.c \ rune.c \ @@ -23,6 +23,12 @@ SRCS+= ascii.c big5.c btowc.c c16rtomb.c wcwidth.c\ xlocale.c +.if ${MK_ICONV} != "no" +SRCS+= c16rtomb_iconv.c c32rtomb_iconv.c mbrtoc16_iconv.c mbrtoc32_iconv.c +.else +SRCS+= c16rtomb.c c32rtomb.c mbrtoc16.c mbrtoc32.c +.endif + SYM_MAPS+=${.CURDIR}/locale/Symbol.map MAN+= btowc.3 \ Added: head/lib/libc/locale/c16rtomb_iconv.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/lib/libc/locale/c16rtomb_iconv.c Mon Jun 3 17:17:56 2013 (r251314) @@ -0,0 +1,8 @@ +/* $FreeBSD$ */ +#define charXX_t char16_t +#define cXXrtomb c16rtomb +#define cXXrtomb_l c16rtomb_l +#define SRCBUF_LEN 2 +#define UTF_XX_INTERNAL "UTF-16-INTERNAL" + +#include "cXXrtomb_iconv.h" Added: head/lib/libc/locale/c32rtomb_iconv.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/lib/libc/locale/c32rtomb_iconv.c Mon Jun 3 17:17:56 2013 (r251314) @@ -0,0 +1,8 @@ +/* $FreeBSD$ */ +#define charXX_t char32_t +#define cXXrtomb c32rtomb +#define cXXrtomb_l c32rtomb_l +#define SRCBUF_LEN 1 +#define UTF_XX_INTERNAL "UTF-32-INTERNAL" + +#include "cXXrtomb_iconv.h" Added: head/lib/libc/locale/cXXrtomb_iconv.h ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/lib/libc/locale/cXXrtomb_iconv.h Mon Jun 3 17:17:56 2013 (r251314) @@ -0,0 +1,115 @@ +/*- + * Copyright (c) 2013 Ed Schouten + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include + +#include +#include +#include +#include + +#include "../iconv/citrus_hash.h" +#include "../iconv/citrus_module.h" +#include "../iconv/citrus_iconv.h" +#include "xlocale_private.h" + +typedef struct { + bool initialized; + struct _citrus_iconv iconv; + union { + charXX_t widechar[SRCBUF_LEN]; + char bytes[sizeof(charXX_t) * SRCBUF_LEN]; + } srcbuf; + size_t srcbuf_len; +} _ConversionState; +_Static_assert(sizeof(_ConversionState) <= sizeof(mbstate_t), + "Size of _ConversionState must not exceed mbstate_t's size."); + +size_t +cXXrtomb_l(char * __restrict s, charXX_t c, mbstate_t * __restrict ps, + locale_t locale) +{ + _ConversionState *cs; + struct _citrus_iconv *handle; + char *src, *dst; + size_t srcleft, dstleft, invlen; + int err; + + FIX_LOCALE(locale); + if (ps == NULL) + ps = &locale->cXXrtomb; + cs = (_ConversionState *)ps; + handle = &cs->iconv; + + /* Reinitialize mbstate_t. */ + if (s == NULL || !cs->initialized) { + if (_citrus_iconv_open(&handle, UTF_XX_INTERNAL, + nl_langinfo_l(CODESET, locale)) != 0) { + cs->initialized = false; + errno = EINVAL; + return (-1); + } + handle->cv_shared->ci_discard_ilseq = true; + handle->cv_shared->ci_hooks = NULL; + cs->srcbuf_len = 0; + cs->initialized = true; + if (s == NULL) + return (1); + } + + assert(cs->srcbuf_len < sizeof(cs->srcbuf.widechar) / sizeof(charXX_t)); + cs->srcbuf.widechar[cs->srcbuf_len++] = c; + + /* Perform conversion. */ + src = cs->srcbuf.bytes; + srcleft = cs->srcbuf_len * sizeof(charXX_t); + dst = s; + dstleft = MB_CUR_MAX_L(locale); + err = _citrus_iconv_convert(handle, &src, &srcleft, &dst, &dstleft, + 0, &invlen); + + /* Character is part of a surrogate pair. We need more input. */ + if (err == EINVAL) + return (0); + cs->srcbuf_len = 0; + + /* Illegal sequence. */ + if (dst == s) { + errno = EILSEQ; + return ((size_t)-1); + } + return (dst - s); +} + +size_t +cXXrtomb(char * __restrict s, charXX_t c, mbstate_t * __restrict ps) +{ + + return (cXXrtomb_l(s, c, ps, __get_locale())); +} Added: head/lib/libc/locale/mbrtoc16_iconv.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/lib/libc/locale/mbrtoc16_iconv.c Mon Jun 3 17:17:56 2013 (r251314) @@ -0,0 +1,8 @@ +/* $FreeBSD$ */ +#define charXX_t char16_t +#define mbrtocXX mbrtoc16 +#define mbrtocXX_l mbrtoc16_l +#define DSTBUF_LEN 2 +#define UTF_XX_INTERNAL "UTF-16-INTERNAL" + +#include "mbrtocXX_iconv.h" Added: head/lib/libc/locale/mbrtoc32_iconv.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/lib/libc/locale/mbrtoc32_iconv.c Mon Jun 3 17:17:56 2013 (r251314) @@ -0,0 +1,8 @@ +/* $FreeBSD$ */ +#define charXX_t char32_t +#define mbrtocXX mbrtoc32 +#define mbrtocXX_l mbrtoc32_l +#define DSTBUF_LEN 1 +#define UTF_XX_INTERNAL "UTF-32-INTERNAL" + +#include "mbrtocXX_iconv.h" Added: head/lib/libc/locale/mbrtocXX_iconv.h ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/lib/libc/locale/mbrtocXX_iconv.h Mon Jun 3 17:17:56 2013 (r251314) @@ -0,0 +1,158 @@ +/*- + * Copyright (c) 2013 Ed Schouten + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include + +#include +#include +#include +#include +#include +#include + +#include "../iconv/citrus_hash.h" +#include "../iconv/citrus_module.h" +#include "../iconv/citrus_iconv.h" +#include "xlocale_private.h" + +typedef struct { + bool initialized; + struct _citrus_iconv iconv; + char srcbuf[MB_LEN_MAX]; + size_t srcbuf_len; + union { + charXX_t widechar[DSTBUF_LEN]; + char bytes[sizeof(charXX_t) * DSTBUF_LEN]; + } dstbuf; + size_t dstbuf_len; +} _ConversionState; +_Static_assert(sizeof(_ConversionState) <= sizeof(mbstate_t), + "Size of _ConversionState must not exceed mbstate_t's size."); + +size_t +mbrtocXX_l(charXX_t * __restrict pc, const char * __restrict s, size_t n, + mbstate_t * __restrict ps, locale_t locale) +{ + _ConversionState *cs; + struct _citrus_iconv *handle; + size_t i, retval; + charXX_t retchar; + + FIX_LOCALE(locale); + if (ps == NULL) + ps = &locale->mbrtocXX; + cs = (_ConversionState *)ps; + handle = &cs->iconv; + + /* Reinitialize mbstate_t. */ + if (s == NULL || !cs->initialized) { + if (_citrus_iconv_open(&handle, + nl_langinfo_l(CODESET, locale), UTF_XX_INTERNAL) != 0) { + cs->initialized = false; + errno = EINVAL; + return (-1); + } + handle->cv_shared->ci_discard_ilseq = true; + handle->cv_shared->ci_hooks = NULL; + cs->srcbuf_len = cs->dstbuf_len = 0; + cs->initialized = true; + if (s == NULL) + return (0); + } + + /* See if we still have characters left from the previous invocation. */ + if (cs->dstbuf_len > 0) { + retval = (size_t)-3; + goto return_char; + } + + /* Fill up the read buffer as far as possible. */ + if (n > sizeof(cs->srcbuf) - cs->srcbuf_len) + n = sizeof(cs->srcbuf) - cs->srcbuf_len; + memcpy(cs->srcbuf + cs->srcbuf_len, s, n); + + /* Convert as few characters to the dst buffer as possible. */ + for (i = 0; ; i++) { + char *src, *dst; + size_t srcleft, dstleft, invlen; + int err; + + src = cs->srcbuf; + srcleft = cs->srcbuf_len + n; + dst = cs->dstbuf.bytes; + dstleft = i * sizeof(charXX_t); + assert(srcleft <= sizeof(cs->srcbuf) && + dstleft <= sizeof(cs->dstbuf.bytes)); + err = _citrus_iconv_convert(handle, &src, &srcleft, + &dst, &dstleft, 0, &invlen); + cs->dstbuf_len = (dst - cs->dstbuf.bytes) / sizeof(charXX_t); + + /* Got new character(s). Return the first. */ + if (cs->dstbuf_len > 0) { + assert(src - cs->srcbuf > cs->srcbuf_len); + retval = src - cs->srcbuf - cs->srcbuf_len; + cs->srcbuf_len = 0; + goto return_char; + } + + /* Increase dst buffer size, to obtain the surrogate pair. */ + if (err == E2BIG) + continue; + + /* Illegal sequence. */ + if (invlen > 0) { + cs->srcbuf_len = 0; + errno = EILSEQ; + return ((size_t)-1); + } + + /* Save unprocessed remainder for the next invocation. */ + memmove(cs->srcbuf, src, srcleft); + cs->srcbuf_len = srcleft; + return ((size_t)-2); + } + +return_char: + retchar = cs->dstbuf.widechar[0]; + memmove(&cs->dstbuf.widechar[0], &cs->dstbuf.widechar[1], + --cs->dstbuf_len * sizeof(charXX_t)); + if (pc != NULL) + *pc = retchar; + if (retchar == 0) + return (0); + return (retval); +} + +size_t +mbrtocXX(charXX_t * __restrict pc, const char * __restrict s, size_t n, + mbstate_t * __restrict ps) +{ + + return (mbrtocXX_l(pc, s, n, ps, __get_locale())); +} Modified: head/tools/regression/lib/libc/locale/test-c16rtomb.c ============================================================================== --- head/tools/regression/lib/libc/locale/test-c16rtomb.c Mon Jun 3 17:13:37 2013 (r251313) +++ head/tools/regression/lib/libc/locale/test-c16rtomb.c Mon Jun 3 17:17:56 2013 (r251314) @@ -82,6 +82,34 @@ main(int argc, char *argv[]) assert(c16rtomb(buf, 0xd83d, &s) == 0); assert(c16rtomb(buf, 0xdca9, &s) == (size_t)-1); assert(errno == EILSEQ); + assert((unsigned char)buf[0] == 0xcc); + + /* + * ISO8859-1. + */ + + assert(strcmp(setlocale(LC_CTYPE, "en_US.ISO8859-1"), + "en_US.ISO8859-1") == 0); + + /* Unicode character 'Euro sign'. */ + memset(&s, 0, sizeof(s)); + memset(buf, 0xcc, sizeof(buf)); + assert(c16rtomb(buf, 0x20ac, &s) == (size_t)-1); + assert(errno == EILSEQ); + assert((unsigned char)buf[0] == 0xcc); + + /* + * ISO8859-15. + */ + + assert(strcmp(setlocale(LC_CTYPE, "en_US.ISO8859-15"), + "en_US.ISO8859-15") == 0); + + /* Unicode character 'Euro sign'. */ + memset(&s, 0, sizeof(s)); + memset(buf, 0xcc, sizeof(buf)); + assert(c16rtomb(buf, 0x20ac, &s) == 1); + assert((unsigned char)buf[0] == 0xa4 && (unsigned char)buf[1] == 0xcc); /* * UTF-8. @@ -104,12 +132,14 @@ main(int argc, char *argv[]) assert(c16rtomb(buf, 0xd83d, &s) == 0); assert(c16rtomb(buf, L'A', &s) == (size_t)-1); assert(errno == EILSEQ); + assert((unsigned char)buf[0] == 0xcc); /* Invalid code; 'Pile of poo' without the lead surrogate. */ memset(&s, 0, sizeof(s)); memset(buf, 0xcc, sizeof(buf)); assert(c16rtomb(buf, 0xdca9, &s) == (size_t)-1); assert(errno == EILSEQ); + assert((unsigned char)buf[0] == 0xcc); printf("ok 1 - c16rtomb()\n"); } Modified: head/tools/regression/lib/libc/locale/test-mbrtoc16.c ============================================================================== --- head/tools/regression/lib/libc/locale/test-mbrtoc16.c Mon Jun 3 17:13:37 2013 (r251313) +++ head/tools/regression/lib/libc/locale/test-mbrtoc16.c Mon Jun 3 17:17:56 2013 (r251314) @@ -85,6 +85,37 @@ main(int argc, char *argv[]) assert(mbrtoc16(&c16, "", 0, &s) == (size_t)-2); assert(c16 == L'z'); + /* Check that mbrtoc16() doesn't read ahead too aggressively. */ + memset(&s, 0, sizeof(s)); + assert(mbrtoc16(&c16, "AB", 2, &s) == 1); + assert(c16 == L'A'); + assert(mbrtoc16(&c16, "C", 1, &s) == 1); + assert(c16 == L'C'); + + /* + * ISO-8859-1. + */ + + assert(strcmp(setlocale(LC_CTYPE, "en_US.ISO8859-1"), + "en_US.ISO8859-1") == 0); + + /* Currency sign. */ + memset(&s, 0, sizeof(s)); + assert(mbrtoc16(&c16, "\xa4", 1, &s) == 1); + assert(c16 == 0xa4); + + /* + * ISO-8859-15. + */ + + assert(strcmp(setlocale(LC_CTYPE, "en_US.ISO8859-15"), + "en_US.ISO8859-15") == 0); + + /* Euro sign. */ + memset(&s, 0, sizeof(s)); + assert(mbrtoc16(&c16, "\xa4", 1, &s) == 1); + assert(c16 == 0x20ac); + /* * UTF-8. */ @@ -144,6 +175,20 @@ main(int argc, char *argv[]) assert(mbrtoc16(&c16, "", 0, &s) == (size_t)-3); assert(c16 == 0xdca9); + /* Letter e with acute, precomposed. */ + memset(&s, 0, sizeof(s)); + c16 = 0; + assert(mbrtoc16(&c16, "\xc3\xa9", 2, &s) == 2); + assert(c16 == 0xe9); + + /* Letter e with acute, combined. */ + memset(&s, 0, sizeof(s)); + c16 = 0; + assert(mbrtoc16(&c16, "\x65\xcc\x81", 3, &s) == 1); + assert(c16 == 0x65); + assert(mbrtoc16(&c16, "\xcc\x81", 2, &s) == 2); + assert(c16 == 0x301); + printf("ok 1 - mbrtoc16()\n"); return (0);