Date: Tue, 21 May 2013 19:59:37 +0000 (UTC) From: Ed Schouten <ed@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r250883 - in head: include include/xlocale lib/libc/locale sys/sys tools/regression/lib/libc/locale Message-ID: <201305211959.r4LJxbLx034714@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: ed Date: Tue May 21 19:59:37 2013 New Revision: 250883 URL: http://svnweb.freebsd.org/changeset/base/250883 Log: Add <uchar.h>. The <uchar.h> header, part of C11, adds a small number of utility functions for 16/32-bit "universal" characters, which may or may not be UTF-16/32. As our wchar_t is already ISO 10646, simply add light-weight wrappers around wcrtomb() and mbrtowc(). While there, also add (not-yet-standard) _l functions, similar to the ones we already have for the other locale-dependent functions. Reviewed by: theraven Added: head/include/uchar.h (contents, props changed) head/include/xlocale/_uchar.h (contents, props changed) head/lib/libc/locale/c16rtomb.c (contents, props changed) head/lib/libc/locale/c32rtomb.c (contents, props changed) head/lib/libc/locale/mbrtoc16.c (contents, props changed) head/lib/libc/locale/mbrtoc32.c (contents, props changed) head/tools/regression/lib/libc/locale/test-c16rtomb.c (contents, props changed) head/tools/regression/lib/libc/locale/test-mbrtoc16.c (contents, props changed) Modified: head/include/Makefile head/include/stdatomic.h head/include/xlocale/Makefile head/lib/libc/locale/Makefile.inc head/lib/libc/locale/Symbol.map head/lib/libc/locale/mbrtowc.3 head/lib/libc/locale/wcrtomb.3 head/lib/libc/locale/xlocale_private.h head/sys/sys/_types.h head/tools/regression/lib/libc/locale/Makefile Modified: head/include/Makefile ============================================================================== --- head/include/Makefile Tue May 21 19:56:03 2013 (r250882) +++ head/include/Makefile Tue May 21 19:59:37 2013 (r250883) @@ -23,7 +23,7 @@ INCS= a.out.h ar.h assert.h bitstring.h stdnoreturn.h stdio.h stdlib.h string.h stringlist.h \ strings.h sysexits.h tar.h termios.h tgmath.h \ time.h timeconv.h timers.h ttyent.h \ - ulimit.h unistd.h utime.h utmpx.h uuid.h varargs.h \ + uchar.h ulimit.h unistd.h utime.h utmpx.h uuid.h varargs.h \ wchar.h wctype.h wordexp.h xlocale.h .PATH: ${.CURDIR}/../contrib/libc-vis 
Modified: head/include/stdatomic.h ============================================================================== --- head/include/stdatomic.h Tue May 21 19:56:03 2013 (r250882) +++ head/include/stdatomic.h Tue May 21 19:59:37 2013 (r250883) @@ -145,10 +145,8 @@ typedef _Atomic(long) atomic_long; typedef _Atomic(unsigned long) atomic_ulong; typedef _Atomic(long long) atomic_llong; typedef _Atomic(unsigned long long) atomic_ullong; -#if 0 typedef _Atomic(__char16_t) atomic_char16_t; typedef _Atomic(__char32_t) atomic_char32_t; -#endif typedef _Atomic(__wchar_t) atomic_wchar_t; typedef _Atomic(__int_least8_t) atomic_int_least8_t; typedef _Atomic(__uint_least8_t) atomic_uint_least8_t; Added: head/include/uchar.h ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/include/uchar.h Tue May 21 19:59:37 2013 (r250883) @@ -0,0 +1,60 @@ +/*- + * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _UCHAR_H_ +#define _UCHAR_H_ + +#include <sys/cdefs.h> +#include <sys/_types.h> + +#ifndef _MBSTATE_T_DECLARED +typedef __mbstate_t mbstate_t; +#define _MBSTATE_T_DECLARED +#endif + +#ifndef _SIZE_T_DECLARED +typedef __size_t size_t; +#define _SIZE_T_DECLARED +#endif + +typedef __char16_t char16_t; +typedef __char32_t char32_t; + +__BEGIN_DECLS +size_t c16rtomb(char * __restrict, char16_t, mbstate_t * __restrict); +size_t c32rtomb(char * __restrict, char32_t, mbstate_t * __restrict); +size_t mbrtoc16(char16_t * __restrict, const char * __restrict, size_t, + mbstate_t * __restrict); +size_t mbrtoc32(char32_t * __restrict, const char * __restrict, size_t, + mbstate_t * __restrict); +#if __BSD_VISIBLE || defined(_XLOCALE_H_) +#include <xlocale/_uchar.h> +#endif +__END_DECLS + +#endif /* !_UCHAR_H_ */ Modified: head/include/xlocale/Makefile ============================================================================== --- head/include/xlocale/Makefile Tue May 21 19:56:03 2013 (r250882) +++ head/include/xlocale/Makefile Tue May 21 19:59:37 2013 (r250883) @@ -2,7 +2,7 @@ NO_OBJ= INCS= _ctype.h _inttypes.h _langinfo.h _locale.h _monetary.h _stdio.h\ - _stdlib.h _string.h _time.h _wchar.h + _stdlib.h _string.h _time.h _uchar.h _wchar.h INCSDIR=${INCLUDEDIR}/xlocale .include <bsd.prog.mk> Added: head/include/xlocale/_uchar.h ============================================================================== --- /dev/null 
00:00:00 1970 (empty, because file is newly added) +++ head/include/xlocale/_uchar.h Tue May 21 19:59:37 2013 (r250883) @@ -0,0 +1,46 @@ +/*- + * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _LOCALE_T_DEFINED +#define _LOCALE_T_DEFINED +typedef struct _xlocale *locale_t; +#endif + +#ifndef _XLOCALE_UCHAR_H_ +#define _XLOCALE_UCHAR_H_ + +size_t c16rtomb_l(char * __restrict, char16_t, mbstate_t * __restrict, + locale_t); +size_t c32rtomb_l(char * __restrict, char32_t, mbstate_t * __restrict, + locale_t); +size_t mbrtoc16_l(char16_t * __restrict, const char * __restrict, size_t, + mbstate_t * __restrict, locale_t); +size_t mbrtoc32_l(char32_t * __restrict, const char * __restrict, size_t, + mbstate_t * __restrict, locale_t); + +#endif /* _XLOCALE_UCHAR_H_ */ Modified: head/lib/libc/locale/Makefile.inc ============================================================================== --- head/lib/libc/locale/Makefile.inc Tue May 21 19:56:03 2013 (r250882) +++ head/lib/libc/locale/Makefile.inc Tue May 21 19:59:37 2013 (r250883) @@ -4,11 +4,11 @@ # locale sources .PATH: ${.CURDIR}/${LIBC_ARCH}/locale ${.CURDIR}/locale -SRCS+= ascii.c big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c \ - gb18030.c gb2312.c gbk.c ctype.c isctype.c iswctype.c \ +SRCS+= ascii.c big5.c btowc.c c16rtomb.c c32rtomb.c collate.c collcmp.c euc.c \ + fix_grouping.c gb18030.c gb2312.c gbk.c ctype.c isctype.c iswctype.c \ ldpart.c lmessages.c lmonetary.c lnumeric.c localeconv.c mblen.c \ mbrlen.c \ - mbrtowc.c mbsinit.c mbsnrtowcs.c \ + mbrtoc16.c mbrtoc32.c mbrtowc.c mbsinit.c mbsnrtowcs.c \ mbsrtowcs.c mbtowc.c mbstowcs.c \ mskanji.c nextwctype.c nl_langinfo.c nomacros.c none.c rpmatch.c \ rune.c \ @@ -72,7 +72,9 @@ MLINKS+=iswalnum_l.3 iswalpha_l.3 iswaln iswalnum_l.3 iswspecial_l.3 iswalnum_l.3 nextwctype_l.3 \ iswalnum_l.3 towctrans_l.3 iswalnum_l.3 wctrans_l.3 MLINKS+=isxdigit.3 ishexnumber.3 +MLINKS+=mbrtowc.3 mbrtoc16.3 mbrtowc.3 mbrtoc32.3 MLINKS+=mbsrtowcs.3 mbsnrtowcs.3 +MLINKS+=wcrtomb.3 c16rtomb.3 wcrtomb.3 c32rtomb.3 MLINKS+=wcsrtombs.3 wcsnrtombs.3 MLINKS+=wcstod.3 wcstof.3 wcstod.3 wcstold.3 MLINKS+=wcstol.3 wcstoul.3 wcstol.3 wcstoll.3 
wcstol.3 wcstoull.3 \ Modified: head/lib/libc/locale/Symbol.map ============================================================================== --- head/lib/libc/locale/Symbol.map Tue May 21 19:56:03 2013 (r250882) +++ head/lib/libc/locale/Symbol.map Tue May 21 19:59:37 2013 (r250883) @@ -199,6 +199,14 @@ FBSD_1.3 { __istype_l; __runes_for_locale; _ThreadRuneLocale; + c16rtomb; + c16rtomb_l; + c32rtomb; + c32rtomb_l; + mbrtoc16; + mbrtoc16_l; + mbrtoc32; + mbrtoc32_l; }; FBSDprivate_1.0 { Added: head/lib/libc/locale/c16rtomb.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/lib/libc/locale/c16rtomb.c Tue May 21 19:59:37 2013 (r250883) @@ -0,0 +1,81 @@ +/*- + * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <errno.h> +#include <uchar.h> +#include "xlocale_private.h" + +typedef struct { + char16_t lead_surrogate; + mbstate_t c32_mbstate; +} _Char16State; + +size_t +c16rtomb_l(char * __restrict s, char16_t c16, mbstate_t * __restrict ps, + locale_t locale) +{ + _Char16State *cs; + char32_t c32; + + FIX_LOCALE(locale); + if (ps == NULL) + ps = &locale->c16rtomb; + cs = (_Char16State *)ps; + + /* If s is a null pointer, the value of parameter c16 is ignored. */ + if (s == NULL) { + c32 = 0; + } else if (cs->lead_surrogate >= 0xd800 && + cs->lead_surrogate <= 0xdbff) { + /* We should see a trail surrogate now. */ + if (c16 < 0xdc00 || c16 > 0xdfff) { + errno = EILSEQ; + return ((size_t)-1); + } + c32 = 0x10000 + ((cs->lead_surrogate & 0x3ff) << 10 | + (c16 & 0x3ff)); + } else if (c16 >= 0xd800 && c16 <= 0xdbff) { + /* Store lead surrogate for next invocation. */ + cs->lead_surrogate = c16; + return (0); + } else { + /* Regular character. 
*/ + c32 = c16; + } + cs->lead_surrogate = 0; + + return (c32rtomb_l(s, c32, &cs->c32_mbstate, locale)); +} + +size_t +c16rtomb(char * __restrict s, char16_t c16, mbstate_t * __restrict ps) +{ + + return (c16rtomb_l(s, c16, ps, __get_locale())); +} Added: head/lib/libc/locale/c32rtomb.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/lib/libc/locale/c32rtomb.c Tue May 21 19:59:37 2013 (r250883) @@ -0,0 +1,59 @@ +/*- + * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <errno.h> +#include <uchar.h> +#include <wchar.h> +#include "xlocale_private.h" + +size_t +c32rtomb_l(char * __restrict s, char32_t c32, mbstate_t * __restrict ps, + locale_t locale) +{ + + /* Unicode Standard 5.0, D90: ill-formed characters. */ + if ((c32 >= 0xd800 && c32 <= 0xdfff) || c32 > 0x10ffff) { + errno = EILSEQ; + return ((size_t)-1); + } + + FIX_LOCALE(locale); + if (ps == NULL) + ps = &locale->c32rtomb; + + /* Assume wchar_t uses UTF-32. */ + return (wcrtomb_l(s, c32, ps, locale)); +} + +size_t +c32rtomb(char * __restrict s, char32_t c32, mbstate_t * __restrict ps) +{ + + return (c32rtomb_l(s, c32, ps, __get_locale())); +} Added: head/lib/libc/locale/mbrtoc16.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/lib/libc/locale/mbrtoc16.c Tue May 21 19:59:37 2013 (r250883) @@ -0,0 +1,89 @@ +/*- + * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <uchar.h> +#include "xlocale_private.h" + +typedef struct { + char16_t trail_surrogate; + mbstate_t c32_mbstate; +} _Char16State; + +size_t +mbrtoc16_l(char16_t * __restrict pc16, const char * __restrict s, size_t n, + mbstate_t * __restrict ps, locale_t locale) +{ + _Char16State *cs; + char32_t c32; + ssize_t len; + + FIX_LOCALE(locale); + if (ps == NULL) + ps = &locale->mbrtoc16; + cs = (_Char16State *)ps; + + /* + * Call straight into mbrtoc32_l() if we don't need to return a + * character value. According to the spec, if s is a null + * pointer, the value of parameter pc16 is also ignored. + */ + if (pc16 == NULL || s == NULL) { + cs->trail_surrogate = 0; + return (mbrtoc32_l(NULL, s, n, &cs->c32_mbstate, locale)); + } + + /* Return the trail surrogate from the previous invocation. */ + if (cs->trail_surrogate >= 0xdc00 && cs->trail_surrogate <= 0xdfff) { + *pc16 = cs->trail_surrogate; + cs->trail_surrogate = 0; + return ((size_t)-3); + } + + len = mbrtoc32_l(&c32, s, n, &cs->c32_mbstate, locale); + if (len >= 0) { + if (c32 < 0x10000) { + /* Fits in one UTF-16 character. */ + *pc16 = c32; + } else { + /* Split up in a surrogate pair. 
*/ + c32 -= 0x10000; + *pc16 = 0xd800 | (c32 >> 10); + cs->trail_surrogate = 0xdc00 | (c32 & 0x3ff); + } + } + return (len); +} + +size_t +mbrtoc16(char16_t * __restrict pc16, const char * __restrict s, size_t n, + mbstate_t * __restrict ps) +{ + + return (mbrtoc16_l(pc16, s, n, ps, __get_locale())); +} Added: head/lib/libc/locale/mbrtoc32.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/lib/libc/locale/mbrtoc32.c Tue May 21 19:59:37 2013 (r250883) @@ -0,0 +1,53 @@ +/*- + * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <uchar.h> +#include <wchar.h> +#include "xlocale_private.h" + +size_t +mbrtoc32_l(char32_t * __restrict pc32, const char * __restrict s, size_t n, + mbstate_t * __restrict ps, locale_t locale) +{ + + FIX_LOCALE(locale); + if (ps == NULL) + ps = &locale->mbrtoc32; + + /* Assume wchar_t uses UTF-32. */ + return (mbrtowc_l(pc32, s, n, ps, locale)); +} + +size_t +mbrtoc32(char32_t * __restrict pc32, const char * __restrict s, size_t n, + mbstate_t * __restrict ps) +{ + + return (mbrtoc32_l(pc32, s, n, ps, __get_locale())); +} Modified: head/lib/libc/locale/mbrtowc.3 ============================================================================== --- head/lib/libc/locale/mbrtowc.3 Tue May 21 19:56:03 2013 (r250882) +++ head/lib/libc/locale/mbrtowc.3 Tue May 21 19:59:37 2013 (r250883) @@ -24,11 +24,13 @@ .\" .\" $FreeBSD$ .\" -.Dd April 8, 2004 +.Dd May 21, 2013 .Dt MBRTOWC 3 .Os .Sh NAME -.Nm mbrtowc +.Nm mbrtowc , +.Nm mbrtoc16 , +.Nm mbrtoc32 .Nd "convert a character to a wide-character code (restartable)" .Sh LIBRARY .Lb libc @@ -36,35 +38,51 @@ .In wchar.h .Ft size_t .Fo mbrtowc -.Fa "wchar_t * restrict pwc" "const char * restrict s" "size_t n" +.Fa "wchar_t * restrict pc" "const char * restrict s" "size_t n" +.Fa "mbstate_t * restrict ps" +.Fc +.In uchar.h +.Ft size_t +.Fo mbrtoc16 +.Fa "char16_t * restrict pc" "const char * restrict s" "size_t n" +.Fa "mbstate_t * restrict ps" +.Fc +.Ft size_t +.Fo mbrtoc32 +.Fa "char32_t * restrict pc" "const char * restrict s" "size_t n" .Fa "mbstate_t * restrict ps" .Fc .Sh DESCRIPTION The -.Fn mbrtowc -function inspects at most +.Fn mbrtowc , +.Fn mbrtoc16 +and +.Fn mbrtoc32 +functions inspect at most .Fa n bytes pointed to by .Fa s to determine the number of bytes needed to complete the next multibyte character. 
If a character can be completed, and -.Fa pwc +.Fa pc is not .Dv NULL , the wide character which is represented by .Fa s is stored in the -.Vt wchar_t +.Vt wchar_t , +.Vt char16_t +or +.Vt char32_t it points to. .Pp If .Fa s is .Dv NULL , -.Fn mbrtowc -behaves as if -.Fa pwc +these functions behave as if +.Fa pc was .Dv NULL , .Fa s @@ -81,15 +99,24 @@ argument, is used to keep track of the shift state. If it is .Dv NULL , -.Fn mbrtowc -uses an internal, static +these functions use an internal, static .Vt mbstate_t object, which is initialized to the initial conversion state at program startup. +.Pp +As a single +.Vt char16_t +is not large enough to represent certain multibyte characters, the +.Fn mbrtoc16 +function may need to be invoked multiple times to convert a single +multibyte character sequence. .Sh RETURN VALUES The -.Fn mbrtowc -functions returns: +.Fn mbrtowc , +.Fn mbrtoc16 +and +.Fn mbrtoc32 +functions return: .Bl -tag -width indent .It 0 The next @@ -100,10 +127,13 @@ represent the null wide character .It >0 The next .Fa n -or fewer bytes -represent a valid character, -.Fn mbrtowc -returns the number of bytes used to complete the multibyte character. +or fewer bytes represent a valid character, these functions +return the number of bytes used to complete the multibyte character. +.It Po Vt size_t Pc Ns \-1 +An encoding error has occurred. +The next +.Fa n +or fewer bytes do not contribute to a valid multibyte character. .It Po Vt size_t Pc Ns \-2 The next .Fa n @@ -111,16 +141,23 @@ contribute to, but do not complete, a va and all .Fa n bytes have been processed. -.It Po Vt size_t Pc Ns \-1 -An encoding error has occurred. -The next -.Fa n -or fewer bytes do not contribute to a valid multibyte character. +.El +.Pp +The +.Fn mbrtoc16 +function also returns: +.Bl -tag -width indent +.It Po Vt size_t Pc Ns \-3 +The next character resulting from a previous call has been stored. +No bytes from the input have been consumed. 
.El .Sh ERRORS The -.Fn mbrtowc -function will fail if: +.Fn mbrtowc , +.Fn mbrtoc16 +and +.Fn mbrtoc32 +functions will fail if: .Bl -tag -width Er .It Bq Er EILSEQ An invalid multibyte sequence was detected. @@ -134,6 +171,9 @@ The conversion state is invalid. .Xr wcrtomb 3 .Sh STANDARDS The -.Fn mbrtowc -function conforms to -.St -isoC-99 . +.Fn mbrtowc , +.Fn mbrtoc16 +and +.Fn mbrtoc32 +functions conform to +.St -isoC-2011 . Modified: head/lib/libc/locale/wcrtomb.3 ============================================================================== --- head/lib/libc/locale/wcrtomb.3 Tue May 21 19:56:03 2013 (r250882) +++ head/lib/libc/locale/wcrtomb.3 Tue May 21 19:59:37 2013 (r250883) @@ -24,24 +24,34 @@ .\" .\" $FreeBSD$ .\" -.Dd April 8, 2004 +.Dd May 21, 2013 .Dt WCRTOMB 3 .Os .Sh NAME -.Nm wcrtomb +.Nm wcrtomb , +.Nm c16rtomb , +.Nm c32rtomb .Nd "convert a wide-character code to a character (restartable)" .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In wchar.h .Ft size_t -.Fn wcrtomb "char * restrict s" "wchar_t wc" "mbstate_t * restrict ps" +.Fn wcrtomb "char * restrict s" "wchar_t c" "mbstate_t * restrict ps" +.In uchar.h +.Ft size_t +.Fn c16rtomb "char * restrict s" "char16_t c" "mbstate_t * restrict ps" +.Ft size_t +.Fn c32rtomb "char * restrict s" "char32_t c" "mbstate_t * restrict ps" .Sh DESCRIPTION The -.Fn wcrtomb -function stores a multibyte sequence representing the +.Fn wcrtomb , +.Fn c16rtomb +and +.Fn c32rtomb +functions store a multibyte sequence representing the wide character -.Fa wc , +.Fa c , including any necessary shift sequences, to the character array .Fa s , @@ -53,11 +63,10 @@ If .Fa s is .Dv NULL , -.Fn wcrtomb -behaves as if +these functions behave as if .Fa s pointed to an internal buffer and -.Fa wc +.Fa c was a null wide character (L'\e0'). .Pp The @@ -67,26 +76,32 @@ argument, is used to keep track of the shift state. 
If it is .Dv NULL , -.Fn wcrtomb -uses an internal, static +these functions use an internal, static .Vt mbstate_t object, which is initialized to the initial conversion state at program startup. +.Pp +As certain multibyte characters may only be represented by a series of +16-bit characters, the +.Fn c16rtomb +function may need to be invoked multiple times before a multibyte sequence is +returned. .Sh RETURN VALUES -The -.Fn wcrtomb -functions returns the length (in bytes) of the multibyte sequence +These functions return the length (in bytes) of the multibyte sequence needed to represent -.Fa wc , +.Fa c , or .Po Vt size_t Pc Ns \-1 if -.Fa wc +.Fa c is not a valid wide character code. .Sh ERRORS The -.Fn wcrtomb -function will fail if: +.Fn wcrtomb , +.Fn c16rtomb +and +.Fn c32rtomb +functions will fail if: .Bl -tag -width Er .It Bq Er EILSEQ An invalid wide character code was specified. @@ -100,6 +115,9 @@ The conversion state is invalid. .Xr wctomb 3 .Sh STANDARDS The -.Fn wcrtomb -function conforms to -.St -isoC-99 . +.Fn wcrtomb , +.Fn c16rtomb +and +.Fn c32rtomb +functions conform to +.St -isoC-2011 . Modified: head/lib/libc/locale/xlocale_private.h ============================================================================== --- head/lib/libc/locale/xlocale_private.h Tue May 21 19:56:03 2013 (r250882) +++ head/lib/libc/locale/xlocale_private.h Tue May 21 19:59:37 2013 (r250883) @@ -109,6 +109,10 @@ struct _xlocale { __mbstate_t mblen; /** Persistent state used by mbrlen() calls. */ __mbstate_t mbrlen; + /** Persistent state used by mbrtoc16() calls. */ + __mbstate_t mbrtoc16; + /** Persistent state used by mbrtoc32() calls. */ + __mbstate_t mbrtoc32; /** Persistent state used by mbrtowc() calls. */ __mbstate_t mbrtowc; /** Persistent state used by mbsnrtowcs() calls. */ @@ -117,6 +121,10 @@ struct _xlocale { __mbstate_t mbsrtowcs; /** Persistent state used by mbtowc() calls. */ __mbstate_t mbtowc; + /** Persistent state used by c16rtomb() calls. 
*/ + __mbstate_t c16rtomb; + /** Persistent state used by c32rtomb() calls. */ + __mbstate_t c32rtomb; /** Persistent state used by wcrtomb() calls. */ __mbstate_t wcrtomb; /** Persistent state used by wcsnrtombs() calls. */ Modified: head/sys/sys/_types.h ============================================================================== --- head/sys/sys/_types.h Tue May 21 19:56:03 2013 (r250882) +++ head/sys/sys/_types.h Tue May 21 19:59:37 2013 (r250883) @@ -89,6 +89,12 @@ typedef int __ct_rune_t; /* arg type fo typedef __ct_rune_t __rune_t; /* rune_t (see above) */ typedef __ct_rune_t __wint_t; /* wint_t (see above) */ +/* Clang already provides these types as built-ins, but only in C++ mode. */ +#if !defined(__clang__) || !defined(__cplusplus) +typedef __uint_least16_t __char16_t; +typedef __uint_least32_t __char32_t; +#endif + typedef __uint32_t __dev_t; /* device number */ typedef __uint32_t __fixpt_t; /* fixed point number */ Modified: head/tools/regression/lib/libc/locale/Makefile ============================================================================== --- head/tools/regression/lib/libc/locale/Makefile Tue May 21 19:56:03 2013 (r250882) +++ head/tools/regression/lib/libc/locale/Makefile Tue May 21 19:59:37 2013 (r250883) @@ -14,7 +14,9 @@ TESTS= test-mbrtowc \ test-wcstombs \ test-mblen \ test-iswctype \ - test-towctrans + test-towctrans \ + test-c16rtomb \ + test-mbrtoc16 .PHONY: tests tests: ${TESTS} Added: head/tools/regression/lib/libc/locale/test-c16rtomb.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/tools/regression/lib/libc/locale/test-c16rtomb.c Tue May 21 19:59:37 2013 (r250883) @@ -0,0 +1,115 @@ +/*- + * Copyright (c) 2002 Tim J. Robbins + * All rights reserved. + * + * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org> + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * Test program for c16rtomb() as specified by ISO/IEC 9899:2011. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <assert.h> +#include <errno.h> +#include <limits.h> +#include <locale.h> +#include <stdio.h> +#include <string.h> +#include <uchar.h> + +int +main(int argc, char *argv[]) +{ + mbstate_t s; + char buf[MB_LEN_MAX + 1]; + + /* + * C/POSIX locale. + */ + + printf("1..1\n"); + + /* + * If the buffer argument is NULL, c16 is implicitly 0, + * c16rtomb() resets its internal state. + */ + assert(c16rtomb(NULL, L'\0', NULL) == 1); + assert(c16rtomb(NULL, 0xdc00, NULL) == 1); + + /* Null wide character. 
*/ + memset(&s, 0, sizeof(s)); + memset(buf, 0xcc, sizeof(buf)); + assert(c16rtomb(buf, 0, &s) == 1); + assert((unsigned char)buf[0] == 0 && (unsigned char)buf[1] == 0xcc); + + /* Latin letter A, internal state. */ + assert(c16rtomb(NULL, L'\0', NULL) == 1); + assert(c16rtomb(NULL, L'A', NULL) == 1); + + /* Latin letter A. */ + memset(&s, 0, sizeof(s)); + memset(buf, 0xcc, sizeof(buf)); + assert(c16rtomb(buf, L'A', &s) == 1); + assert((unsigned char)buf[0] == 'A' && (unsigned char)buf[1] == 0xcc); + + /* Unicode character 'Pile of poo'. */ + memset(&s, 0, sizeof(s)); + memset(buf, 0xcc, sizeof(buf)); + assert(c16rtomb(buf, 0xd83d, &s) == 0); + assert(c16rtomb(buf, 0xdca9, &s) == (size_t)-1); + assert(errno == EILSEQ); + + /* + * UTF-8. + */ + + assert(strcmp(setlocale(LC_CTYPE, "en_US.UTF-8"), "en_US.UTF-8") == 0); + + /* Unicode character 'Pile of poo'. */ + memset(&s, 0, sizeof(s)); + memset(buf, 0xcc, sizeof(buf)); + assert(c16rtomb(buf, 0xd83d, &s) == 0); + assert(c16rtomb(buf, 0xdca9, &s) == 4); + assert((unsigned char)buf[0] == 0xf0 && (unsigned char)buf[1] == 0x9f && + (unsigned char)buf[2] == 0x92 && (unsigned char)buf[3] == 0xa9 && + (unsigned char)buf[4] == 0xcc); + + /* Invalid code; 'Pile of poo' without the trail surrogate. */ + memset(&s, 0, sizeof(s)); + memset(buf, 0xcc, sizeof(buf)); + assert(c16rtomb(buf, 0xd83d, &s) == 0); + assert(c16rtomb(buf, L'A', &s) == (size_t)-1); + assert(errno == EILSEQ); + + /* Invalid code; 'Pile of poo' without the lead surrogate. 
*/ + memset(&s, 0, sizeof(s)); + memset(buf, 0xcc, sizeof(buf)); + assert(c16rtomb(buf, 0xdca9, &s) == (size_t)-1); + assert(errno == EILSEQ); + + printf("ok 1 - c16rtomb()\n"); +} Added: head/tools/regression/lib/libc/locale/test-mbrtoc16.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/tools/regression/lib/libc/locale/test-mbrtoc16.c Tue May 21 19:59:37 2013 (r250883) @@ -0,0 +1,150 @@ +/*- + * Copyright (c) 2002 Tim J. Robbins + * All rights reserved. + * + * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +/* + * Test program for mbrtoc16() as specified by ISO/IEC 9899:2011. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <assert.h> +#include <errno.h> +#include <limits.h> +#include <locale.h> +#include <stdio.h> +#include <string.h> +#include <uchar.h> + +int +main(int argc, char *argv[]) +{ + mbstate_t s; + size_t len; + char16_t c16; + + /* + * C/POSIX locale. + */ + + printf("1..1\n"); + + /* Null wide character, internal state. */ + assert(mbrtoc16(&c16, "", 1, NULL) == 0); + assert(c16 == 0); + + /* Null wide character. */ *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201305211959.r4LJxbLx034714>