Date: Mon, 16 Jun 2014 22:53:47 GMT From: ghostmansd@FreeBSD.org To: svn-soc-all@FreeBSD.org Subject: socsvn commit: r269660 - in soc2014/ghostmansd/head: include lib/libc/string Message-ID: <201406162253.s5GMrl69059804@socsvn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: ghostmansd Date: Mon Jun 16 22:53:46 2014 New Revision: 269660 URL: http://svnweb.FreeBSD.org/socsvn/?view=rev&rev=269660 Log: Unicode Normalization Algorithm Added: soc2014/ghostmansd/head/lib/libc/string/normalization.h soc2014/ghostmansd/head/lib/libc/string/strnorm.3 soc2014/ghostmansd/head/lib/libc/string/strnorm.c soc2014/ghostmansd/head/lib/libc/string/wcsnorm.3 soc2014/ghostmansd/head/lib/libc/string/wcsnorm.c Modified: soc2014/ghostmansd/head/include/string.h soc2014/ghostmansd/head/lib/libc/string/Makefile.inc Modified: soc2014/ghostmansd/head/include/string.h ============================================================================== --- soc2014/ghostmansd/head/include/string.h Mon Jun 16 22:44:38 2014 (r269659) +++ soc2014/ghostmansd/head/include/string.h Mon Jun 16 22:53:46 2014 (r269660) @@ -36,6 +36,7 @@ #include <sys/cdefs.h> #include <sys/_null.h> #include <sys/_types.h> +#include <xlocale.h> /* * Prototype functions which were historically defined in <string.h>, but @@ -139,6 +140,26 @@ #if __POSIX_VISIBLE >= 200809 || defined(_XLOCALE_H_) #include <xlocale/_string.h> #endif + +/* Unicode Normalization algorithm */ +#define __NORM_NFD 0 +#define __NORM_NFC 1 +#define __NORM_NFKD 2 +#define __NORM_NFKC 3 + +size_t __strnorm(char*, size_t, char const*, int); +size_t __strnorm_l(char*, size_t, char const*, int, locale_t); +size_t __wcsnorm(wchar_t*, size_t, wchar_t const*, int); +#ifdef _UNICODE_ADDENDA + #define strnorm __strnorm + #define wcsnorm __wcsnorm + #define NORM_NFD __NORM_NFD + #define NORM_NFC __NORM_NFC + #define NORM_NFKD __NORM_NFKD + #define NORM_NFKC __NORM_NFKC +#endif + + __END_DECLS #endif /* _STRING_H_ */ Modified: soc2014/ghostmansd/head/lib/libc/string/Makefile.inc ============================================================================== --- soc2014/ghostmansd/head/lib/libc/string/Makefile.inc Mon Jun 16 22:44:38 2014 (r269659) +++ soc2014/ghostmansd/head/lib/libc/string/Makefile.inc Mon Jun 16 22:53:46 2014 
(r269660) @@ -21,6 +21,7 @@ wcsrchr.c wcsspn.c wcsstr.c wcstok.c wcswidth.c wcsxfrm.c wmemchr.c \ wmemcmp.c \ wmemcpy.c wmemmove.c wmemset.c +MISRCS+=strnorm.c wcsnorm.c SYM_MAPS+= ${LIBC_SRCTOP}/string/Symbol.map @@ -28,12 +29,13 @@ # machine-dependent string sources .sinclude "${LIBC_SRCTOP}/${LIBC_ARCH}/string/Makefile.inc" -MAN+= bcmp.3 bcopy.3 bstring.3 bzero.3 ffs.3 index.3 memccpy.3 memchr.3 \ +MAN+=bcmp.3 bcopy.3 bstring.3 bzero.3 ffs.3 index.3 memccpy.3 memchr.3 \ memcmp.3 memcpy.3 memmem.3 memmove.3 memset.3 strcasecmp.3 strcat.3 \ strchr.3 strcmp.3 strcoll.3 strcpy.3 strdup.3 strerror.3 \ string.3 strlcpy.3 strlen.3 strmode.3 strpbrk.3 strsep.3 \ strspn.3 strstr.3 strtok.3 strxfrm.3 swab.3 wcscoll.3 wcstok.3 \ wcswidth.3 wcsxfrm.3 wmemchr.3 +MAN+=strnorm.3 wcsnorm.3 MLINKS+=ffs.3 ffsl.3 \ ffs.3 ffsll.3 \ @@ -91,3 +93,4 @@ wmemchr.3 wmemcpy.3 \ wmemchr.3 wmemmove.3 \ wmemchr.3 wmemset.3 +MLINKS+=strnorm.3 wcsnorm.3 Added: soc2014/ghostmansd/head/lib/libc/string/normalization.h ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ soc2014/ghostmansd/head/lib/libc/string/normalization.h Mon Jun 16 22:53:46 2014 (r269660) @@ -0,0 +1,619 @@ +/* + * Copyright (c) 2014 Dmitry Selyutin <ghostmansd@FreeBSD.org> + * at Lomonosov Moscow State University - www.msu.ru + * All rights reserved. + * + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * Portions of this software were developed by David Chisnall + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _NORM_H_ +#define _NORM_H_ + + +#define HANGUL_MIN 0xAC00 +#define HANGUL_MAX 0xD7A4 +#define HANGUL_BASE HANGUL_MIN + +#define HANGUL_LEAD_COUNT 19 +#define HANGUL_VOWEL_COUNT 21 +#define HANGUL_TRAIL_COUNT 28 + +#define HANGUL_LEAD_MIN 0x1100 +#define HANGUL_VOWEL_MIN 0x1161 +#define HANGUL_TRAIL_MIN 0x11A7 + +#define HANGUL_LEAD_MAX ((HANGUL_LEAD_MIN + HANGUL_LEAD_COUNT) - 1) +#define HANGUL_VOWEL_MAX ((HANGUL_VOWEL_MIN + HANGUL_VOWEL_COUNT) - 1) +#define HANGUL_TRAIL_MAX ((HANGUL_TRAIL_MIN + HANGUL_TRAIL_COUNT) - 1) + +#define HANGUL_BASE_COUNT 588 +#define HANGUL_FULL_COUNT 11172 + +#define HANGUL_LEAD_TYPE 1 +#define HANGUL_VOWEL_TYPE 2 +#define HANGUL_TRAIL_TYPE 3 +#define HANGUL_LEAD_VOWEL_TYPE 4 +#define HANGUL_LEAD_VOWEL_TRAIL_TYPE 5 + + +/* The source code was automatically generated from HangulSyllableType.txt. + * This function is used to check type of the Hangul syllables. 
*/ +static int hangul_syllable(wchar_t code) +{ + if (((0x1100 <= code) && (code <= 0x115F)) + || ((0xA960 <= code) && (code <= 0xA97C))) + return HANGUL_LEAD_TYPE; + + if (((0x1160 <= code) && (code <= 0x11A7)) + || ((0xD7B0 <= code) && (code <= 0xD7C6))) + return HANGUL_VOWEL_TYPE; + + if (((0x11A8 <= code) && (code <= 0x11FF)) + || ((0xD7CB <= code) && (code <= 0xD7FB))) + return HANGUL_TRAIL_TYPE; + + if ((code == 0xAC00)|| (code == 0xAC1C) || (code == 0xAC38) + || (code == 0xAC54) || (code == 0xAC70) || (code == 0xAC8C) + || (code == 0xACA8) || (code == 0xACC4) || (code == 0xACE0) + || (code == 0xACFC) || (code == 0xAD18) || (code == 0xAD34) + || (code == 0xAD50) || (code == 0xAD6C) || (code == 0xAD88) + || (code == 0xADA4) || (code == 0xADC0) || (code == 0xADDC) + || (code == 0xADF8) || (code == 0xAE14) || (code == 0xAE30) + || (code == 0xAE4C) || (code == 0xAE68) || (code == 0xAE84) + || (code == 0xAEA0) || (code == 0xAEBC) || (code == 0xAED8) + || (code == 0xAEF4) || (code == 0xAF10) || (code == 0xAF2C) + || (code == 0xAF48) || (code == 0xAF64) || (code == 0xAF80) + || (code == 0xAF9C) || (code == 0xAFB8) || (code == 0xAFD4) + || (code == 0xAFF0) || (code == 0xB00C) || (code == 0xB028) + || (code == 0xB044) || (code == 0xB060) || (code == 0xB07C) + || (code == 0xB098) || (code == 0xB0B4) || (code == 0xB0D0) + || (code == 0xB0EC) || (code == 0xB108) || (code == 0xB124) + || (code == 0xB140) || (code == 0xB15C) || (code == 0xB178) + || (code == 0xB194) || (code == 0xB1B0) || (code == 0xB1CC) + || (code == 0xB1E8) || (code == 0xB204) || (code == 0xB220) + || (code == 0xB23C) || (code == 0xB258) || (code == 0xB274) + || (code == 0xB290) || (code == 0xB2AC) || (code == 0xB2C8) + || (code == 0xB2E4) || (code == 0xB300) || (code == 0xB31C) + || (code == 0xB338) || (code == 0xB354) || (code == 0xB370) + || (code == 0xB38C) || (code == 0xB3A8) || (code == 0xB3C4) + || (code == 0xB3E0) || (code == 0xB3FC) || (code == 0xB418) + || (code == 0xB434) || (code == 0xB450) 
|| (code == 0xB46C) + || (code == 0xB488) || (code == 0xB4A4) || (code == 0xB4C0) + || (code == 0xB4DC) || (code == 0xB4F8) || (code == 0xB514) + || (code == 0xB530) || (code == 0xB54C) || (code == 0xB568) + || (code == 0xB584) || (code == 0xB5A0) || (code == 0xB5BC) + || (code == 0xB5D8) || (code == 0xB5F4) || (code == 0xB610) + || (code == 0xB62C) || (code == 0xB648) || (code == 0xB664) + || (code == 0xB680) || (code == 0xB69C) || (code == 0xB6B8) + || (code == 0xB6D4) || (code == 0xB6F0) || (code == 0xB70C) + || (code == 0xB728) || (code == 0xB744) || (code == 0xB760) + || (code == 0xB77C) || (code == 0xB798) || (code == 0xB7B4) + || (code == 0xB7D0) || (code == 0xB7EC) || (code == 0xB808) + || (code == 0xB824) || (code == 0xB840) || (code == 0xB85C) + || (code == 0xB878) || (code == 0xB894) || (code == 0xB8B0) + || (code == 0xB8CC) || (code == 0xB8E8) || (code == 0xB904) + || (code == 0xB920) || (code == 0xB93C) || (code == 0xB958) + || (code == 0xB974) || (code == 0xB990) || (code == 0xB9AC) + || (code == 0xB9C8) || (code == 0xB9E4) || (code == 0xBA00) + || (code == 0xBA1C) || (code == 0xBA38) || (code == 0xBA54) + || (code == 0xBA70) || (code == 0xBA8C) || (code == 0xBAA8) + || (code == 0xBAC4) || (code == 0xBAE0) || (code == 0xBAFC) + || (code == 0xBB18) || (code == 0xBB34) || (code == 0xBB50) + || (code == 0xBB6C) || (code == 0xBB88) || (code == 0xBBA4) + || (code == 0xBBC0) || (code == 0xBBDC) || (code == 0xBBF8) + || (code == 0xBC14) || (code == 0xBC30) || (code == 0xBC4C) + || (code == 0xBC68) || (code == 0xBC84) || (code == 0xBCA0) + || (code == 0xBCBC) || (code == 0xBCD8) || (code == 0xBCF4) + || (code == 0xBD10) || (code == 0xBD2C) || (code == 0xBD48) + || (code == 0xBD64) || (code == 0xBD80) || (code == 0xBD9C) + || (code == 0xBDB8) || (code == 0xBDD4) || (code == 0xBDF0) + || (code == 0xBE0C) || (code == 0xBE28) || (code == 0xBE44) + || (code == 0xBE60) || (code == 0xBE7C) || (code == 0xBE98) + || (code == 0xBEB4) || (code == 0xBED0) || (code == 
0xBEEC) + || (code == 0xBF08) || (code == 0xBF24) || (code == 0xBF40) + || (code == 0xBF5C) || (code == 0xBF78) || (code == 0xBF94) + || (code == 0xBFB0) || (code == 0xBFCC) || (code == 0xBFE8) + || (code == 0xC004) || (code == 0xC020) || (code == 0xC03C) + || (code == 0xC058) || (code == 0xC074) || (code == 0xC090) + || (code == 0xC0AC) || (code == 0xC0C8) || (code == 0xC0E4) + || (code == 0xC100) || (code == 0xC11C) || (code == 0xC138) + || (code == 0xC154) || (code == 0xC170) || (code == 0xC18C) + || (code == 0xC1A8) || (code == 0xC1C4) || (code == 0xC1E0) + || (code == 0xC1FC) || (code == 0xC218) || (code == 0xC234) + || (code == 0xC250) || (code == 0xC26C) || (code == 0xC288) + || (code == 0xC2A4) || (code == 0xC2C0) || (code == 0xC2DC) + || (code == 0xC2F8) || (code == 0xC314) || (code == 0xC330) + || (code == 0xC34C) || (code == 0xC368) || (code == 0xC384) + || (code == 0xC3A0) || (code == 0xC3BC) || (code == 0xC3D8) + || (code == 0xC3F4) || (code == 0xC410) || (code == 0xC42C) + || (code == 0xC448) || (code == 0xC464) || (code == 0xC480) + || (code == 0xC49C) || (code == 0xC4B8) || (code == 0xC4D4) + || (code == 0xC4F0) || (code == 0xC50C) || (code == 0xC528) + || (code == 0xC544) || (code == 0xC560) || (code == 0xC57C) + || (code == 0xC598) || (code == 0xC5B4) || (code == 0xC5D0) + || (code == 0xC5EC) || (code == 0xC608) || (code == 0xC624) + || (code == 0xC640) || (code == 0xC65C) || (code == 0xC678) + || (code == 0xC694) || (code == 0xC6B0) || (code == 0xC6CC) + || (code == 0xC6E8) || (code == 0xC704) || (code == 0xC720) + || (code == 0xC73C) || (code == 0xC758) || (code == 0xC774) + || (code == 0xC790) || (code == 0xC7AC) || (code == 0xC7C8) + || (code == 0xC7E4) || (code == 0xC800) || (code == 0xC81C) + || (code == 0xC838) || (code == 0xC854) || (code == 0xC870) + || (code == 0xC88C) || (code == 0xC8A8) || (code == 0xC8C4) + || (code == 0xC8E0) || (code == 0xC8FC) || (code == 0xC918) + || (code == 0xC934) || (code == 0xC950) || (code == 0xC96C) + || 
(code == 0xC988) || (code == 0xC9A4) || (code == 0xC9C0) + || (code == 0xC9DC) || (code == 0xC9F8) || (code == 0xCA14) + || (code == 0xCA30) || (code == 0xCA4C) || (code == 0xCA68) + || (code == 0xCA84) || (code == 0xCAA0) || (code == 0xCABC) + || (code == 0xCAD8) || (code == 0xCAF4) || (code == 0xCB10) + || (code == 0xCB2C) || (code == 0xCB48) || (code == 0xCB64) + || (code == 0xCB80) || (code == 0xCB9C) || (code == 0xCBB8) + || (code == 0xCBD4) || (code == 0xCBF0) || (code == 0xCC0C) + || (code == 0xCC28) || (code == 0xCC44) || (code == 0xCC60) + || (code == 0xCC7C) || (code == 0xCC98) || (code == 0xCCB4) + || (code == 0xCCD0) || (code == 0xCCEC) || (code == 0xCD08) + || (code == 0xCD24) || (code == 0xCD40) || (code == 0xCD5C) + || (code == 0xCD78) || (code == 0xCD94) || (code == 0xCDB0) + || (code == 0xCDCC) || (code == 0xCDE8) || (code == 0xCE04) + || (code == 0xCE20) || (code == 0xCE3C) || (code == 0xCE58) + || (code == 0xCE74) || (code == 0xCE90) || (code == 0xCEAC) + || (code == 0xCEC8) || (code == 0xCEE4) || (code == 0xCF00) + || (code == 0xCF1C) || (code == 0xCF38) || (code == 0xCF54) + || (code == 0xCF70) || (code == 0xCF8C) || (code == 0xCFA8) + || (code == 0xCFC4) || (code == 0xCFE0) || (code == 0xCFFC) + || (code == 0xD018) || (code == 0xD034) || (code == 0xD050) + || (code == 0xD06C) || (code == 0xD088) || (code == 0xD0A4) + || (code == 0xD0C0) || (code == 0xD0DC) || (code == 0xD0F8) + || (code == 0xD114) || (code == 0xD130) || (code == 0xD14C) + || (code == 0xD168) || (code == 0xD184) || (code == 0xD1A0) + || (code == 0xD1BC) || (code == 0xD1D8) || (code == 0xD1F4) + || (code == 0xD210) || (code == 0xD22C) || (code == 0xD248) + || (code == 0xD264) || (code == 0xD280) || (code == 0xD29C) + || (code == 0xD2B8) || (code == 0xD2D4) || (code == 0xD2F0) + || (code == 0xD30C) || (code == 0xD328) || (code == 0xD344) + || (code == 0xD360) || (code == 0xD37C) || (code == 0xD398) + || (code == 0xD3B4) || (code == 0xD3D0) || (code == 0xD3EC) + || (code == 
0xD408) || (code == 0xD424) || (code == 0xD440) + || (code == 0xD45C) || (code == 0xD478) || (code == 0xD494) + || (code == 0xD4B0) || (code == 0xD4CC) || (code == 0xD4E8) + || (code == 0xD504) || (code == 0xD520) || (code == 0xD53C) + || (code == 0xD558) || (code == 0xD574) || (code == 0xD590) + || (code == 0xD5AC) || (code == 0xD5C8) || (code == 0xD5E4) + || (code == 0xD600) || (code == 0xD61C) || (code == 0xD638) + || (code == 0xD654) || (code == 0xD670) || (code == 0xD68C) + || (code == 0xD6A8) || (code == 0xD6C4) || (code == 0xD6E0) + || (code == 0xD6FC) || (code == 0xD718) || (code == 0xD734) + || (code == 0xD750) || (code == 0xD76C) || (code == 0xD788)) + return HANGUL_LEAD_VOWEL_TYPE; + + if (((0xAC01 <= code) && (code <= 0xAC1B)) + || ((0xAC1D <= code) && (code <= 0xAC37)) + || ((0xAC39 <= code) && (code <= 0xAC53)) + || ((0xAC55 <= code) && (code <= 0xAC6F)) + || ((0xAC71 <= code) && (code <= 0xAC8B)) + || ((0xAC8D <= code) && (code <= 0xACA7)) + || ((0xACA9 <= code) && (code <= 0xACC3)) + || ((0xACC5 <= code) && (code <= 0xACDF)) + || ((0xACE1 <= code) && (code <= 0xACFB)) + || ((0xACFD <= code) && (code <= 0xAD17)) + || ((0xAD19 <= code) && (code <= 0xAD33)) + || ((0xAD35 <= code) && (code <= 0xAD4F)) + || ((0xAD51 <= code) && (code <= 0xAD6B)) + || ((0xAD6D <= code) && (code <= 0xAD87)) + || ((0xAD89 <= code) && (code <= 0xADA3)) + || ((0xADA5 <= code) && (code <= 0xADBF)) + || ((0xADC1 <= code) && (code <= 0xADDB)) + || ((0xADDD <= code) && (code <= 0xADF7)) + || ((0xADF9 <= code) && (code <= 0xAE13)) + || ((0xAE15 <= code) && (code <= 0xAE2F)) + || ((0xAE31 <= code) && (code <= 0xAE4B)) + || ((0xAE4D <= code) && (code <= 0xAE67)) + || ((0xAE69 <= code) && (code <= 0xAE83)) + || ((0xAE85 <= code) && (code <= 0xAE9F)) + || ((0xAEA1 <= code) && (code <= 0xAEBB)) + || ((0xAEBD <= code) && (code <= 0xAED7)) + || ((0xAED9 <= code) && (code <= 0xAEF3)) + || ((0xAEF5 <= code) && (code <= 0xAF0F)) + || ((0xAF11 <= code) && (code <= 0xAF2B)) + || ((0xAF2D <= 
code) && (code <= 0xAF47)) + || ((0xAF49 <= code) && (code <= 0xAF63)) + || ((0xAF65 <= code) && (code <= 0xAF7F)) + || ((0xAF81 <= code) && (code <= 0xAF9B)) + || ((0xAF9D <= code) && (code <= 0xAFB7)) + || ((0xAFB9 <= code) && (code <= 0xAFD3)) + || ((0xAFD5 <= code) && (code <= 0xAFEF)) + || ((0xAFF1 <= code) && (code <= 0xB00B)) + || ((0xB00D <= code) && (code <= 0xB027)) + || ((0xB029 <= code) && (code <= 0xB043)) + || ((0xB045 <= code) && (code <= 0xB05F)) + || ((0xB061 <= code) && (code <= 0xB07B)) + || ((0xB07D <= code) && (code <= 0xB097)) + || ((0xB099 <= code) && (code <= 0xB0B3)) + || ((0xB0B5 <= code) && (code <= 0xB0CF)) + || ((0xB0D1 <= code) && (code <= 0xB0EB)) + || ((0xB0ED <= code) && (code <= 0xB107)) + || ((0xB109 <= code) && (code <= 0xB123)) + || ((0xB125 <= code) && (code <= 0xB13F)) + || ((0xB141 <= code) && (code <= 0xB15B)) + || ((0xB15D <= code) && (code <= 0xB177)) + || ((0xB179 <= code) && (code <= 0xB193)) + || ((0xB195 <= code) && (code <= 0xB1AF)) + || ((0xB1B1 <= code) && (code <= 0xB1CB)) + || ((0xB1CD <= code) && (code <= 0xB1E7)) + || ((0xB1E9 <= code) && (code <= 0xB203)) + || ((0xB205 <= code) && (code <= 0xB21F)) + || ((0xB221 <= code) && (code <= 0xB23B)) + || ((0xB23D <= code) && (code <= 0xB257)) + || ((0xB259 <= code) && (code <= 0xB273)) + || ((0xB275 <= code) && (code <= 0xB28F)) + || ((0xB291 <= code) && (code <= 0xB2AB)) + || ((0xB2AD <= code) && (code <= 0xB2C7)) + || ((0xB2C9 <= code) && (code <= 0xB2E3)) + || ((0xB2E5 <= code) && (code <= 0xB2FF)) + || ((0xB301 <= code) && (code <= 0xB31B)) + || ((0xB31D <= code) && (code <= 0xB337)) + || ((0xB339 <= code) && (code <= 0xB353)) + || ((0xB355 <= code) && (code <= 0xB36F)) + || ((0xB371 <= code) && (code <= 0xB38B)) + || ((0xB38D <= code) && (code <= 0xB3A7)) + || ((0xB3A9 <= code) && (code <= 0xB3C3)) + || ((0xB3C5 <= code) && (code <= 0xB3DF)) + || ((0xB3E1 <= code) && (code <= 0xB3FB)) + || ((0xB3FD <= code) && (code <= 0xB417)) + || ((0xB419 <= code) && (code <= 
0xB433)) + || ((0xB435 <= code) && (code <= 0xB44F)) + || ((0xB451 <= code) && (code <= 0xB46B)) + || ((0xB46D <= code) && (code <= 0xB487)) + || ((0xB489 <= code) && (code <= 0xB4A3)) + || ((0xB4A5 <= code) && (code <= 0xB4BF)) + || ((0xB4C1 <= code) && (code <= 0xB4DB)) + || ((0xB4DD <= code) && (code <= 0xB4F7)) + || ((0xB4F9 <= code) && (code <= 0xB513)) + || ((0xB515 <= code) && (code <= 0xB52F)) + || ((0xB531 <= code) && (code <= 0xB54B)) + || ((0xB54D <= code) && (code <= 0xB567)) + || ((0xB569 <= code) && (code <= 0xB583)) + || ((0xB585 <= code) && (code <= 0xB59F)) + || ((0xB5A1 <= code) && (code <= 0xB5BB)) + || ((0xB5BD <= code) && (code <= 0xB5D7)) + || ((0xB5D9 <= code) && (code <= 0xB5F3)) + || ((0xB5F5 <= code) && (code <= 0xB60F)) + || ((0xB611 <= code) && (code <= 0xB62B)) + || ((0xB62D <= code) && (code <= 0xB647)) + || ((0xB649 <= code) && (code <= 0xB663)) + || ((0xB665 <= code) && (code <= 0xB67F)) + || ((0xB681 <= code) && (code <= 0xB69B)) + || ((0xB69D <= code) && (code <= 0xB6B7)) + || ((0xB6B9 <= code) && (code <= 0xB6D3)) + || ((0xB6D5 <= code) && (code <= 0xB6EF)) + || ((0xB6F1 <= code) && (code <= 0xB70B)) + || ((0xB70D <= code) && (code <= 0xB727)) + || ((0xB729 <= code) && (code <= 0xB743)) + || ((0xB745 <= code) && (code <= 0xB75F)) + || ((0xB761 <= code) && (code <= 0xB77B)) + || ((0xB77D <= code) && (code <= 0xB797)) + || ((0xB799 <= code) && (code <= 0xB7B3)) + || ((0xB7B5 <= code) && (code <= 0xB7CF)) + || ((0xB7D1 <= code) && (code <= 0xB7EB)) + || ((0xB7ED <= code) && (code <= 0xB807)) + || ((0xB809 <= code) && (code <= 0xB823)) + || ((0xB825 <= code) && (code <= 0xB83F)) + || ((0xB841 <= code) && (code <= 0xB85B)) + || ((0xB85D <= code) && (code <= 0xB877)) + || ((0xB879 <= code) && (code <= 0xB893)) + || ((0xB895 <= code) && (code <= 0xB8AF)) + || ((0xB8B1 <= code) && (code <= 0xB8CB)) + || ((0xB8CD <= code) && (code <= 0xB8E7)) + || ((0xB8E9 <= code) && (code <= 0xB903)) + || ((0xB905 <= code) && (code <= 0xB91F)) + || 
((0xB921 <= code) && (code <= 0xB93B)) + || ((0xB93D <= code) && (code <= 0xB957)) + || ((0xB959 <= code) && (code <= 0xB973)) + || ((0xB975 <= code) && (code <= 0xB98F)) + || ((0xB991 <= code) && (code <= 0xB9AB)) + || ((0xB9AD <= code) && (code <= 0xB9C7)) + || ((0xB9C9 <= code) && (code <= 0xB9E3)) + || ((0xB9E5 <= code) && (code <= 0xB9FF)) + || ((0xBA01 <= code) && (code <= 0xBA1B)) + || ((0xBA1D <= code) && (code <= 0xBA37)) + || ((0xBA39 <= code) && (code <= 0xBA53)) + || ((0xBA55 <= code) && (code <= 0xBA6F)) + || ((0xBA71 <= code) && (code <= 0xBA8B)) + || ((0xBA8D <= code) && (code <= 0xBAA7)) + || ((0xBAA9 <= code) && (code <= 0xBAC3)) + || ((0xBAC5 <= code) && (code <= 0xBADF)) + || ((0xBAE1 <= code) && (code <= 0xBAFB)) + || ((0xBAFD <= code) && (code <= 0xBB17)) + || ((0xBB19 <= code) && (code <= 0xBB33)) + || ((0xBB35 <= code) && (code <= 0xBB4F)) + || ((0xBB51 <= code) && (code <= 0xBB6B)) + || ((0xBB6D <= code) && (code <= 0xBB87)) + || ((0xBB89 <= code) && (code <= 0xBBA3)) + || ((0xBBA5 <= code) && (code <= 0xBBBF)) + || ((0xBBC1 <= code) && (code <= 0xBBDB)) + || ((0xBBDD <= code) && (code <= 0xBBF7)) + || ((0xBBF9 <= code) && (code <= 0xBC13)) + || ((0xBC15 <= code) && (code <= 0xBC2F)) + || ((0xBC31 <= code) && (code <= 0xBC4B)) + || ((0xBC4D <= code) && (code <= 0xBC67)) + || ((0xBC69 <= code) && (code <= 0xBC83)) + || ((0xBC85 <= code) && (code <= 0xBC9F)) + || ((0xBCA1 <= code) && (code <= 0xBCBB)) + || ((0xBCBD <= code) && (code <= 0xBCD7)) + || ((0xBCD9 <= code) && (code <= 0xBCF3)) + || ((0xBCF5 <= code) && (code <= 0xBD0F)) + || ((0xBD11 <= code) && (code <= 0xBD2B)) + || ((0xBD2D <= code) && (code <= 0xBD47)) + || ((0xBD49 <= code) && (code <= 0xBD63)) + || ((0xBD65 <= code) && (code <= 0xBD7F)) + || ((0xBD81 <= code) && (code <= 0xBD9B)) + || ((0xBD9D <= code) && (code <= 0xBDB7)) + || ((0xBDB9 <= code) && (code <= 0xBDD3)) + || ((0xBDD5 <= code) && (code <= 0xBDEF)) + || ((0xBDF1 <= code) && (code <= 0xBE0B)) + || ((0xBE0D <= code) 
&& (code <= 0xBE27)) + || ((0xBE29 <= code) && (code <= 0xBE43)) + || ((0xBE45 <= code) && (code <= 0xBE5F)) + || ((0xBE61 <= code) && (code <= 0xBE7B)) + || ((0xBE7D <= code) && (code <= 0xBE97)) + || ((0xBE99 <= code) && (code <= 0xBEB3)) + || ((0xBEB5 <= code) && (code <= 0xBECF)) + || ((0xBED1 <= code) && (code <= 0xBEEB)) + || ((0xBEED <= code) && (code <= 0xBF07)) + || ((0xBF09 <= code) && (code <= 0xBF23)) + || ((0xBF25 <= code) && (code <= 0xBF3F)) + || ((0xBF41 <= code) && (code <= 0xBF5B)) + || ((0xBF5D <= code) && (code <= 0xBF77)) + || ((0xBF79 <= code) && (code <= 0xBF93)) + || ((0xBF95 <= code) && (code <= 0xBFAF)) + || ((0xBFB1 <= code) && (code <= 0xBFCB)) + || ((0xBFCD <= code) && (code <= 0xBFE7)) + || ((0xBFE9 <= code) && (code <= 0xC003)) + || ((0xC005 <= code) && (code <= 0xC01F)) + || ((0xC021 <= code) && (code <= 0xC03B)) + || ((0xC03D <= code) && (code <= 0xC057)) + || ((0xC059 <= code) && (code <= 0xC073)) + || ((0xC075 <= code) && (code <= 0xC08F)) + || ((0xC091 <= code) && (code <= 0xC0AB)) + || ((0xC0AD <= code) && (code <= 0xC0C7)) + || ((0xC0C9 <= code) && (code <= 0xC0E3)) + || ((0xC0E5 <= code) && (code <= 0xC0FF)) + || ((0xC101 <= code) && (code <= 0xC11B)) + || ((0xC11D <= code) && (code <= 0xC137)) + || ((0xC139 <= code) && (code <= 0xC153)) + || ((0xC155 <= code) && (code <= 0xC16F)) + || ((0xC171 <= code) && (code <= 0xC18B)) + || ((0xC18D <= code) && (code <= 0xC1A7)) + || ((0xC1A9 <= code) && (code <= 0xC1C3)) + || ((0xC1C5 <= code) && (code <= 0xC1DF)) + || ((0xC1E1 <= code) && (code <= 0xC1FB)) + || ((0xC1FD <= code) && (code <= 0xC217)) + || ((0xC219 <= code) && (code <= 0xC233)) + || ((0xC235 <= code) && (code <= 0xC24F)) + || ((0xC251 <= code) && (code <= 0xC26B)) + || ((0xC26D <= code) && (code <= 0xC287)) + || ((0xC289 <= code) && (code <= 0xC2A3)) + || ((0xC2A5 <= code) && (code <= 0xC2BF)) + || ((0xC2C1 <= code) && (code <= 0xC2DB)) + || ((0xC2DD <= code) && (code <= 0xC2F7)) + || ((0xC2F9 <= code) && (code <= 
0xC313)) + || ((0xC315 <= code) && (code <= 0xC32F)) + || ((0xC331 <= code) && (code <= 0xC34B)) + || ((0xC34D <= code) && (code <= 0xC367)) + || ((0xC369 <= code) && (code <= 0xC383)) + || ((0xC385 <= code) && (code <= 0xC39F)) + || ((0xC3A1 <= code) && (code <= 0xC3BB)) + || ((0xC3BD <= code) && (code <= 0xC3D7)) + || ((0xC3D9 <= code) && (code <= 0xC3F3)) + || ((0xC3F5 <= code) && (code <= 0xC40F)) + || ((0xC411 <= code) && (code <= 0xC42B)) + || ((0xC42D <= code) && (code <= 0xC447)) + || ((0xC449 <= code) && (code <= 0xC463)) + || ((0xC465 <= code) && (code <= 0xC47F)) + || ((0xC481 <= code) && (code <= 0xC49B)) + || ((0xC49D <= code) && (code <= 0xC4B7)) + || ((0xC4B9 <= code) && (code <= 0xC4D3)) + || ((0xC4D5 <= code) && (code <= 0xC4EF)) + || ((0xC4F1 <= code) && (code <= 0xC50B)) + || ((0xC50D <= code) && (code <= 0xC527)) + || ((0xC529 <= code) && (code <= 0xC543)) + || ((0xC545 <= code) && (code <= 0xC55F)) + || ((0xC561 <= code) && (code <= 0xC57B)) + || ((0xC57D <= code) && (code <= 0xC597)) + || ((0xC599 <= code) && (code <= 0xC5B3)) + || ((0xC5B5 <= code) && (code <= 0xC5CF)) + || ((0xC5D1 <= code) && (code <= 0xC5EB)) + || ((0xC5ED <= code) && (code <= 0xC607)) + || ((0xC609 <= code) && (code <= 0xC623)) + || ((0xC625 <= code) && (code <= 0xC63F)) + || ((0xC641 <= code) && (code <= 0xC65B)) + || ((0xC65D <= code) && (code <= 0xC677)) + || ((0xC679 <= code) && (code <= 0xC693)) + || ((0xC695 <= code) && (code <= 0xC6AF)) + || ((0xC6B1 <= code) && (code <= 0xC6CB)) + || ((0xC6CD <= code) && (code <= 0xC6E7)) + || ((0xC6E9 <= code) && (code <= 0xC703)) + || ((0xC705 <= code) && (code <= 0xC71F)) + || ((0xC721 <= code) && (code <= 0xC73B)) + || ((0xC73D <= code) && (code <= 0xC757)) + || ((0xC759 <= code) && (code <= 0xC773)) + || ((0xC775 <= code) && (code <= 0xC78F)) + || ((0xC791 <= code) && (code <= 0xC7AB)) + || ((0xC7AD <= code) && (code <= 0xC7C7)) + || ((0xC7C9 <= code) && (code <= 0xC7E3)) + || ((0xC7E5 <= code) && (code <= 0xC7FF)) + || 
((0xC801 <= code) && (code <= 0xC81B)) + || ((0xC81D <= code) && (code <= 0xC837)) + || ((0xC839 <= code) && (code <= 0xC853)) + || ((0xC855 <= code) && (code <= 0xC86F)) + || ((0xC871 <= code) && (code <= 0xC88B)) + || ((0xC88D <= code) && (code <= 0xC8A7)) + || ((0xC8A9 <= code) && (code <= 0xC8C3)) + || ((0xC8C5 <= code) && (code <= 0xC8DF)) + || ((0xC8E1 <= code) && (code <= 0xC8FB)) + || ((0xC8FD <= code) && (code <= 0xC917)) + || ((0xC919 <= code) && (code <= 0xC933)) + || ((0xC935 <= code) && (code <= 0xC94F)) + || ((0xC951 <= code) && (code <= 0xC96B)) + || ((0xC96D <= code) && (code <= 0xC987)) + || ((0xC989 <= code) && (code <= 0xC9A3)) + || ((0xC9A5 <= code) && (code <= 0xC9BF)) + || ((0xC9C1 <= code) && (code <= 0xC9DB)) + || ((0xC9DD <= code) && (code <= 0xC9F7)) + || ((0xC9F9 <= code) && (code <= 0xCA13)) + || ((0xCA15 <= code) && (code <= 0xCA2F)) + || ((0xCA31 <= code) && (code <= 0xCA4B)) + || ((0xCA4D <= code) && (code <= 0xCA67)) + || ((0xCA69 <= code) && (code <= 0xCA83)) + || ((0xCA85 <= code) && (code <= 0xCA9F)) + || ((0xCAA1 <= code) && (code <= 0xCABB)) + || ((0xCABD <= code) && (code <= 0xCAD7)) + || ((0xCAD9 <= code) && (code <= 0xCAF3)) + || ((0xCAF5 <= code) && (code <= 0xCB0F)) + || ((0xCB11 <= code) && (code <= 0xCB2B)) + || ((0xCB2D <= code) && (code <= 0xCB47)) + || ((0xCB49 <= code) && (code <= 0xCB63)) + || ((0xCB65 <= code) && (code <= 0xCB7F)) + || ((0xCB81 <= code) && (code <= 0xCB9B)) + || ((0xCB9D <= code) && (code <= 0xCBB7)) + || ((0xCBB9 <= code) && (code <= 0xCBD3)) + || ((0xCBD5 <= code) && (code <= 0xCBEF)) + || ((0xCBF1 <= code) && (code <= 0xCC0B)) + || ((0xCC0D <= code) && (code <= 0xCC27)) + || ((0xCC29 <= code) && (code <= 0xCC43)) + || ((0xCC45 <= code) && (code <= 0xCC5F)) + || ((0xCC61 <= code) && (code <= 0xCC7B)) + || ((0xCC7D <= code) && (code <= 0xCC97)) + || ((0xCC99 <= code) && (code <= 0xCCB3)) + || ((0xCCB5 <= code) && (code <= 0xCCCF)) + || ((0xCCD1 <= code) && (code <= 0xCCEB)) + || ((0xCCED <= code) 
&& (code <= 0xCD07)) + || ((0xCD09 <= code) && (code <= 0xCD23)) + || ((0xCD25 <= code) && (code <= 0xCD3F)) + || ((0xCD41 <= code) && (code <= 0xCD5B)) + || ((0xCD5D <= code) && (code <= 0xCD77)) + || ((0xCD79 <= code) && (code <= 0xCD93)) + || ((0xCD95 <= code) && (code <= 0xCDAF)) + || ((0xCDB1 <= code) && (code <= 0xCDCB)) + || ((0xCDCD <= code) && (code <= 0xCDE7)) + || ((0xCDE9 <= code) && (code <= 0xCE03)) + || ((0xCE05 <= code) && (code <= 0xCE1F)) + || ((0xCE21 <= code) && (code <= 0xCE3B)) + || ((0xCE3D <= code) && (code <= 0xCE57)) + || ((0xCE59 <= code) && (code <= 0xCE73)) + || ((0xCE75 <= code) && (code <= 0xCE8F)) + || ((0xCE91 <= code) && (code <= 0xCEAB)) + || ((0xCEAD <= code) && (code <= 0xCEC7)) + || ((0xCEC9 <= code) && (code <= 0xCEE3)) + || ((0xCEE5 <= code) && (code <= 0xCEFF)) + || ((0xCF01 <= code) && (code <= 0xCF1B)) + || ((0xCF1D <= code) && (code <= 0xCF37)) + || ((0xCF39 <= code) && (code <= 0xCF53)) + || ((0xCF55 <= code) && (code <= 0xCF6F)) + || ((0xCF71 <= code) && (code <= 0xCF8B)) + || ((0xCF8D <= code) && (code <= 0xCFA7)) + || ((0xCFA9 <= code) && (code <= 0xCFC3)) + || ((0xCFC5 <= code) && (code <= 0xCFDF)) + || ((0xCFE1 <= code) && (code <= 0xCFFB)) + || ((0xCFFD <= code) && (code <= 0xD017)) + || ((0xD019 <= code) && (code <= 0xD033)) + || ((0xD035 <= code) && (code <= 0xD04F)) + || ((0xD051 <= code) && (code <= 0xD06B)) + || ((0xD06D <= code) && (code <= 0xD087)) + || ((0xD089 <= code) && (code <= 0xD0A3)) + || ((0xD0A5 <= code) && (code <= 0xD0BF)) + || ((0xD0C1 <= code) && (code <= 0xD0DB)) + || ((0xD0DD <= code) && (code <= 0xD0F7)) + || ((0xD0F9 <= code) && (code <= 0xD113)) + || ((0xD115 <= code) && (code <= 0xD12F)) + || ((0xD131 <= code) && (code <= 0xD14B)) + || ((0xD14D <= code) && (code <= 0xD167)) + || ((0xD169 <= code) && (code <= 0xD183)) + || ((0xD185 <= code) && (code <= 0xD19F)) + || ((0xD1A1 <= code) && (code <= 0xD1BB)) + || ((0xD1BD <= code) && (code <= 0xD1D7)) + || ((0xD1D9 <= code) && (code <= 
0xD1F3)) + || ((0xD1F5 <= code) && (code <= 0xD20F)) + || ((0xD211 <= code) && (code <= 0xD22B)) + || ((0xD22D <= code) && (code <= 0xD247)) + || ((0xD249 <= code) && (code <= 0xD263)) + || ((0xD265 <= code) && (code <= 0xD27F)) + || ((0xD281 <= code) && (code <= 0xD29B)) + || ((0xD29D <= code) && (code <= 0xD2B7)) + || ((0xD2B9 <= code) && (code <= 0xD2D3)) + || ((0xD2D5 <= code) && (code <= 0xD2EF)) + || ((0xD2F1 <= code) && (code <= 0xD30B)) + || ((0xD30D <= code) && (code <= 0xD327)) + || ((0xD329 <= code) && (code <= 0xD343)) + || ((0xD345 <= code) && (code <= 0xD35F)) + || ((0xD361 <= code) && (code <= 0xD37B)) + || ((0xD37D <= code) && (code <= 0xD397)) + || ((0xD399 <= code) && (code <= 0xD3B3)) + || ((0xD3B5 <= code) && (code <= 0xD3CF)) + || ((0xD3D1 <= code) && (code <= 0xD3EB)) + || ((0xD3ED <= code) && (code <= 0xD407)) + || ((0xD409 <= code) && (code <= 0xD423)) + || ((0xD425 <= code) && (code <= 0xD43F)) + || ((0xD441 <= code) && (code <= 0xD45B)) + || ((0xD45D <= code) && (code <= 0xD477)) + || ((0xD479 <= code) && (code <= 0xD493)) + || ((0xD495 <= code) && (code <= 0xD4AF)) + || ((0xD4B1 <= code) && (code <= 0xD4CB)) + || ((0xD4CD <= code) && (code <= 0xD4E7)) + || ((0xD4E9 <= code) && (code <= 0xD503)) + || ((0xD505 <= code) && (code <= 0xD51F)) + || ((0xD521 <= code) && (code <= 0xD53B)) + || ((0xD53D <= code) && (code <= 0xD557)) + || ((0xD559 <= code) && (code <= 0xD573)) + || ((0xD575 <= code) && (code <= 0xD58F)) + || ((0xD591 <= code) && (code <= 0xD5AB)) + || ((0xD5AD <= code) && (code <= 0xD5C7)) + || ((0xD5C9 <= code) && (code <= 0xD5E3)) + || ((0xD5E5 <= code) && (code <= 0xD5FF)) + || ((0xD601 <= code) && (code <= 0xD61B)) + || ((0xD61D <= code) && (code <= 0xD637)) + || ((0xD639 <= code) && (code <= 0xD653)) + || ((0xD655 <= code) && (code <= 0xD66F)) + || ((0xD671 <= code) && (code <= 0xD68B)) + || ((0xD68D <= code) && (code <= 0xD6A7)) + || ((0xD6A9 <= code) && (code <= 0xD6C3)) + || ((0xD6C5 <= code) && (code <= 0xD6DF)) + || 
((0xD6E1 <= code) && (code <= 0xD6FB)) + || ((0xD6FD <= code) && (code <= 0xD717)) + || ((0xD719 <= code) && (code <= 0xD733)) + || ((0xD735 <= code) && (code <= 0xD74F)) + || ((0xD751 <= code) && (code <= 0xD76B)) + || ((0xD76D <= code) && (code <= 0xD787)) + || ((0xD789 <= code) && (code <= 0xD7A3))) + return HANGUL_LEAD_VOWEL_TRAIL_TYPE; + + return (int) 0; +} + + +#endif /* _NORM_H_ */ Added: soc2014/ghostmansd/head/lib/libc/string/strnorm.3 ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ soc2014/ghostmansd/head/lib/libc/string/strnorm.3 Mon Jun 16 22:53:46 2014 (r269660) @@ -0,0 +1,102 @@ +.\" Copyright (c) 2014 Dmitry Selyutin <ghostmansd@FreeBSD.org> +.\" Lomonosov Moscow State University. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd June 14, 2014 +.Dt __STRNORM 3 +.Os +.Sh NAME +.Nm __strnorm +.Nd normalize string according to normalization form +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In string.h +.Ft size_t +.Fn __strnorm "char *buffer" "size_t size" "char const *str" "int form" +.Sh DESCRIPTION +The +.Fn __strnorm +function normalizes a multibyte string according to the given normalization form. +.br +The caller must ensure that the buffer is large enough to store the text along with +the null-terminating character. +.br +The normalization form can be one of the \fB__NORM_NFD\fP, \fB__NORM_NFKD\fP, +\fB__NORM_NFC\fP, \fB__NORM_NFKC\fP macros. +.br +.br +If the \fB_UNICODE_ADDENDA\fP macro is defined, then the function is also available +under the +.Fn strnorm +name. +.br +If the \fB_UNICODE_ADDENDA\fP macro is defined, the normalization forms also +appear without an underscore prefix. +.Sh NORMALIZATION FORMS +.Bl -tag -width Er +.It \fB__NORM_NFD\fP +Normalization Form D +(canonical decomposition) +.It \fB__NORM_NFKD\fP +Normalization Form KD +(compatibility decomposition) +.It \fB__NORM_NFC\fP +Normalization Form C +(canonical decomposition, followed by canonical composition) +.It \fB__NORM_NFKC\fP +Normalization Form KC +(compatibility decomposition, followed by canonical composition) +.El +.Sh RETURN VALUES +The +.Fn __strnorm +function returns either the size of the buffer required to store the normalized form +of the string or 0 on error.
+.br +If the buffer is too small to store the normalized form, +the function returns the required size (including space for the null-terminating +character) and sets errno to \fBERANGE\fP. On success, the function returns the number +of characters written. +.Sh ERRORS +The +.Fn __strnorm +function will fail if: +.Bl -tag -width Er +.It Er \fBEINVAL\fP +Function received an illegal argument. +.It Er \fBEILSEQ\fP +Error during string decode or encode process. +.It Er \fBERANGE\fP +Buffer is not large enough to store the normalized form of the string. +.It Er \fBENOMEM\fP +Cannot allocate enough memory for temporary buffers. +.El +.Sh SEE ALSO +.Xr wcsnorm 3 +.Sh STANDARDS +The +.Fn __strnorm +function is a non-standard extension, used by the Unicode Collation Algorithm. Added: soc2014/ghostmansd/head/lib/libc/string/strnorm.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ soc2014/ghostmansd/head/lib/libc/string/strnorm.c Mon Jun 16 22:53:46 2014 (r269660) @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2014 Dmitry Selyutin <ghostmansd@FreeBSD.org> + * at Lomonosov Moscow State University - www.msu.ru + * All rights reserved. + * + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * Portions of this software were developed by David Chisnall + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution.
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <errno.h> +#include <stdlib.h> +#include <wchar.h> +#include <xlocale.h> +#include "xlocale_private.h" +#include "normalization.h" + + +static size_t __norm_encode(char *buffer, size_t size, wchar_t const *wstr, + locale_t locale) +{ + static const mbstate_t initial; + size_t length = 0; + char *mbs = NULL; + wchar_t const *wcs = wstr; + mbstate_t state = initial; + + FIX_LOCALE(locale); + length = wcsrtombs_l(NULL, &wcs, 0, &state, locale); + if (length == ((size_t)-1)) + return 0; + else if (length > size) + { + if (size == 0) + return length; + errno = ERANGE; + return 0; + } + return wcsrtombs_l(mbs, &wstr, length, &state, locale); +} + + +static wchar_t *__norm_decode(char const *str, locale_t locale) +{ + static const mbstate_t initial; + size_t length = 0; + wchar_t *wcs = NULL; + const char *mbs = str; + mbstate_t state = initial; + + FIX_LOCALE(locale); + length = mbsrtowcs_l(NULL, &mbs, 0, &state, locale); + if (length == ((size_t)-1)) + return NULL; + if ((wcs = malloc((length + 1) * sizeof(wchar_t))) == NULL) + return NULL; + mbsrtowcs_l(wcs, &str, length, &state, locale); + wcs[length] = 0; + state = initial; + return wcs; +} + + +size_t __strnorm_l(char *buffer, size_t size, char const *str, int 
form, + locale_t locale) +{ + int error = 0; + size_t wsize = 0; + size_t reqsize = 0; + size_t wreqsize = 0; + wchar_t *wstr = NULL; + wchar_t *wbuffer = NULL; + +#define __strnorm_failure() \ +do { \ + error = errno; \ + free(wbuffer); \ + free(wstr); \ + errno = error; \ + return 0; \ +} while (0) + + /* Check initial arguments. */ + if ((str == NULL) || ((buffer != NULL) && (size == 0))) + { + errno = EINVAL; + return 0; + } + switch (form) + { + case __NORM_NFD: + case __NORM_NFC: + case __NORM_NFKD: + case __NORM_NFKC: + break; + default: + errno = EINVAL; + return 0; + } + + /* Acquire __wcsnorm() arguments. */ + FIX_LOCALE(locale); + if ((wstr = __norm_decode(str, locale)) == NULL) + __strnorm_failure(); + if ((wsize = __wcsnorm(NULL, 0, wstr, form) == 0)) + __strnorm_failure(); + if ((wbuffer = malloc(wsize * sizeof(wchar_t))) == NULL) + __strnorm_failure(); + + /* Normalize the wide string. */ + wreqsize = __wcsnorm(wbuffer, wsize, wstr, form); + if ((wreqsize == 0) || (wreqsize > wsize)) + __strnorm_failure(); + + /* Check if byte buffer is large enough. */ + reqsize = __norm_encode(buffer, size, wbuffer, locale); + if (reqsize == ((size_t)-1)) + __strnorm_failure(); + error = errno; + free(wbuffer); + free(wstr); + errno = error; + return reqsize; +} + + +size_t __strnorm(char *buffer, size_t size, char const *str, int form) +{ return __strnorm_l(buffer, size, str, form, __get_locale()); } Added: soc2014/ghostmansd/head/lib/libc/string/wcsnorm.3 ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ soc2014/ghostmansd/head/lib/libc/string/wcsnorm.3 Mon Jun 16 22:53:46 2014 (r269660) @@ -0,0 +1,100 @@ +.\" Copyright (c) 2014 Dmitry Selyutin <ghostmansd@FreeBSD.org> +.\" Lomonosov Moscow State University. All rights reserved. 
+.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd June 14, 2014 +.Dt __WCSNORM 3 +.Os +.Sh NAME +.Nm __wcsnorm +.Nd normalize wide string according to normalization form +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In wchar.h +.Ft size_t +.Fn __wcsnorm "wchar_t *buffer" "size_t size" "wchar_t const *str" "int form" +.Sh DESCRIPTION +The +.Fn __wcsnorm +function normalizes wide string according to normalization form. 
+.br +The caller must ensure that buffer is large enough to store text along with +null-terminating character. +.br +Normalization form can be one of \fB__NORM_NFD\fP, \fB__NORM_NFKD\fP, +\fB__NORM_NFC\fP, \fB__NORM_NFKC\fP macros. +.br *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201406162253.s5GMrl69059804>