Date: Fri, 21 Sep 2007 22:02:23 +0400 From: Andrey Chernov <ache@nagual.pp.ru> To: Petr Hroudn?? <petr.hroudny@gmail.com> Cc: Taku YAMAMOTO <taku@tackymt.homeip.net>, i18n@freebsd.org, current@freebsd.org, perky@freebsd.org Subject: Re: Ctype patch for review Message-ID: <20070921180223.GA38675@nagual.pp.ru> In-Reply-To: <ab8fc7f50709210004r2e823d2ex3a735d51e35e9360@mail.gmail.com> References: <20070917092130.GA24424@nagual.pp.ru> <20070918020100.d43beb0b.taku@tackymt.homeip.net> <20070917171633.GA31179@nagual.pp.ru> <20070919111207.f37653fc.taku@tackymt.homeip.net> <20070919022555.GA70617@nagual.pp.ru> <20070919023625.GA70891@nagual.pp.ru> <20070919051830.GA72429@nagual.pp.ru> <20070919121024.GA81606@nagual.pp.ru> <20070921024107.GA21223@nagual.pp.ru> <ab8fc7f50709210004r2e823d2ex3a735d51e35e9360@mail.gmail.com>
next in thread | previous in thread | raw e-mail | index | archive | help
--LQksG6bCIzRHxTLp Content-Type: text/plain; charset=us-ascii Content-Disposition: inline On Fri, Sep 21, 2007 at 09:04:44AM +0200, Petr Hroudn?? wrote: > I believe your patch needs some adjustments for CJK charsets. You are setting > __mb_sch_limit to 256 for all multibyte locales except UTF-8, but I believe it > should be 128 also for Big5, GB18030, GBK. For GB2312 too. Thanx for pointing out, here is adjusted patch. And __mb_sch_limit renamed to _mb_sb_limit to better match functions prefix. -- http://ache.pp.ru/ --LQksG6bCIzRHxTLp Content-Type: text/x-diff; charset=us-ascii Content-Disposition: attachment; filename="ctype.patch" --- Symbol.map.old 2007-09-19 22:37:21.000000000 +0400 +++ Symbol.map 2007-09-21 21:52:23.000000000 +0400 @@ -60,12 +60,17 @@ nextwctype; nl_langinfo; __maskrune; + __sbmaskrune; __istype; + __sbistype; __isctype; __toupper; + __sbtoupper; __tolower; + __sbtolower; __wcwidth; __mb_cur_max; + __mb_sb_limit; rpmatch; ___runetype; setlocale; --- _ctype.h.old 2007-09-16 21:13:59.000000000 +0400 +++ _ctype.h 2007-09-21 21:44:31.000000000 +0400 @@ -87,6 +87,8 @@ #define __inline #endif +extern int __mb_sb_limit; + /* * Use inline functions if we are allowed to and the compiler supports them. */ @@ -103,15 +105,28 @@ } static __inline int +__sbmaskrune(__ct_rune_t _c, unsigned long _f) +{ + return (_c < 0 || _c >= __mb_sb_limit) ? 0 : + _CurrentRuneLocale->__runetype[_c] & _f; +} + +static __inline int __istype(__ct_rune_t _c, unsigned long _f) { return (!!__maskrune(_c, _f)); } static __inline int +__sbistype(__ct_rune_t _c, unsigned long _f) +{ + return (!!__sbmasksrune(_c, _f)); +} + +static __inline int __isctype(__ct_rune_t _c, unsigned long _f) { - return (_c < 0 || _c >= _CACHED_RUNES) ? 0 : + return (_c < 0 || _c >= __mb_sb_limit) ? 0 : !!(_DefaultRuneLocale.__runetype[_c] & _f); } @@ -123,12 +138,26 @@ } static __inline __ct_rune_t +__sbtoupper(__ct_rune_t _c) +{ + return (_c < 0 || _c >= __mb_sb_limit) ? _c : + _CurrentRuneLocale->__mapupper[_c]; +} + +static __inline __ct_rune_t __tolower(__ct_rune_t _c) { return (_c < 0 || _c >= _CACHED_RUNES) ? ___tolower(_c) : _CurrentRuneLocale->__maplower[_c]; } +static __inline __ct_rune_t +__sbtolower(__ct_rune_t _c) +{ + return (_c < 0 || _c >= __mb_sb_limit) ? _c : + _CurrentRuneLocale->__maplower[_c]; +} + static __inline int __wcwidth(__ct_rune_t _c) { @@ -146,10 +175,14 @@ __BEGIN_DECLS int __maskrune(__ct_rune_t, unsigned long); +int __sbmaskrune(__ct_rune_t, unsigned long); int __istype(__ct_rune_t, unsigned long); +int __sbistype(__ct_rune_t, unsigned long); int __isctype(__ct_rune_t, unsigned long); __ct_rune_t __toupper(__ct_rune_t); +__ct_rune_t __sbtoupper(__ct_rune_t); __ct_rune_t __tolower(__ct_rune_t); +__ct_rune_t __sbtolower(__ct_rune_t); int __wcwidth(__ct_rune_t); __END_DECLS #endif /* using inlines */ --- big5.c.old 2007-09-19 08:48:55.000000000 +0400 +++ big5.c 2007-09-21 21:44:31.000000000 +0400 @@ -49,6 +49,8 @@ #include <wchar.h> #include "mblocal.h" +extern int __mb_sb_limit; + static size_t _BIG5_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _BIG5_mbsinit(const mbstate_t *); @@ -68,6 +70,7 @@ __mbsinit = _BIG5_mbsinit; _CurrentRuneLocale = rl; __mb_cur_max = 2; + __mb_sb_limit = 128; return (0); } --- ctype.h.old 2007-09-16 22:03:55.000000000 +0400 +++ ctype.h 2007-09-21 06:26:26.000000000 +0400 @@ -86,19 +86,19 @@ #endif __END_DECLS -#define isalnum(c) __istype((c), _CTYPE_A|_CTYPE_D) -#define isalpha(c) __istype((c), _CTYPE_A) -#define iscntrl(c) __istype((c), _CTYPE_C) +#define isalnum(c) __sbistype((c), _CTYPE_A|_CTYPE_D) +#define isalpha(c) __sbistype((c), _CTYPE_A) +#define iscntrl(c) __sbistype((c), _CTYPE_C) #define isdigit(c) __isctype((c), _CTYPE_D) /* ANSI -- locale independent */ -#define isgraph(c) __istype((c), _CTYPE_G) -#define islower(c) __istype((c), _CTYPE_L) -#define isprint(c) __istype((c), _CTYPE_R) -#define ispunct(c) __istype((c), _CTYPE_P) -#define isspace(c) __istype((c), _CTYPE_S) -#define isupper(c) __istype((c), _CTYPE_U) +#define isgraph(c) __sbistype((c), _CTYPE_G) +#define islower(c) __sbistype((c), _CTYPE_L) +#define isprint(c) __sbistype((c), _CTYPE_R) +#define ispunct(c) __sbistype((c), _CTYPE_P) +#define isspace(c) __sbistype((c), _CTYPE_S) +#define isupper(c) __sbistype((c), _CTYPE_U) #define isxdigit(c) __isctype((c), _CTYPE_X) /* ANSI -- locale independent */ -#define tolower(c) __tolower(c) -#define toupper(c) __toupper(c) +#define tolower(c) __sbtolower(c) +#define toupper(c) __sbtoupper(c) #if __XSI_VISIBLE /* @@ -112,24 +112,24 @@ * * XXX isascii() and toascii() should similarly be undocumented. */ -#define _tolower(c) __tolower(c) -#define _toupper(c) __toupper(c) +#define _tolower(c) __sbtolower(c) +#define _toupper(c) __sbtoupper(c) #define isascii(c) (((c) & ~0x7F) == 0) #define toascii(c) ((c) & 0x7F) #endif #if __ISO_C_VISIBLE >= 1999 -#define isblank(c) __istype((c), _CTYPE_B) +#define isblank(c) __sbistype((c), _CTYPE_B) #endif #if __BSD_VISIBLE -#define digittoint(c) __maskrune((c), 0xFF) -#define ishexnumber(c) __istype((c), _CTYPE_X) -#define isideogram(c) __istype((c), _CTYPE_I) -#define isnumber(c) __istype((c), _CTYPE_D) -#define isphonogram(c) __istype((c), _CTYPE_Q) -#define isrune(c) __istype((c), 0xFFFFFF00L) -#define isspecial(c) __istype((c), _CTYPE_T) +#define digittoint(c) __sbmaskrune((c), 0xFF) +#define ishexnumber(c) __sbistype((c), _CTYPE_X) +#define isideogram(c) __sbistype((c), _CTYPE_I) +#define isnumber(c) __sbistype((c), _CTYPE_D) +#define isphonogram(c) __sbistype((c), _CTYPE_Q) +#define isrune(c) __sbistype((c), 0xFFFFFF00L) +#define isspecial(c) __sbistype((c), _CTYPE_T) #endif #endif /* !_CTYPE_H_ */ --- euc.c.old 2007-09-19 08:50:57.000000000 +0400 +++ euc.c 2007-09-21 21:44:31.000000000 +0400 @@ -49,6 +49,8 @@ #include <wchar.h> #include "mblocal.h" +extern int __mb_sb_limit; + static size_t _EUC_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _EUC_mbsinit(const mbstate_t *); @@ -116,6 +118,7 @@ __mbrtowc = _EUC_mbrtowc; __wcrtomb = _EUC_wcrtomb; __mbsinit = _EUC_mbsinit; + __mb_sb_limit = 256; return (0); } --- gb18030.c.old 2007-09-19 08:59:01.000000000 +0400 +++ gb18030.c 2007-09-21 21:44:31.000000000 +0400 @@ -39,6 +39,8 @@ #include <wchar.h> #include "mblocal.h" +extern int __mb_sb_limit; + static size_t _GB18030_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _GB18030_mbsinit(const mbstate_t *); @@ -59,6 +61,7 @@ __mbsinit = _GB18030_mbsinit; _CurrentRuneLocale = rl; __mb_cur_max = 4; + __mb_sb_limit = 128; return (0); } --- gb2312.c.old 2007-09-19 09:00:35.000000000 +0400 +++ gb2312.c 2007-09-21 21:44:31.000000000 +0400 @@ -35,6 +35,8 @@ #include <wchar.h> #include "mblocal.h" +extern int __mb_sb_limit; + static size_t _GB2312_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _GB2312_mbsinit(const mbstate_t *); @@ -55,6 +57,7 @@ __wcrtomb = _GB2312_wcrtomb; __mbsinit = _GB2312_mbsinit; __mb_cur_max = 2; + __mb_sb_limit = 128; return (0); } --- gbk.c.old 2007-09-19 09:01:33.000000000 +0400 +++ gbk.c 2007-09-21 21:44:31.000000000 +0400 @@ -42,6 +42,8 @@ #include <wchar.h> #include "mblocal.h" +extern int __mb_sb_limit; + static size_t _GBK_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _GBK_mbsinit(const mbstate_t *); @@ -61,6 +63,7 @@ __mbsinit = _GBK_mbsinit; _CurrentRuneLocale = rl; __mb_cur_max = 2; + __mb_sb_limit = 128; return (0); } --- isctype.c.old 2007-09-16 22:31:26.000000000 +0400 +++ isctype.c 2007-09-21 06:28:30.000000000 +0400 @@ -48,7 +48,7 @@ digittoint(c) int c; { - return (__maskrune(c, 0xFF)); + return (__sbmaskrune(c, 0xFF)); } #undef isalnum @@ -56,7 +56,7 @@ isalnum(c) int c; { - return (__istype(c, _CTYPE_A|_CTYPE_D)); + return (__sbistype(c, _CTYPE_A|_CTYPE_D)); } #undef isalpha @@ -64,7 +64,7 @@ isalpha(c) int c; { - return (__istype(c, _CTYPE_A)); + return (__sbistype(c, _CTYPE_A)); } #undef isascii @@ -80,7 +80,7 @@ isblank(c) int c; { - return (__istype(c, _CTYPE_B)); + return (__sbistype(c, _CTYPE_B)); } #undef iscntrl @@ -88,7 +88,7 @@ iscntrl(c) int c; { - return (__istype(c, _CTYPE_C)); + return (__sbistype(c, _CTYPE_C)); } #undef isdigit @@ -104,7 +104,7 @@ isgraph(c) int c; { - return (__istype(c, _CTYPE_G)); + return (__sbistype(c, _CTYPE_G)); } #undef ishexnumber @@ -112,7 +112,7 @@ ishexnumber(c) int c; { - return (__istype(c, _CTYPE_X)); + return (__sbistype(c, _CTYPE_X)); } #undef isideogram @@ -120,7 +120,7 @@ isideogram(c) int c; { - return (__istype(c, _CTYPE_I)); + return (__sbistype(c, _CTYPE_I)); } #undef islower @@ -128,7 +128,7 @@ islower(c) int c; { - return (__istype(c, _CTYPE_L)); + return (__sbistype(c, _CTYPE_L)); } #undef isnumber @@ -136,7 +136,7 @@ isnumber(c) int c; { - return (__istype(c, _CTYPE_D)); + return (__sbistype(c, _CTYPE_D)); } #undef isphonogram @@ -144,7 +144,7 @@ isphonogram(c) int c; { - return (__istype(c, _CTYPE_Q)); + return (__sbistype(c, _CTYPE_Q)); } #undef isprint @@ -152,7 +152,7 @@ isprint(c) int c; { - return (__istype(c, _CTYPE_R)); + return (__sbistype(c, _CTYPE_R)); } #undef ispunct @@ -160,7 +160,7 @@ ispunct(c) int c; { - return (__istype(c, _CTYPE_P)); + return (__sbistype(c, _CTYPE_P)); } #undef isrune @@ -168,7 +168,7 @@ isrune(c) int c; { - return (__istype(c, 0xFFFFFF00L)); + return (__sbistype(c, 0xFFFFFF00L)); } #undef isspace @@ -176,7 +176,7 @@ isspace(c) int c; { - return (__istype(c, _CTYPE_S)); + return (__sbistype(c, _CTYPE_S)); } #undef isspecial @@ -184,7 +184,7 @@ isspecial(c) int c; { - return (__istype(c, _CTYPE_T)); + return (__sbistype(c, _CTYPE_T)); } #undef isupper @@ -192,7 +192,7 @@ isupper(c) int c; { - return (__istype(c, _CTYPE_U)); + return (__sbistype(c, _CTYPE_U)); } #undef isxdigit @@ -216,7 +216,7 @@ tolower(c) int c; { - return (__tolower(c)); + return (__sbtolower(c)); } #undef toupper @@ -224,6 +224,6 @@ toupper(c) int c; { - return (__toupper(c)); + return (__sbtoupper(c)); } --- iswctype.c.old 2007-09-16 22:31:30.000000000 +0400 +++ iswctype.c 2007-09-21 06:29:59.000000000 +0400 @@ -61,7 +61,7 @@ iswascii(wc) wint_t wc; { - return ((wc & ~0x7F) == 0); + return (wc < 0x80); } #undef iswblank --- mskanji.c.old 2007-09-19 09:02:56.000000000 +0400 +++ mskanji.c 2007-09-21 21:44:31.000000000 +0400 @@ -47,6 +47,8 @@ #include <wchar.h> #include "mblocal.h" +extern int __mb_sb_limit; + static size_t _MSKanji_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _MSKanji_mbsinit(const mbstate_t *); @@ -66,6 +68,7 @@ __mbsinit = _MSKanji_mbsinit; _CurrentRuneLocale = rl; __mb_cur_max = 2; + __mb_sb_limit = 256; return (0); } --- none.c.old 2007-09-19 08:56:40.000000000 +0400 +++ none.c 2007-09-21 21:44:31.000000000 +0400 @@ -58,6 +58,11 @@ static size_t _none_wcsnrtombs(char * __restrict, const wchar_t ** __restrict, size_t, size_t, mbstate_t * __restrict); +/* setup defaults */ + +int __mb_cur_max = 1; +int __mb_sb_limit = 256; /* Expected to be <= _CACHED_RUNES */ + int _none_init(_RuneLocale *rl) { @@ -69,6 +74,7 @@ __wcsnrtombs = _none_wcsnrtombs; _CurrentRuneLocale = rl; __mb_cur_max = 1; + __mb_sb_limit = 256; return(0); } @@ -176,7 +182,6 @@ /* setup defaults */ -int __mb_cur_max = 1; size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict) = _none_mbrtowc; int (*__mbsinit)(const mbstate_t *) = _none_mbsinit; --- setrunelocale.c.old 2007-09-19 09:03:59.000000000 +0400 +++ setrunelocale.c 2007-09-21 21:44:31.000000000 +0400 @@ -45,6 +45,8 @@ #include "mblocal.h" #include "setlocale.h" +extern int __mb_sb_limit; + extern _RuneLocale *_Read_RuneMagi(FILE *); static int __setrunelocale(const char *); @@ -59,6 +61,7 @@ static char ctype_encoding[ENCODING_LEN + 1]; static _RuneLocale *CachedRuneLocale; static int Cached__mb_cur_max; + static int Cached__mb_sb_limit; static size_t (*Cached__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static size_t (*Cached__wcrtomb)(char * __restrict, wchar_t, @@ -85,6 +88,7 @@ strcmp(encoding, ctype_encoding) == 0) { _CurrentRuneLocale = CachedRuneLocale; __mb_cur_max = Cached__mb_cur_max; + __mb_sb_limit = Cached__mb_sb_limit; __mbrtowc = Cached__mbrtowc; __mbsinit = Cached__mbsinit; __mbsnrtowcs = Cached__mbsnrtowcs; @@ -147,6 +151,7 @@ } CachedRuneLocale = _CurrentRuneLocale; Cached__mb_cur_max = __mb_cur_max; + Cached__mb_sb_limit = __mb_sb_limit; Cached__mbrtowc = __mbrtowc; Cached__mbsinit = __mbsinit; Cached__mbsnrtowcs = __mbsnrtowcs; --- utf8.c.old 2007-09-19 08:18:40.000000000 +0400 +++ utf8.c 2007-09-21 21:44:31.000000000 +0400 @@ -35,6 +35,8 @@ #include <wchar.h> #include "mblocal.h" +extern int __mb_sb_limit; + static size_t _UTF8_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _UTF8_mbsinit(const mbstate_t *); @@ -63,6 +65,7 @@ __wcsnrtombs = _UTF8_wcsnrtombs; _CurrentRuneLocale = rl; __mb_cur_max = 6; + __mb_sb_limit = 128; return (0); } --- wctype.h.old 2007-09-16 21:59:37.000000000 +0400 +++ wctype.h 2007-09-21 06:08:40.000000000 +0400 @@ -106,7 +106,7 @@ #define towupper(wc) __toupper(wc) #if __BSD_VISIBLE -#define iswascii(wc) (((wc) & ~0x7F) == 0) +#define iswascii(wc) ((wc) < 0x80) #define iswhexnumber(wc) __istype((wc), _CTYPE_X) #define iswideogram(wc) __istype((wc), _CTYPE_I) #define iswnumber(wc) __istype((wc), _CTYPE_D) --LQksG6bCIzRHxTLp--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20070921180223.GA38675>