Date: Wed, 19 Sep 2007 09:18:30 +0400 From: Andrey Chernov <ache@nagual.pp.ru> To: Taku YAMAMOTO <taku@tackymt.homeip.net>, Petr Hroudný <petr.hroudny@gmail.com>, current@FreeBSD.ORG, perky@FreeBSD.ORG, i18n@FreeBSD.ORG Subject: Re: Ctype patch for review Message-ID: <20070919051830.GA72429@nagual.pp.ru> In-Reply-To: <20070919023625.GA70891@nagual.pp.ru> References: <20070916192924.GA12678@nagual.pp.ru> <ab8fc7f50709170129p6f436069iffaf697e83a34e3c@mail.gmail.com> <20070917092130.GA24424@nagual.pp.ru> <20070918020100.d43beb0b.taku@tackymt.homeip.net> <20070917171633.GA31179@nagual.pp.ru> <20070919111207.f37653fc.taku@tackymt.homeip.net> <20070919022555.GA70617@nagual.pp.ru> <20070919023625.GA70891@nagual.pp.ru>
next in thread | previous in thread | raw e-mail | index | archive | help
--OgqxwSJOaUobr8KG Content-Type: text/plain; charset=us-ascii Content-Disposition: inline On Wed, Sep 19, 2007 at 06:36:25AM +0400, Andrey Chernov wrote: > only UTF-8.src not following the rules. I'll send regenerated UTF-8.src > a bit later. I change my mind again, now I use new __mb_bit8_override flag specific to UTF-8 encoding (other bit8 overriding encodings could use it too). New patch attached. -- http://ache.pp.ru/ --OgqxwSJOaUobr8KG Content-Type: text/x-diff; charset=us-ascii Content-Disposition: attachment; filename="ctype.patch" --- _ctype.h.old 2007-09-16 21:13:59.000000000 +0400 +++ _ctype.h 2007-09-19 08:46:35.000000000 +0400 @@ -63,6 +63,7 @@ #define _CTYPE_I 0x00080000L /* Ideogram */ #define _CTYPE_T 0x00100000L /* Special */ #define _CTYPE_Q 0x00200000L /* Phonogram */ +#define _CTYPE_WID 0x10000000L /* wide character function */ #define _CTYPE_SW0 0x20000000L /* 0 width character */ #define _CTYPE_SW1 0x40000000L /* 1 width character */ #define _CTYPE_SW2 0x80000000L /* 2 width character */ @@ -87,6 +88,8 @@ #define __inline #endif +extern int __mb_bit8_override; + /* * Use inline functions if we are allowed to and the compiler supports them. */ @@ -98,8 +101,11 @@ static __inline int __maskrune(__ct_rune_t _c, unsigned long _f) { - return ((_c < 0 || _c >= _CACHED_RUNES) ? ___runetype(_c) : + return __mb_bit8_override && !(_f & _CTYPE_WID) && (_c >= 0x80) ? 0 : + ((_c < 0 || _c >= _CACHED_RUNES) ? ___runetype(_c) : _CurrentRuneLocale->__runetype[_c]) & _f; + /* We never set _CTYPE_WID in the locale data, */ + /* so can skip ... & (_f & ~_CTYPE_WID). */ } static __inline int @@ -111,8 +117,11 @@ static __inline int __isctype(__ct_rune_t _c, unsigned long _f) { - return (_c < 0 || _c >= _CACHED_RUNES) ? 0 : + return __mb_bit8_override && !(_f & _CTYPE_WID) && (_c >= 0x80) ? 0 : + (_c < 0 || _c >= _CACHED_RUNES) ? 0 : !!(_DefaultRuneLocale.__runetype[_c] & _f); + /* We never set _CTYPE_WID in the locale data, */ + /* so can skip ... 
& (_f & ~_CTYPE_WID). */ } static __inline __ct_rune_t @@ -129,6 +138,22 @@ _CurrentRuneLocale->__maplower[_c]; } +static __inline __ct_rune_t +__tosupper(__ct_rune_t _c) +{ + return __mb_bit8_override && (_c >= 0x80) ? _c : + (_c < 0 || _c >= _CACHED_RUNES) ? ___toupper(_c) : + _CurrentRuneLocale->__mapupper[_c]; +} + +static __inline __ct_rune_t +__toslower(__ct_rune_t _c) +{ + return __mb_bit8_override && (_c >= 0x80) ? _c : + (_c < 0 || _c >= _CACHED_RUNES) ? ___tolower(_c) : + _CurrentRuneLocale->__maplower[_c]; +} + static __inline int __wcwidth(__ct_rune_t _c) { @@ -150,6 +175,8 @@ int __isctype(__ct_rune_t, unsigned long); __ct_rune_t __toupper(__ct_rune_t); __ct_rune_t __tolower(__ct_rune_t); +__ct_rune_t __tosupper(__ct_rune_t); +__ct_rune_t __toslower(__ct_rune_t); int __wcwidth(__ct_rune_t); __END_DECLS #endif /* using inlines */ --- big5.c.old 2007-09-19 08:48:55.000000000 +0400 +++ big5.c 2007-09-19 08:56:12.000000000 +0400 @@ -49,6 +49,8 @@ #include <wchar.h> #include "mblocal.h" +extern int __mb_bit8_override; + static size_t _BIG5_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _BIG5_mbsinit(const mbstate_t *); @@ -68,6 +70,7 @@ __mbsinit = _BIG5_mbsinit; _CurrentRuneLocale = rl; __mb_cur_max = 2; + __mb_bit8_override = 0; return (0); } --- ctype.h.old 2007-09-16 22:03:55.000000000 +0400 +++ ctype.h 2007-09-16 22:56:10.000000000 +0400 @@ -97,8 +97,8 @@ #define isspace(c) __istype((c), _CTYPE_S) #define isupper(c) __istype((c), _CTYPE_U) #define isxdigit(c) __isctype((c), _CTYPE_X) /* ANSI -- locale independent */ -#define tolower(c) __tolower(c) -#define toupper(c) __toupper(c) +#define tolower(c) __toslower(c) +#define toupper(c) __tosupper(c) #if __XSI_VISIBLE /* @@ -112,8 +112,8 @@ * * XXX isascii() and toascii() should similarly be undocumented. 
*/ -#define _tolower(c) __tolower(c) -#define _toupper(c) __toupper(c) +#define _tolower(c) __toslower(c) +#define _toupper(c) __tosupper(c) #define isascii(c) (((c) & ~0x7F) == 0) #define toascii(c) ((c) & 0x7F) #endif @@ -128,7 +128,7 @@ #define isideogram(c) __istype((c), _CTYPE_I) #define isnumber(c) __istype((c), _CTYPE_D) #define isphonogram(c) __istype((c), _CTYPE_Q) -#define isrune(c) __istype((c), 0xFFFFFF00L) +#define isrune(c) __istype((c), 0xFFFFFF00L & ~_CTYPE_WID) #define isspecial(c) __istype((c), _CTYPE_T) #endif --- euc.c.old 2007-09-19 08:50:57.000000000 +0400 +++ euc.c 2007-09-19 08:56:12.000000000 +0400 @@ -49,6 +49,8 @@ #include <wchar.h> #include "mblocal.h" +extern int __mb_bit8_override; + static size_t _EUC_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _EUC_mbsinit(const mbstate_t *); @@ -116,6 +118,7 @@ __mbrtowc = _EUC_mbrtowc; __wcrtomb = _EUC_wcrtomb; __mbsinit = _EUC_mbsinit; + __mb_bit8_override = 0; return (0); } --- gb18030.c.old 2007-09-19 08:59:01.000000000 +0400 +++ gb18030.c 2007-09-19 09:00:10.000000000 +0400 @@ -39,6 +39,8 @@ #include <wchar.h> #include "mblocal.h" +extern int __mb_bit8_override; + static size_t _GB18030_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _GB18030_mbsinit(const mbstate_t *); @@ -59,6 +61,7 @@ __mbsinit = _GB18030_mbsinit; _CurrentRuneLocale = rl; __mb_cur_max = 4; + __mb_bit8_override = 0; return (0); } --- gb2312.c.old 2007-09-19 09:00:35.000000000 +0400 +++ gb2312.c 2007-09-19 09:01:05.000000000 +0400 @@ -35,6 +35,8 @@ #include <wchar.h> #include "mblocal.h" +extern int __mb_bit8_override; + static size_t _GB2312_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _GB2312_mbsinit(const mbstate_t *); @@ -55,6 +57,7 @@ __wcrtomb = _GB2312_wcrtomb; __mbsinit = _GB2312_mbsinit; __mb_cur_max = 2; + __mb_bit8_override = 0; return (0); } --- gbk.c.old 
2007-09-19 09:01:33.000000000 +0400 +++ gbk.c 2007-09-19 09:02:03.000000000 +0400 @@ -42,6 +42,8 @@ #include <wchar.h> #include "mblocal.h" +extern int __mb_bit8_override; + static size_t _GBK_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _GBK_mbsinit(const mbstate_t *); @@ -61,6 +63,7 @@ __mbsinit = _GBK_mbsinit; _CurrentRuneLocale = rl; __mb_cur_max = 2; + __mb_bit8_override = 0; return (0); } --- isctype.c.old 2007-09-16 22:31:26.000000000 +0400 +++ isctype.c 2007-09-16 22:37:54.000000000 +0400 @@ -168,7 +168,7 @@ isrune(c) int c; { - return (__istype(c, 0xFFFFFF00L)); + return (__istype(c, 0xFFFFFF00L & ~_CTYPE_WID)); } #undef isspace @@ -216,7 +216,7 @@ tolower(c) int c; { - return (__tolower(c)); + return (__toslower(c)); } #undef toupper @@ -224,6 +224,6 @@ toupper(c) int c; { - return (__toupper(c)); + return (__tosupper(c)); } --- iswctype.c.old 2007-09-16 22:31:30.000000000 +0400 +++ iswctype.c 2007-09-16 22:41:39.000000000 +0400 @@ -45,7 +45,7 @@ iswalnum(wc) wint_t wc; { - return (__istype(wc, _CTYPE_A|_CTYPE_D)); + return (__istype(wc, _CTYPE_A|_CTYPE_D|_CTYPE_WID)); } #undef iswalpha @@ -53,7 +53,7 @@ iswalpha(wc) wint_t wc; { - return (__istype(wc, _CTYPE_A)); + return (__istype(wc, _CTYPE_A|_CTYPE_WID))); } #undef iswascii @@ -61,7 +61,7 @@ iswascii(wc) wint_t wc; { - return ((wc & ~0x7F) == 0); + return (wc < 0x80); } #undef iswblank @@ -69,7 +69,7 @@ iswblank(wc) wint_t wc; { - return (__istype(wc, _CTYPE_B)); + return (__istype(wc, _CTYPE_B|_CTYPE_WID))); } #undef iswcntrl @@ -77,7 +77,7 @@ iswcntrl(wc) wint_t wc; { - return (__istype(wc, _CTYPE_C)); + return (__istype(wc, _CTYPE_C|_CTYPE_WID))); } #undef iswdigit @@ -85,7 +85,7 @@ iswdigit(wc) wint_t wc; { - return (__isctype(wc, _CTYPE_D)); + return (__isctype(wc, _CTYPE_D|_CTYPE_WID))); } #undef iswgraph @@ -93,7 +93,7 @@ iswgraph(wc) wint_t wc; { - return (__istype(wc, _CTYPE_G)); + return (__istype(wc, _CTYPE_G|_CTYPE_WID))); } #undef 
iswhexnumber @@ -101,7 +101,7 @@ iswhexnumber(wc) wint_t wc; { - return (__istype(wc, _CTYPE_X)); + return (__istype(wc, _CTYPE_X|_CTYPE_WID))); } #undef iswideogram @@ -109,7 +109,7 @@ iswideogram(wc) wint_t wc; { - return (__istype(wc, _CTYPE_I)); + return (__istype(wc, _CTYPE_I|_CTYPE_WID))); } #undef iswlower @@ -117,7 +117,7 @@ iswlower(wc) wint_t wc; { - return (__istype(wc, _CTYPE_L)); + return (__istype(wc, _CTYPE_L|_CTYPE_WID))); } #undef iswnumber @@ -125,7 +125,7 @@ iswnumber(wc) wint_t wc; { - return (__istype(wc, _CTYPE_D)); + return (__istype(wc, _CTYPE_D|_CTYPE_WID))); } #undef iswphonogram @@ -133,7 +133,7 @@ iswphonogram(wc) wint_t wc; { - return (__istype(wc, _CTYPE_Q)); + return (__istype(wc, _CTYPE_Q|_CTYPE_WID))); } #undef iswprint @@ -141,7 +141,7 @@ iswprint(wc) wint_t wc; { - return (__istype(wc, _CTYPE_R)); + return (__istype(wc, _CTYPE_R|_CTYPE_WID))); } #undef iswpunct @@ -149,7 +149,7 @@ iswpunct(wc) wint_t wc; { - return (__istype(wc, _CTYPE_P)); + return (__istype(wc, _CTYPE_P|_CTYPE_WID))); } #undef iswrune @@ -157,7 +157,7 @@ iswrune(wc) wint_t wc; { - return (__istype(wc, 0xFFFFFF00L)); + return (__istype(wc, 0xFFFFFF00L)); /* already have _CTYPE_WID */ } #undef iswspace @@ -165,7 +165,7 @@ iswspace(wc) wint_t wc; { - return (__istype(wc, _CTYPE_S)); + return (__istype(wc, _CTYPE_S|_CTYPE_WID))); } #undef iswspecial @@ -173,7 +173,7 @@ iswspecial(wc) wint_t wc; { - return (__istype(wc, _CTYPE_T)); + return (__istype(wc, _CTYPE_T|_CTYPE_WID))); } #undef iswupper @@ -181,7 +181,7 @@ iswupper(wc) wint_t wc; { - return (__istype(wc, _CTYPE_U)); + return (__istype(wc, _CTYPE_U|_CTYPE_WID))); } #undef iswxdigit @@ -189,7 +189,7 @@ iswxdigit(wc) wint_t wc; { - return (__isctype(wc, _CTYPE_X)); + return (__isctype(wc, _CTYPE_X|_CTYPE_WID))); } #undef towlower --- mskanji.c.old 2007-09-19 09:02:56.000000000 +0400 +++ mskanji.c 2007-09-19 09:03:26.000000000 +0400 @@ -47,6 +47,8 @@ #include <wchar.h> #include "mblocal.h" +extern int 
__mb_bit8_override; + static size_t _MSKanji_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _MSKanji_mbsinit(const mbstate_t *); @@ -66,6 +68,7 @@ __mbsinit = _MSKanji_mbsinit; _CurrentRuneLocale = rl; __mb_cur_max = 2; + __mb_bit8_override = 0; return (0); } --- none.c.old 2007-09-19 08:56:40.000000000 +0400 +++ none.c 2007-09-19 08:58:23.000000000 +0400 @@ -69,6 +69,7 @@ __wcsnrtombs = _none_wcsnrtombs; _CurrentRuneLocale = rl; __mb_cur_max = 1; + __mb_bit8_override = 0; return(0); } @@ -177,6 +178,7 @@ /* setup defaults */ int __mb_cur_max = 1; +int __mb_bit8_override = 0; size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict) = _none_mbrtowc; int (*__mbsinit)(const mbstate_t *) = _none_mbsinit; --- setrunelocale.c.old 2007-09-19 09:03:59.000000000 +0400 +++ setrunelocale.c 2007-09-19 09:06:45.000000000 +0400 @@ -45,6 +45,8 @@ #include "mblocal.h" #include "setlocale.h" +extern int __mb_bit8_override; + extern _RuneLocale *_Read_RuneMagi(FILE *); static int __setrunelocale(const char *); @@ -59,6 +61,7 @@ static char ctype_encoding[ENCODING_LEN + 1]; static _RuneLocale *CachedRuneLocale; static int Cached__mb_cur_max; + static int Cached__mb_bit8_override; static size_t (*Cached__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static size_t (*Cached__wcrtomb)(char * __restrict, wchar_t, @@ -85,6 +88,7 @@ strcmp(encoding, ctype_encoding) == 0) { _CurrentRuneLocale = CachedRuneLocale; __mb_cur_max = Cached__mb_cur_max; + __mb_bit8_override = Cached__mb_bit8_override; __mbrtowc = Cached__mbrtowc; __mbsinit = Cached__mbsinit; __mbsnrtowcs = Cached__mbsnrtowcs; @@ -147,6 +151,7 @@ } CachedRuneLocale = _CurrentRuneLocale; Cached__mb_cur_max = __mb_cur_max; + Cached__mb_bit8_override = __mb_bit8_override; Cached__mbrtowc = __mbrtowc; Cached__mbsinit = __mbsinit; Cached__mbsnrtowcs = __mbsnrtowcs; --- utf8.c.old 2007-09-19 
08:18:40.000000000 +0400 +++ utf8.c 2007-09-19 08:56:12.000000000 +0400 @@ -35,6 +35,8 @@ #include <wchar.h> #include "mblocal.h" +extern int __mb_bit8_override; + static size_t _UTF8_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _UTF8_mbsinit(const mbstate_t *); @@ -63,6 +65,7 @@ __wcsnrtombs = _UTF8_wcsnrtombs; _CurrentRuneLocale = rl; __mb_cur_max = 6; + __mb_bit8_override = 1; return (0); } --- wctype.h.old 2007-09-16 21:59:37.000000000 +0400 +++ wctype.h 2007-09-16 22:56:44.000000000 +0400 @@ -89,30 +89,30 @@ #endif __END_DECLS -#define iswalnum(wc) __istype((wc), _CTYPE_A|_CTYPE_D) -#define iswalpha(wc) __istype((wc), _CTYPE_A) -#define iswblank(wc) __istype((wc), _CTYPE_B) -#define iswcntrl(wc) __istype((wc), _CTYPE_C) -#define iswctype(wc, charclass) __istype((wc), (charclass)) -#define iswdigit(wc) __isctype((wc), _CTYPE_D) -#define iswgraph(wc) __istype((wc), _CTYPE_G) -#define iswlower(wc) __istype((wc), _CTYPE_L) -#define iswprint(wc) __istype((wc), _CTYPE_R) -#define iswpunct(wc) __istype((wc), _CTYPE_P) -#define iswspace(wc) __istype((wc), _CTYPE_S) -#define iswupper(wc) __istype((wc), _CTYPE_U) -#define iswxdigit(wc) __isctype((wc), _CTYPE_X) +#define iswalnum(wc) __istype((wc), _CTYPE_A|_CTYPE_D|_CTYPE_WID) +#define iswalpha(wc) __istype((wc), _CTYPE_A|_CTYPE_WID) +#define iswblank(wc) __istype((wc), _CTYPE_B|_CTYPE_WID) +#define iswcntrl(wc) __istype((wc), _CTYPE_C|_CTYPE_WID) +#define iswctype(wc, charclass) __istype((wc), (charclass)|_CTYPE_WID) +#define iswdigit(wc) __isctype((wc), _CTYPE_D|_CTYPE_WID) +#define iswgraph(wc) __istype((wc), _CTYPE_G|_CTYPE_WID) +#define iswlower(wc) __istype((wc), _CTYPE_L|_CTYPE_WID) +#define iswprint(wc) __istype((wc), _CTYPE_R|_CTYPE_WID) +#define iswpunct(wc) __istype((wc), _CTYPE_P|_CTYPE_WID) +#define iswspace(wc) __istype((wc), _CTYPE_S|_CTYPE_WID) +#define iswupper(wc) __istype((wc), _CTYPE_U|_CTYPE_WID) +#define iswxdigit(wc) __isctype((wc), 
_CTYPE_X|_CTYPE_WID) #define towlower(wc) __tolower(wc) #define towupper(wc) __toupper(wc) #if __BSD_VISIBLE -#define iswascii(wc) (((wc) & ~0x7F) == 0) -#define iswhexnumber(wc) __istype((wc), _CTYPE_X) -#define iswideogram(wc) __istype((wc), _CTYPE_I) -#define iswnumber(wc) __istype((wc), _CTYPE_D) -#define iswphonogram(wc) __istype((wc), _CTYPE_Q) -#define iswrune(wc) __istype((wc), 0xFFFFFF00L) -#define iswspecial(wc) __istype((wc), _CTYPE_T) +#define iswascii(wc) ((wc) < 0x80) +#define iswhexnumber(wc) __istype((wc), _CTYPE_X|_CTYPE_WID) +#define iswideogram(wc) __istype((wc), _CTYPE_I|_CTYPE_WID) +#define iswnumber(wc) __istype((wc), _CTYPE_D|_CTYPE_WID) +#define iswphonogram(wc) __istype((wc), _CTYPE_Q|_CTYPE_WID) +#define iswrune(wc) __istype((wc), 0xFFFFFF00L) /* already have _CTYPE_WID */ +#define iswspecial(wc) __istype((wc), _CTYPE_T|_CTYPE_WID) #endif #endif /* _WCTYPE_H_ */ --OgqxwSJOaUobr8KG--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20070919051830.GA72429>