From owner-svn-src-head@FreeBSD.ORG Sun Apr 29 16:28:40 2012 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 448B1106564A; Sun, 29 Apr 2012 16:28:40 +0000 (UTC) (envelope-from das@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 2E76A8FC0C; Sun, 29 Apr 2012 16:28:40 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id q3TGSeOm022205; Sun, 29 Apr 2012 16:28:40 GMT (envelope-from das@svn.freebsd.org) Received: (from das@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id q3TGSdlr022201; Sun, 29 Apr 2012 16:28:39 GMT (envelope-from das@svn.freebsd.org) Message-Id: <201204291628.q3TGSdlr022201@svn.freebsd.org> From: David Schultz Date: Sun, 29 Apr 2012 16:28:39 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r234799 - head/lib/libc/stdio X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 29 Apr 2012 16:28:40 -0000 Author: das Date: Sun Apr 29 16:28:39 2012 New Revision: 234799 URL: http://svn.freebsd.org/changeset/base/234799 Log: Previously, vfscanf()'s wide character processing functions were reading wide characters manually. With this change, they now use fgetwc(). To make this work, we use an internal version of fgetwc() with a few extensions: it takes an mbstate_t * because non-wide streams don't have a built-in mbstate_t, and it indicates the number of bytes read. vfscanf() now resembles vfwscanf() more closely. 
Minor functional improvements include working xlocale support in vfscanf(), setting the stream error indicator on encoding errors, and proper handling of shift-based encodings. (Actually, making shift-based encodings work with non-wide streams is hopeless, but the implementation now matches the broken specification.) Modified: head/lib/libc/stdio/fgetwc.c head/lib/libc/stdio/local.h head/lib/libc/stdio/vfscanf.c Modified: head/lib/libc/stdio/fgetwc.c ============================================================================== --- head/lib/libc/stdio/fgetwc.c Sun Apr 29 16:26:55 2012 (r234798) +++ head/lib/libc/stdio/fgetwc.c Sun Apr 29 16:28:39 2012 (r234799) @@ -59,6 +59,7 @@ fgetwc_l(FILE *fp, locale_t locale) return (r); } + wint_t fgetwc(FILE *fp) { @@ -66,40 +67,45 @@ fgetwc(FILE *fp) } /* - * Non-MT-safe version. + * Internal (non-MPSAFE) version of fgetwc(). This version takes an + * mbstate_t argument specifying the initial conversion state. For + * wide streams, this should always be fp->_mbstate. On return, *nread + * is set to the number of bytes read. */ -wint_t -__fgetwc(FILE *fp, locale_t locale) +wint_t +__fgetwc_mbs(FILE *fp, mbstate_t *mbs, int *nread, locale_t locale) { wchar_t wc; size_t nconv; struct xlocale_ctype *l = XLOCALE_CTYPE(locale); - if (fp->_r <= 0 && __srefill(fp)) + if (fp->_r <= 0 && __srefill(fp)) { + *nread = 0; return (WEOF); + } if (MB_CUR_MAX == 1) { /* Fast path for single-byte encodings. */ wc = *fp->_p++; fp->_r--; + *nread = 1; return (wc); } + *nread = 0; do { - nconv = l->__mbrtowc(&wc, fp->_p, fp->_r, &fp->_mbstate); + nconv = l->__mbrtowc(&wc, fp->_p, fp->_r, mbs); if (nconv == (size_t)-1) break; else if (nconv == (size_t)-2) continue; else if (nconv == 0) { - /* - * Assume that the only valid representation of - * the null wide character is a single null byte. 
- */ fp->_p++; fp->_r--; + (*nread)++; return (L'\0'); } else { fp->_p += nconv; fp->_r -= nconv; + *nread += nconv; return (wc); } } while (__srefill(fp) == 0); Modified: head/lib/libc/stdio/local.h ============================================================================== --- head/lib/libc/stdio/local.h Sun Apr 29 16:26:55 2012 (r234798) +++ head/lib/libc/stdio/local.h Sun Apr 29 16:28:39 2012 (r234799) @@ -56,7 +56,7 @@ extern int _ftello(FILE *, fpos_t *); extern int _fseeko(FILE *, off_t, int, int); extern int __fflush(FILE *fp); extern void __fcloseall(void); -extern wint_t __fgetwc(FILE *, locale_t); +extern wint_t __fgetwc_mbs(FILE *, mbstate_t *, int *, locale_t); extern wint_t __fputwc(wchar_t, FILE *, locale_t); extern int __sflush(FILE *); extern FILE *__sfp(void); @@ -85,6 +85,13 @@ extern size_t __fread(void * __restrict FILE * __restrict fp); extern int __sdidinit; +static inline wint_t +__fgetwc(FILE *fp, locale_t locale) +{ + int nread; + + return (__fgetwc_mbs(fp, &fp->_mbstate, &nread, locale)); +} /* * Prepare the given FILE for writing, and return 0 iff it Modified: head/lib/libc/stdio/vfscanf.c ============================================================================== --- head/lib/libc/stdio/vfscanf.c Sun Apr 29 16:26:55 2012 (r234798) +++ head/lib/libc/stdio/vfscanf.c Sun Apr 29 16:28:39 2012 (r234799) @@ -127,9 +127,8 @@ static const mbstate_t initial_mbs; static __inline int convert_char(FILE *fp, char * __restrict p, int width) { - int n, nread; + int n; - nread = 0; if (p == SUPPRESS_PTR) { size_t sum = 0; for (;;) { @@ -149,59 +148,34 @@ convert_char(FILE *fp, char * __restrict break; } } - nread += sum; + return (sum); } else { size_t r = __fread(p, 1, width, fp); if (r == 0) return (-1); - nread += r; + return (r); } - return (nread); } static __inline int -convert_wchar(FILE *fp, wchar_t *wcp, int width) +convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale) { mbstate_t mbs; - size_t nconv; int n, nread; - char 
buf[MB_CUR_MAX]; + wint_t wi; - nread = 0; + mbs = initial_mbs; n = 0; - while (width != 0) { - if (n == MB_CUR_MAX) { - fp->_flags |= __SERR; - return (-1); - } - buf[n++] = *fp->_p; - fp->_p++; - fp->_r--; - mbs = initial_mbs; - nconv = mbrtowc(wcp, buf, n, &mbs); - if (nconv == (size_t)-1) { - fp->_flags |= __SERR; - return (-1); - } - if (nconv == 0 && wcp != SUPPRESS_PTR) - *wcp = L'\0'; - if (nconv != (size_t)-2) { - nread += n; - width--; - if (wcp != SUPPRESS_PTR) - wcp++; - n = 0; - } - if (fp->_r <= 0 && __srefill(fp)) { - if (n != 0) { - fp->_flags |= __SERR; - return (-1); - } - break; - } + while (width-- != 0 && + (wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF) { + if (wcp != SUPPRESS_PTR) + *wcp++ = (wchar_t)wi; + n += nread; } - return (nread); + if (n == 0) + return (-1); + return (n); } static __inline int @@ -244,63 +218,34 @@ convert_ccl(FILE *fp, char * __restrict } static __inline int -convert_wccl(FILE *fp, wchar_t *wcp, int width, const char *ccltab) +convert_wccl(FILE *fp, wchar_t *wcp, int width, const char *ccltab, + locale_t locale) { mbstate_t mbs; - wchar_t twc; - int n, nchars, nconv; - char buf[MB_CUR_MAX]; + wint_t wi; + int n, nread; - if (wcp == SUPPRESS_PTR) - wcp = &twc; + mbs = initial_mbs; n = 0; - nchars = 0; - while (width != 0) { - if (n == MB_CUR_MAX) { - fp->_flags |= __SERR; - return (-1); - } - buf[n++] = *fp->_p; - fp->_p++; - fp->_r--; - mbs = initial_mbs; - nconv = mbrtowc(wcp, buf, n, &mbs); - if (nconv == (size_t)-1) { - fp->_flags |= __SERR; - return (-1); - } - if (nconv == 0) - *wcp = L'\0'; - if (nconv != (size_t)-2) { - if (wctob(*wcp) != EOF && !ccltab[wctob(*wcp)]) { - while (n != 0) { - n--; - __ungetc(buf[n], fp); - } - break; - } - width--; - if (wcp != &twc) - wcp++; - nchars++; - n = 0; - } - if (fp->_r <= 0 && __srefill(fp)) { - if (n != 0) { - fp->_flags |= __SERR; - return (-1); - } - break; + if (wcp == SUPPRESS_PTR) { + while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && + 
width-- != 0 && ccltab[wctob(wi)]) + n += nread; + if (wi != WEOF) + __ungetwc(wi, fp, __get_locale()); + } else { + while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && + width-- != 0 && ccltab[wctob(wi)]) { + *wcp++ = (wchar_t)wi; + n += nread; } + if (wi != WEOF) + __ungetwc(wi, fp, __get_locale()); + if (n == 0) + return (0); + *wcp = 0; } - if (n != 0) { - fp->_flags |= __SERR; - return (-1); - } - if (nchars == 0) - return (0); - *wcp = L'\0'; - return (nchars); + return (n); } static __inline int @@ -335,56 +280,31 @@ convert_string(FILE *fp, char * __restri } static __inline int -convert_wstring(FILE *fp, wchar_t *wcp, int width) +convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale) { mbstate_t mbs; - wchar_t twc; - int n, nconv, nread; - char buf[MB_CUR_MAX]; - - if (wcp == SUPPRESS_PTR) - wcp = &twc; - n = nread = 0; - while (!isspace(*fp->_p) && width != 0) { - if (n == MB_CUR_MAX) { - fp->_flags |= __SERR; - return (-1); - } - buf[n++] = *fp->_p; - fp->_p++; - fp->_r--; - mbs = initial_mbs; - nconv = mbrtowc(wcp, buf, n, &mbs); - if (nconv == (size_t)-1) { - fp->_flags |= __SERR; - return (-1); - } - if (nconv == 0) - *wcp = L'\0'; - if (nconv != (size_t)-2) { - if (iswspace(*wcp)) { - while (n != 0) { - n--; - __ungetc(buf[n], fp); - } - break; - } - nread += n; - width--; - if (wcp != &twc) - wcp++; - n = 0; - } - if (fp->_r <= 0 && __srefill(fp)) { - if (n != 0) { - fp->_flags |= __SERR; - return (-1); - } - break; - } + wint_t wi; + int n, nread; + + mbs = initial_mbs; + n = 0; + if (wcp == SUPPRESS_PTR) { + while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && + width-- != 0 && !iswspace(wi)) + n += nread; + if (wi != WEOF) + __ungetwc(wi, fp, __get_locale()); + } else { + while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF && + width-- != 0 && !iswspace(wi)) { + *wcp++ = (wchar_t)wi; + n += nread; + } + if (wi != WEOF) + __ungetwc(wi, fp, __get_locale()); + *wcp = '\0'; } - *wcp = L'\0'; - return 
(nread); + return (n); } /* @@ -766,7 +686,7 @@ literal: width = 1; if (flags & LONG) { nr = convert_wchar(fp, GETARG(wchar_t *), - width); + width, locale); } else { nr = convert_char(fp, GETARG(char *), width); } @@ -780,7 +700,7 @@ literal: width = (size_t)~0; /* `infinity' */ if (flags & LONG) { nr = convert_wccl(fp, GETARG(wchar_t *), width, - ccltab); + ccltab, locale); } else { nr = convert_ccl(fp, GETARG(char *), width, ccltab); @@ -799,7 +719,7 @@ literal: width = (size_t)~0; if (flags & LONG) { nr = convert_wstring(fp, GETARG(wchar_t *), - width); + width, locale); } else { nr = convert_string(fp, GETARG(char *), width); }