Skip site navigation (1) | Skip section navigation (2)
Date:      Sun, 29 Apr 2012 16:28:39 +0000 (UTC)
From:      David Schultz <das@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r234799 - head/lib/libc/stdio
Message-ID:  <201204291628.q3TGSdlr022201@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: das
Date: Sun Apr 29 16:28:39 2012
New Revision: 234799
URL: http://svn.freebsd.org/changeset/base/234799

Log:
  Previously, vfscanf()'s wide character processing functions were
  reading wide characters manually.  With this change, they now use
  fgetwc().  To make this work, we use an internal version of fgetwc()
  with a few extensions: it takes an mbstate_t * because non-wide streams
  don't have a built-in mbstate_t, and it indicates the number of bytes
  read.
  
  vfscanf() now resembles vfwscanf() more closely.  Minor functional
  improvements include working xlocale support in vfscanf(), setting the
  stream error indicator on encoding errors, and proper handling of
  shift-based encodings.  (Actually, making shift-based encodings work
  with non-wide streams is hopeless, but the implementation now matches
  the broken specification.)

Modified:
  head/lib/libc/stdio/fgetwc.c
  head/lib/libc/stdio/local.h
  head/lib/libc/stdio/vfscanf.c

Modified: head/lib/libc/stdio/fgetwc.c
==============================================================================
--- head/lib/libc/stdio/fgetwc.c	Sun Apr 29 16:26:55 2012	(r234798)
+++ head/lib/libc/stdio/fgetwc.c	Sun Apr 29 16:28:39 2012	(r234799)
@@ -59,6 +59,7 @@ fgetwc_l(FILE *fp, locale_t locale)
 
 	return (r);
 }
+
 wint_t
 fgetwc(FILE *fp)
 {
@@ -66,40 +67,45 @@ fgetwc(FILE *fp)
 }
 
 /*
- * Non-MT-safe version.
+ * Internal (non-MPSAFE) version of fgetwc().  This version takes an
+ * mbstate_t argument specifying the initial conversion state.  For
+ * wide streams, this should always be fp->_mbstate.  On return, *nread
+ * is set to the number of bytes read.
  */
-wint_t
-__fgetwc(FILE *fp, locale_t locale)
+wint_t 
+__fgetwc_mbs(FILE *fp, mbstate_t *mbs, int *nread, locale_t locale)
 {
 	wchar_t wc;
 	size_t nconv;
 	struct xlocale_ctype *l = XLOCALE_CTYPE(locale);
 
-	if (fp->_r <= 0 && __srefill(fp))
+	if (fp->_r <= 0 && __srefill(fp)) {
+		*nread = 0;
 		return (WEOF);
+	}
 	if (MB_CUR_MAX == 1) {
 		/* Fast path for single-byte encodings. */
 		wc = *fp->_p++;
 		fp->_r--;
+		*nread = 1;
 		return (wc);
 	}
+	*nread = 0;
 	do {
-		nconv = l->__mbrtowc(&wc, fp->_p, fp->_r, &fp->_mbstate);
+		nconv = l->__mbrtowc(&wc, fp->_p, fp->_r, mbs);
 		if (nconv == (size_t)-1)
 			break;
 		else if (nconv == (size_t)-2)
 			continue;
 		else if (nconv == 0) {
-			/*
-			 * Assume that the only valid representation of
-			 * the null wide character is a single null byte.
-			 */
 			fp->_p++;
 			fp->_r--;
+			(*nread)++;
 			return (L'\0');
 		} else {
 			fp->_p += nconv;
 			fp->_r -= nconv;
+			*nread += nconv;
 			return (wc);
 		}
 	} while (__srefill(fp) == 0);

Modified: head/lib/libc/stdio/local.h
==============================================================================
--- head/lib/libc/stdio/local.h	Sun Apr 29 16:26:55 2012	(r234798)
+++ head/lib/libc/stdio/local.h	Sun Apr 29 16:28:39 2012	(r234799)
@@ -56,7 +56,7 @@ extern int	_ftello(FILE *, fpos_t *);
 extern int	_fseeko(FILE *, off_t, int, int);
 extern int	__fflush(FILE *fp);
 extern void	__fcloseall(void);
-extern wint_t	__fgetwc(FILE *, locale_t);
+extern wint_t	__fgetwc_mbs(FILE *, mbstate_t *, int *, locale_t);
 extern wint_t	__fputwc(wchar_t, FILE *, locale_t);
 extern int	__sflush(FILE *);
 extern FILE	*__sfp(void);
@@ -85,6 +85,13 @@ extern size_t	__fread(void * __restrict 
 		FILE * __restrict fp);
 extern int	__sdidinit;
 
+static inline wint_t
+__fgetwc(FILE *fp, locale_t locale)
+{
+	int nread;
+
+	return (__fgetwc_mbs(fp, &fp->_mbstate, &nread, locale));
+}
 
 /*
  * Prepare the given FILE for writing, and return 0 iff it

Modified: head/lib/libc/stdio/vfscanf.c
==============================================================================
--- head/lib/libc/stdio/vfscanf.c	Sun Apr 29 16:26:55 2012	(r234798)
+++ head/lib/libc/stdio/vfscanf.c	Sun Apr 29 16:28:39 2012	(r234799)
@@ -127,9 +127,8 @@ static const mbstate_t initial_mbs;
 static __inline int
 convert_char(FILE *fp, char * __restrict p, int width)
 {
-	int n, nread;
+	int n;
 
-	nread = 0;
 	if (p == SUPPRESS_PTR) {
 		size_t sum = 0;
 		for (;;) {
@@ -149,59 +148,34 @@ convert_char(FILE *fp, char * __restrict
 				break;
 			}
 		}
-		nread += sum;
+		return (sum);
 	} else {
 		size_t r = __fread(p, 1, width, fp);
 		
 		if (r == 0)
 			return (-1);
-		nread += r;
+		return (r);
 	}
-	return (nread);
 }
 
 static __inline int
-convert_wchar(FILE *fp, wchar_t *wcp, int width)
+convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale)
 {
 	mbstate_t mbs;
-	size_t nconv;
 	int n, nread;
-	char buf[MB_CUR_MAX];
+	wint_t wi;
 
-	nread = 0;
+	mbs = initial_mbs;
 	n = 0;
-	while (width != 0) {
-		if (n == MB_CUR_MAX) {
-			fp->_flags |= __SERR;
-			return (-1);
-		}
-		buf[n++] = *fp->_p;
-		fp->_p++;
-		fp->_r--;
-		mbs = initial_mbs;
-		nconv = mbrtowc(wcp, buf, n, &mbs);
-		if (nconv == (size_t)-1) {
-			fp->_flags |= __SERR;
-			return (-1);
-		}
-		if (nconv == 0 && wcp != SUPPRESS_PTR)
-			*wcp = L'\0';
-		if (nconv != (size_t)-2) {
-			nread += n;
-			width--;
-			if (wcp != SUPPRESS_PTR)
-				wcp++;
-			n = 0;
-		}
-		if (fp->_r <= 0 && __srefill(fp)) {
-			if (n != 0) {
-				fp->_flags |= __SERR;
-				return (-1);
-			}
-			break;
-		}
+	while (width-- != 0 &&
+	    (wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF) {
+		if (wcp != SUPPRESS_PTR)
+			*wcp++ = (wchar_t)wi;
+		n += nread;
 	}
-	return (nread);
+	if (n == 0)
+		return (-1);
+	return (n);
 }
 
 static __inline int
@@ -244,63 +218,34 @@ convert_ccl(FILE *fp, char * __restrict 
 }
 
 static __inline int
-convert_wccl(FILE *fp, wchar_t *wcp, int width, const char *ccltab)
+convert_wccl(FILE *fp, wchar_t *wcp, int width, const char *ccltab,
+    locale_t locale)
 {
 	mbstate_t mbs;
-	wchar_t twc;
-	int n, nchars, nconv;
-	char buf[MB_CUR_MAX];
+	wint_t wi;
+	int n, nread;
 
-	if (wcp == SUPPRESS_PTR)
-		wcp = &twc;
+	mbs = initial_mbs;
 	n = 0;
-	nchars = 0;
-	while (width != 0) {
-		if (n == MB_CUR_MAX) {
-			fp->_flags |= __SERR;
-			return (-1);
-		}
-		buf[n++] = *fp->_p;
-		fp->_p++;
-		fp->_r--;
-		mbs = initial_mbs;
-		nconv = mbrtowc(wcp, buf, n, &mbs);
-		if (nconv == (size_t)-1) {
-			fp->_flags |= __SERR;
-			return (-1);
-		}
-		if (nconv == 0)
-			*wcp = L'\0';
-		if (nconv != (size_t)-2) {
-			if (wctob(*wcp) != EOF && !ccltab[wctob(*wcp)]) {
-				while (n != 0) {
-					n--;
-					__ungetc(buf[n], fp);
-				}
-				break;
-			}
-			width--;
-			if (wcp != &twc)
-				wcp++;
-			nchars++;
-			n = 0;
-		}
-		if (fp->_r <= 0 && __srefill(fp)) {
-			if (n != 0) {
-				fp->_flags |= __SERR;
-				return (-1);
-			}
-			break;
+	if (wcp == SUPPRESS_PTR) {
+		while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
+		    width-- != 0 && ccltab[wctob(wi)])
+			n += nread;
+		if (wi != WEOF)
+			__ungetwc(wi, fp, __get_locale());
+	} else {
+		while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
+		    width-- != 0 && ccltab[wctob(wi)]) {
+			*wcp++ = (wchar_t)wi;
+			n += nread;
 		}
+		if (wi != WEOF)
+			__ungetwc(wi, fp, __get_locale());
+		if (n == 0)
+			return (0);
+		*wcp = 0;
 	}
-	if (n != 0) {
-		fp->_flags |= __SERR;
-		return (-1);
-	}
-	if (nchars == 0)
-		return (0);
-	*wcp = L'\0';
-	return (nchars);
+	return (n);
 }
 
 static __inline int
@@ -335,56 +280,31 @@ convert_string(FILE *fp, char * __restri
 }
 
 static __inline int
-convert_wstring(FILE *fp, wchar_t *wcp, int width)
+convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
 {
 	mbstate_t mbs;
-	wchar_t twc;
-	int n, nconv, nread;
-	char buf[MB_CUR_MAX];
-
-	if (wcp == SUPPRESS_PTR)
-		wcp = &twc;
-	n = nread = 0;
-	while (!isspace(*fp->_p) && width != 0) {
-		if (n == MB_CUR_MAX) {
-			fp->_flags |= __SERR;
-			return (-1);
-		}
-		buf[n++] = *fp->_p;
-		fp->_p++;
-		fp->_r--;
-		mbs = initial_mbs;
-		nconv = mbrtowc(wcp, buf, n, &mbs);
-		if (nconv == (size_t)-1) {
-			fp->_flags |= __SERR;
-			return (-1);
-		}
-		if (nconv == 0)
-			*wcp = L'\0';
-		if (nconv != (size_t)-2) {
-			if (iswspace(*wcp)) {
-				while (n != 0) {
-					n--;
-					__ungetc(buf[n], fp);
-				}
-				break;
-			}
-			nread += n;
-			width--;
-			if (wcp != &twc)
-				wcp++;
-			n = 0;
-		}
-		if (fp->_r <= 0 && __srefill(fp)) {
-			if (n != 0) {
-				fp->_flags |= __SERR;
-				return (-1);
-			}
-			break;
-		}
+	wint_t wi;
+	int n, nread;
+
+	mbs = initial_mbs;
+	n = 0;
+	if (wcp == SUPPRESS_PTR) {
+		while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
+		    width-- != 0 && !iswspace(wi))
+			n += nread;
+		if (wi != WEOF)
+			__ungetwc(wi, fp, __get_locale());
+	} else {
+		while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
+		    width-- != 0 && !iswspace(wi)) {
+			*wcp++ = (wchar_t)wi;
+			n += nread;
+		}
+		if (wi != WEOF)
+			__ungetwc(wi, fp, __get_locale());
+		*wcp = '\0';
 	}
-	*wcp = L'\0';
-	return (nread);
+	return (n);
 }
 
 /*
@@ -766,7 +686,7 @@ literal:
 				width = 1;
 			if (flags & LONG) {
 				nr = convert_wchar(fp, GETARG(wchar_t *),
-				    width);
+				    width, locale);
 			} else {
 				nr = convert_char(fp, GETARG(char *), width);
 			}
@@ -780,7 +700,7 @@ literal:
 				width = (size_t)~0;	/* `infinity' */
 			if (flags & LONG) {
 				nr = convert_wccl(fp, GETARG(wchar_t *), width,
-				    ccltab);
+				    ccltab, locale);
 			} else {
 				nr = convert_ccl(fp, GETARG(char *), width,
 				    ccltab);
@@ -799,7 +719,7 @@ literal:
 				width = (size_t)~0;
 			if (flags & LONG) {
 				nr = convert_wstring(fp, GETARG(wchar_t *),
-				    width);
+				    width, locale);
 			} else {
 				nr = convert_string(fp, GETARG(char *), width);
 			}



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201204291628.q3TGSdlr022201>