Skip site navigation (1) Skip section navigation (2)
Date:      Thu, 7 Sep 2023 20:28:45 GMT
From:      Dag-Erling Smørgrav <des@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org
Subject:   git: bae932999e0f - stable/14 - libc: Implement N2630.
Message-ID:  <202309072028.387KSjrr064522@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch stable/14 has been updated by des:

URL: https://cgit.FreeBSD.org/src/commit/?id=bae932999e0fa9e9a08d84ca992332c57e5b42be

commit bae932999e0fa9e9a08d84ca992332c57e5b42be
Author:     Dag-Erling Smørgrav <des@FreeBSD.org>
AuthorDate: 2023-08-28 15:32:23 +0000
Commit:     Dag-Erling Smørgrav <des@FreeBSD.org>
CommitDate: 2023-09-07 20:28:30 +0000

    libc: Implement N2630.
    
    This adds formatted input/output of binary integer numbers to the printf(), scanf(), and strtol() families, including their wide-character counterparts.
    
    Reviewed by:    imp, emaste
    Differential Revision:  https://reviews.freebsd.org/D41511
    
    (cherry picked from commit d9dc1603d6e48cca84cad3ebe859129131b8387c)
    
    libc: Add unit tests for N2630 and possible collateral damage.
    
    Reviewed by:    imp, emaste
    Differential Revision:  https://reviews.freebsd.org/D41512
    
    (cherry picked from commit b9385720f34b536ef2568a642e8b1fad0450056f)
    
    libc: Document support for binary integers.
    
    Reviewed by:    debdrup, emaste
    Differential Revision:  https://reviews.freebsd.org/D41522
    
    (cherry picked from commit 76edfabbecdec686a570b8e009d5ea4112f943e0)
    
    libc: Fix fixed-width case in the new integer parser.
    
    Fixes:          d9dc1603d6e4
    Differential Revision:  https://reviews.freebsd.org/D41622
    
    (cherry picked from commit aca3bd1602577591e5cd237c4bb0bb71b3be0c75)
    
    libc: Add a wide version of snprintf_test.
    
    Reviewed by:    imp, emaste
    Differential Revision:  https://reviews.freebsd.org/D41726
    
    (cherry picked from commit 4ec9ee9912765ac4ca57353999caa92a23283d8e)
    
    libc: Suppress format checks on printf() / scanf() tests.
    
    Reviewed by:    jrtc27, markj, emaste
    Differential Revision:  https://reviews.freebsd.org/D41727
    
    (cherry picked from commit 294bd2827e61a78041f6613f4b82235fcc454157)
    
    Approved by:    re (gjb)
---
 contrib/netbsd-tests/lib/libc/stdlib/t_strtol.c |   6 +
 lib/libc/iconv/_strtol.h                        |   7 +
 lib/libc/iconv/_strtoul.h                       |   7 +
 lib/libc/locale/wcstoimax.c                     |   7 +
 lib/libc/locale/wcstol.c                        |   7 +
 lib/libc/locale/wcstoll.c                       |   7 +
 lib/libc/locale/wcstoul.c                       |   7 +
 lib/libc/locale/wcstoull.c                      |   7 +
 lib/libc/locale/wcstoumax.c                     |   7 +
 lib/libc/stdio/printf.3                         |  34 ++-
 lib/libc/stdio/printfcommon.h                   |  14 ++
 lib/libc/stdio/scanf.3                          |  29 ++-
 lib/libc/stdio/vfprintf.c                       |  13 ++
 lib/libc/stdio/vfscanf.c                        | 267 +++++++++++++-----------
 lib/libc/stdio/vfwprintf.c                      |  13 ++
 lib/libc/stdio/vfwscanf.c                       | 263 +++++++++++++----------
 lib/libc/stdlib/strtoimax.c                     |   7 +
 lib/libc/stdlib/strtol.3                        |   4 +-
 lib/libc/stdlib/strtol.c                        |   7 +
 lib/libc/stdlib/strtoll.c                       |  12 +-
 lib/libc/stdlib/strtoul.3                       |   4 +-
 lib/libc/stdlib/strtoul.c                       |   7 +
 lib/libc/stdlib/strtoull.c                      |   7 +
 lib/libc/stdlib/strtoumax.c                     |   7 +
 lib/libc/tests/stdio/Makefile                   |  12 +-
 lib/libc/tests/stdio/snprintf_test.c            | 139 ++++++++++++
 lib/libc/tests/stdio/sscanf_test.c              | 266 +++++++++++++++++++++++
 lib/libc/tests/stdio/swprintf_test.c            | 140 +++++++++++++
 lib/libc/tests/stdio/swscanf_test.c             | 267 ++++++++++++++++++++++++
 29 files changed, 1315 insertions(+), 259 deletions(-)

diff --git a/contrib/netbsd-tests/lib/libc/stdlib/t_strtol.c b/contrib/netbsd-tests/lib/libc/stdlib/t_strtol.c
index 54e190760656..d1027fcc7bb1 100644
--- a/contrib/netbsd-tests/lib/libc/stdlib/t_strtol.c
+++ b/contrib/netbsd-tests/lib/libc/stdlib/t_strtol.c
@@ -94,6 +94,12 @@ ATF_TC_BODY(strtol_base, tc)
 		{ "01234567",			    342391,  0, NULL	},
 		{ "0123456789",			 123456789, 10, NULL	},
 		{ "0x75bcd15",		         123456789,  0, NULL	},
+#ifdef __FreeBSD__
+		{ "0x",				         0,  0, "x"	},
+		{ "0b111010110111100110100010101", 123456789,  0, NULL	},
+		{ "0b0123",			         1,  0, "23"	},
+		{ "0b",				         0,  0, "b"	},
+#endif
 	};
 
 	long long int lli;
diff --git a/lib/libc/iconv/_strtol.h b/lib/libc/iconv/_strtol.h
index d183edbe8c3a..94a13c56db98 100644
--- a/lib/libc/iconv/_strtol.h
+++ b/lib/libc/iconv/_strtol.h
@@ -91,6 +91,13 @@ _FUNCNAME(const char *nptr, char **endptr, int base)
 		s += 2;
 		base = 16;
 	}
+	if ((base == 0 || base == 2) &&
+	    c == '0' && (*s == 'b' || *s == 'B') &&
+	    (s[1] >= '0' && s[1] <= '1')) {
+		c = s[1];
+		s += 2;
+		base = 2;
+	}
 	if (base == 0)
 		base = (c == '0' ? 8 : 10);
 
diff --git a/lib/libc/iconv/_strtoul.h b/lib/libc/iconv/_strtoul.h
index eade72e9c2e6..4944e1fb06e0 100644
--- a/lib/libc/iconv/_strtoul.h
+++ b/lib/libc/iconv/_strtoul.h
@@ -87,6 +87,13 @@ _FUNCNAME(const char *nptr, char **endptr, int base)
 		s += 2;
 		base = 16;
 	}
+	if ((base == 0 || base == 2) &&
+	    c == '0' && (*s == 'b' || *s == 'B') &&
+	    (s[1] >= '0' && s[1] <= '1')) {
+		c = s[1];
+		s += 2;
+		base = 2;
+	}
 	if (base == 0)
 		base = (c == '0' ? 8 : 10);
 
diff --git a/lib/libc/locale/wcstoimax.c b/lib/libc/locale/wcstoimax.c
index 259faa2b011c..5ed949cd0531 100644
--- a/lib/libc/locale/wcstoimax.c
+++ b/lib/libc/locale/wcstoimax.c
@@ -86,6 +86,13 @@ wcstoimax_l(const wchar_t * __restrict nptr, wchar_t ** __restrict endptr,
 		s += 2;
 		base = 16;
 	}
+	if ((base == 0 || base == 2) &&
+	    c == L'0' && (*s == L'b' || *s == L'B') &&
+	    (s[1] >= L'0' && s[1] <= L'1')) {
+		c = s[1];
+		s += 2;
+		base = 2;
+	}
 	if (base == 0)
 		base = c == L'0' ? 8 : 10;
 	acc = any = 0;
diff --git a/lib/libc/locale/wcstol.c b/lib/libc/locale/wcstol.c
index b0b787384f39..1678b615ca1c 100644
--- a/lib/libc/locale/wcstol.c
+++ b/lib/libc/locale/wcstol.c
@@ -80,6 +80,13 @@ wcstol_l(const wchar_t * __restrict nptr, wchar_t ** __restrict endptr, int
 		s += 2;
 		base = 16;
 	}
+	if ((base == 0 || base == 2) &&
+	    c == L'0' && (*s == L'b' || *s == L'B') &&
+	    (s[1] >= L'0' && s[1] <= L'1')) {
+		c = s[1];
+		s += 2;
+		base = 2;
+	}
 	if (base == 0)
 		base = c == L'0' ? 8 : 10;
 	acc = any = 0;
diff --git a/lib/libc/locale/wcstoll.c b/lib/libc/locale/wcstoll.c
index ac07d6c6adbf..ef1e6ef58861 100644
--- a/lib/libc/locale/wcstoll.c
+++ b/lib/libc/locale/wcstoll.c
@@ -86,6 +86,13 @@ wcstoll_l(const wchar_t * __restrict nptr, wchar_t ** __restrict endptr,
 		s += 2;
 		base = 16;
 	}
+	if ((base == 0 || base == 2) &&
+	    c == L'0' && (*s == L'b' || *s == L'B') &&
+	    (s[1] >= L'0' && s[1] <= L'1')) {
+		c = s[1];
+		s += 2;
+		base = 2;
+	}
 	if (base == 0)
 		base = c == L'0' ? 8 : 10;
 	acc = any = 0;
diff --git a/lib/libc/locale/wcstoul.c b/lib/libc/locale/wcstoul.c
index 9f58db799c0e..2c9c8820b1f6 100644
--- a/lib/libc/locale/wcstoul.c
+++ b/lib/libc/locale/wcstoul.c
@@ -80,6 +80,13 @@ wcstoul_l(const wchar_t * __restrict nptr, wchar_t ** __restrict endptr,
 		s += 2;
 		base = 16;
 	}
+	if ((base == 0 || base == 2) &&
+	    c == L'0' && (*s == L'b' || *s == L'B') &&
+	    (s[1] >= L'0' && s[1] <= L'1')) {
+		c = s[1];
+		s += 2;
+		base = 2;
+	}
 	if (base == 0)
 		base = c == L'0' ? 8 : 10;
 	acc = any = 0;
diff --git a/lib/libc/locale/wcstoull.c b/lib/libc/locale/wcstoull.c
index cbc7253f884d..692eb90eef6b 100644
--- a/lib/libc/locale/wcstoull.c
+++ b/lib/libc/locale/wcstoull.c
@@ -86,6 +86,13 @@ wcstoull_l(const wchar_t * __restrict nptr, wchar_t ** __restrict endptr,
 		s += 2;
 		base = 16;
 	}
+	if ((base == 0 || base == 2) &&
+	    c == L'0' && (*s == L'b' || *s == L'B') &&
+	    (s[1] >= L'0' && s[1] <= L'1')) {
+		c = s[1];
+		s += 2;
+		base = 2;
+	}
 	if (base == 0)
 		base = c == L'0' ? 8 : 10;
 	acc = any = 0;
diff --git a/lib/libc/locale/wcstoumax.c b/lib/libc/locale/wcstoumax.c
index 4380cccf2424..c4f2ec3aaf41 100644
--- a/lib/libc/locale/wcstoumax.c
+++ b/lib/libc/locale/wcstoumax.c
@@ -86,6 +86,13 @@ wcstoumax_l(const wchar_t * __restrict nptr, wchar_t ** __restrict endptr,
 		s += 2;
 		base = 16;
 	}
+	if ((base == 0 || base == 2) &&
+	    c == L'0' && (*s == L'b' || *s == L'B') &&
+	    (s[1] >= L'0' && s[1] <= L'1')) {
+		c = s[1];
+		s += 2;
+		base = 2;
+	}
 	if (base == 0)
 		base = c == L'0' ? 8 : 10;
 	acc = any = 0;
diff --git a/lib/libc/stdio/printf.3 b/lib/libc/stdio/printf.3
index 3e5c6ca23511..110851e2a421 100644
--- a/lib/libc/stdio/printf.3
+++ b/lib/libc/stdio/printf.3
@@ -31,7 +31,7 @@
 .\"
 .\"     @(#)printf.3	8.1 (Berkeley) 6/4/93
 .\"
-.Dd May 22, 2018
+.Dd August 21, 2023
 .Dt PRINTF 3
 .Os
 .Sh NAME
@@ -212,6 +212,17 @@ and
 .Cm u
 conversions, this option has no effect.
 For
+.Cm b
+and
+.Cm B
+conversions, a non-zero result has the string
+.Ql 0b
+(or
+.Ql 0B
+for
+.Cm B
+conversions) prepended to it.
+For
 .Cm o
 conversions, the precision of the number is increased to force the first
 character of the output string to a zero.
@@ -245,7 +256,7 @@ For all conversions except
 .Cm n ,
 the converted value is padded on the left with zeros rather than blanks.
 If a precision is given with a numeric conversion
-.Cm ( d , i , o , u , i , x ,
+.Cm ( b , B , d , i , o , u , i , x ,
 and
 .Cm X ) ,
 the
@@ -301,7 +312,7 @@ followed by an
 optional digit string.
 If the digit string is omitted, the precision is taken as zero.
 This gives the minimum number of digits to appear for
-.Cm d , i , o , u , x ,
+.Cm b , B , d , i , o , u , x ,
 and
 .Cm X
 conversions, the number of digits to appear after the decimal-point for
@@ -319,12 +330,12 @@ conversions.
 .It
 An optional length modifier, that specifies the size of the argument.
 The following length modifiers are valid for the
-.Cm d , i , n , o , u , x ,
+.Cm b , B , d , i , n , o , u , x ,
 or
 .Cm X
 conversion:
 .Bl -column ".Cm q Em (deprecated)" ".Vt signed char" ".Vt unsigned long long" ".Vt long long *"
-.It Sy Modifier Ta Cm d , i Ta Cm o , u , x , X Ta Cm n
+.It Sy Modifier Ta Cm d , i Ta Cm b , B , o , u , x , X Ta Cm n
 .It Cm hh Ta Vt "signed char" Ta Vt "unsigned char" Ta Vt "signed char *"
 .It Cm h Ta Vt short Ta Vt "unsigned short" Ta Vt "short *"
 .It Cm l No (ell) Ta Vt long Ta Vt "unsigned long" Ta Vt "long *"
@@ -339,7 +350,7 @@ Note:
 the
 .Cm t
 modifier, when applied to a
-.Cm o , u , x ,
+.Cm b , B , o , u , x ,
 or
 .Cm X
 conversion, indicates that the argument is of an unsigned type
@@ -403,11 +414,16 @@ If a single format directive mixes positional
 and non-positional arguments, the results are undefined.
 .Pp
 The conversion specifiers and their meanings are:
-.Bl -tag -width ".Cm diouxX"
-.It Cm diouxX
+.Bl -tag -width ".Cm bBdiouxX"
+.It Cm bBdiouxX
 The
 .Vt int
-(or appropriate variant) argument is converted to signed decimal
+(or appropriate variant) argument is converted to
+unsigned binary
+.Cm ( b
+and
+.Cm B ) ,
+signed decimal
 .Cm ( d
 and
 .Cm i ) ,
diff --git a/lib/libc/stdio/printfcommon.h b/lib/libc/stdio/printfcommon.h
index ac5aed0a5fcd..411b778dc234 100644
--- a/lib/libc/stdio/printfcommon.h
+++ b/lib/libc/stdio/printfcommon.h
@@ -194,6 +194,13 @@ __ultoa(u_long val, CHAR *endp, int base, int octzero, const char *xdigs)
 		} while (sval != 0);
 		break;
 
+	case 2:
+		do {
+			*--cp = to_char(val & 1);
+			val >>= 1;
+		} while (val);
+		break;
+
 	case 8:
 		do {
 			*--cp = to_char(val & 7);
@@ -244,6 +251,13 @@ __ujtoa(uintmax_t val, CHAR *endp, int base, int octzero, const char *xdigs)
 		} while (sval != 0);
 		break;
 
+	case 2:
+		do {
+			*--cp = to_char(val & 1);
+			val >>= 1;
+		} while (val);
+		break;
+
 	case 8:
 		do {
 			*--cp = to_char(val & 7);
diff --git a/lib/libc/stdio/scanf.3 b/lib/libc/stdio/scanf.3
index b1c50e10a795..6cefdb133983 100644
--- a/lib/libc/stdio/scanf.3
+++ b/lib/libc/stdio/scanf.3
@@ -31,7 +31,7 @@
 .\"
 .\"     @(#)scanf.3	8.2 (Berkeley) 12/11/93
 .\"
-.Dd April 2, 2022
+.Dd August 21, 2023
 .Dt SCANF 3
 .Os
 .Sh NAME
@@ -141,7 +141,7 @@ The conversion that follows occurs as usual, but no pointer is used;
 the result of the conversion is simply discarded.
 .It Cm hh
 Indicates that the conversion will be one of
-.Cm dioux
+.Cm bdioux
 or
 .Cm n
 and the next pointer is a pointer to a
@@ -150,7 +150,7 @@ and the next pointer is a pointer to a
 .Vt int ) .
 .It Cm h
 Indicates that the conversion will be one of
-.Cm dioux
+.Cm bdioux
 or
 .Cm n
 and the next pointer is a pointer to a
@@ -159,7 +159,7 @@ and the next pointer is a pointer to a
 .Vt int ) .
 .It Cm l No (ell)
 Indicates that the conversion will be one of
-.Cm dioux
+.Cm bdioux
 or
 .Cm n
 and the next pointer is a pointer to a
@@ -185,7 +185,7 @@ and the next pointer is a pointer to an array of
 .Vt char ) .
 .It Cm ll No (ell ell)
 Indicates that the conversion will be one of
-.Cm dioux
+.Cm bdioux
 or
 .Cm n
 and the next pointer is a pointer to a
@@ -201,7 +201,7 @@ and the next pointer is a pointer to
 .Vt "long double" .
 .It Cm j
 Indicates that the conversion will be one of
-.Cm dioux
+.Cm bdioux
 or
 .Cm n
 and the next pointer is a pointer to a
@@ -210,7 +210,7 @@ and the next pointer is a pointer to a
 .Vt int ) .
 .It Cm t
 Indicates that the conversion will be one of
-.Cm dioux
+.Cm bdioux
 or
 .Cm n
 and the next pointer is a pointer to a
@@ -219,7 +219,7 @@ and the next pointer is a pointer to a
 .Vt int ) .
 .It Cm z
 Indicates that the conversion will be one of
-.Cm dioux
+.Cm bdioux
 or
 .Cm n
 and the next pointer is a pointer to a
@@ -229,7 +229,7 @@ and the next pointer is a pointer to a
 .It Cm q
 (deprecated.)
 Indicates that the conversion will be one of
-.Cm dioux
+.Cm bdioux
 or
 .Cm n
 and the next pointer is a pointer to a
@@ -273,6 +273,10 @@ matches a single input
 .Ql %
 character.
 No conversion is done, and assignment does not occur.
+.It Cm b , B
+Matches an optionally signed binary integer;
+the next pointer must be a pointer to
+.Vt "unsigned int" .
 .It Cm d
 Matches an optionally signed decimal integer;
 the next pointer must be a pointer to
@@ -281,7 +285,12 @@ the next pointer must be a pointer to
 Matches an optionally signed integer;
 the next pointer must be a pointer to
 .Vt int .
-The integer is read in base 16 if it begins
+The integer is read
+in base 2 if it begins with
+.Ql 0b
+or
+.Ql 0B ,
+in base 16 if it begins
 with
 .Ql 0x
 or
diff --git a/lib/libc/stdio/vfprintf.c b/lib/libc/stdio/vfprintf.c
index ad655c5d78d4..5e5a9b5e31c1 100644
--- a/lib/libc/stdio/vfprintf.c
+++ b/lib/libc/stdio/vfprintf.c
@@ -613,6 +613,19 @@ reswitch:	switch (ch) {
 		case 'z':
 			flags |= SIZET;
 			goto rflag;
+		case 'B':
+		case 'b':
+			if (flags & INTMAX_SIZE)
+				ujval = UJARG();
+			else
+				ulval = UARG();
+			base = 2;
+			/* leading 0b/B only if non-zero */
+			if (flags & ALT &&
+			    (flags & INTMAX_SIZE ? ujval != 0 : ulval != 0))
+				ox[1] = ch;
+			goto nosign;
+			break;
 		case 'C':
 			flags |= LONGINT;
 			/*FALLTHROUGH*/
diff --git a/lib/libc/stdio/vfscanf.c b/lib/libc/stdio/vfscanf.c
index cc2e1e428321..9727c9e70c34 100644
--- a/lib/libc/stdio/vfscanf.c
+++ b/lib/libc/stdio/vfscanf.c
@@ -6,6 +6,8 @@
  *
  * Copyright (c) 2011 The FreeBSD Foundation
  *
+ * Copyright (c) 2023 Dag-Erling Smørgrav
+ *
  * Portions of this software were developed by David Chisnall
  * under sponsorship from the FreeBSD Foundation.
  *
@@ -80,16 +82,6 @@ static char sccsid[] = "@(#)vfscanf.c	8.1 (Berkeley) 6/4/93";
 #define	SHORTSHORT	0x4000	/* hh: char */
 #define	UNSIGNED	0x8000	/* %[oupxX] conversions */
 
-/*
- * The following are used in integral conversions only:
- * SIGNOK, NDIGITS, PFXOK, and NZDIGITS
- */
-#define	SIGNOK		0x40	/* +/- is (still) legal */
-#define	NDIGITS		0x80	/* no digits detected */
-#define	PFXOK		0x100	/* 0x prefix is (still) legal */
-#define	NZDIGITS	0x200	/* no zero digits detected */
-#define	HAVESIGN	0x10000	/* sign detected */
-
 /*
  * Conversion types.
  */
@@ -307,129 +299,160 @@ convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
 	return (n);
 }
 
+enum parseint_state {
+	begin,
+	havesign,
+	havezero,
+	haveprefix,
+	any,
+};
+
+static __inline int
+parseint_fsm(int c, enum parseint_state *state, int *base)
+{
+	switch (c) {
+	case '+':
+	case '-':
+		if (*state == begin) {
+			*state = havesign;
+			return 1;
+		}
+		break;
+	case '0':
+		if (*state == begin || *state == havesign) {
+			*state = havezero;
+		} else {
+			*state = any;
+		}
+		return 1;
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+		if (*state == havezero && *base == 0) {
+			*base = 8;
+		}
+		/* FALL THROUGH */
+	case '8':
+	case '9':
+		if (*state == begin ||
+		    *state == havesign) {
+			if (*base == 0) {
+				*base = 10;
+			}
+		}
+		if (*state == begin ||
+		    *state == havesign ||
+		    *state == havezero ||
+		    *state == haveprefix ||
+		    *state == any) {
+			if (*base > c - '0') {
+				*state = any;
+				return 1;
+			}
+		}
+		break;
+	case 'b':
+		if (*state == havezero) {
+			if (*base == 0 || *base == 2) {
+				*state = haveprefix;
+				*base = 2;
+				return 1;
+			}
+		}
+		/* FALL THROUGH */
+	case 'a':
+	case 'c':
+	case 'd':
+	case 'e':
+	case 'f':
+		if (*state == begin ||
+		    *state == havesign ||
+		    *state == havezero ||
+		    *state == haveprefix ||
+		    *state == any) {
+			if (*base > c - 'a' + 10) {
+				*state = any;
+				return 1;
+			}
+		}
+		break;
+	case 'B':
+		if (*state == havezero) {
+			if (*base == 0 || *base == 2) {
+				*state = haveprefix;
+				*base = 2;
+				return 1;
+			}
+		}
+		/* FALL THROUGH */
+	case 'A':
+	case 'C':
+	case 'D':
+	case 'E':
+	case 'F':
+		if (*state == begin ||
+		    *state == havesign ||
+		    *state == havezero ||
+		    *state == haveprefix ||
+		    *state == any) {
+			if (*base > c - 'A' + 10) {
+				*state = any;
+				return 1;
+			}
+		}
+		break;
+	case 'x':
+	case 'X':
+		if (*state == havezero) {
+			if (*base == 0 || *base == 16) {
+				*state = haveprefix;
+				*base = 16;
+				return 1;
+			}
+		}
+		break;
+	}
+	return 0;
+}
+
 /*
- * Read an integer, storing it in buf.  The only relevant bit in the
- * flags argument is PFXOK.
+ * Read an integer, storing it in buf.
  *
  * Return 0 on a match failure, and the number of characters read
  * otherwise.
  */
 static __inline int
-parseint(FILE *fp, char * __restrict buf, int width, int base, int flags)
+parseint(FILE *fp, char * __restrict buf, int width, int base)
 {
-	/* `basefix' is used to avoid `if' tests */
-	static const short basefix[17] =
-		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+	enum parseint_state state = begin;
 	char *p;
 	int c;
 
-	flags |= SIGNOK | NDIGITS | NZDIGITS;
 	for (p = buf; width; width--) {
-		c = *fp->_p;
-		/*
-		 * Switch on the character; `goto ok' if we accept it
-		 * as a part of number.
-		 */
-		switch (c) {
-
-		/*
-		 * The digit 0 is always legal, but is special.  For
-		 * %i conversions, if no digits (zero or nonzero) have
-		 * been scanned (only signs), we will have base==0.
-		 * In that case, we should set it to 8 and enable 0x
-		 * prefixing.  Also, if we have not scanned zero
-		 * digits before this, do not turn off prefixing
-		 * (someone else will turn it off if we have scanned
-		 * any nonzero digits).
-		 */
-		case '0':
-			if (base == 0) {
-				base = 8;
-				flags |= PFXOK;
-			}
-			if (flags & NZDIGITS)
-				flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
-			else
-				flags &= ~(SIGNOK|PFXOK|NDIGITS);
-			goto ok;
-
-		/* 1 through 7 always legal */
-		case '1': case '2': case '3':
-		case '4': case '5': case '6': case '7':
-			base = basefix[base];
-			flags &= ~(SIGNOK | PFXOK | NDIGITS);
-			goto ok;
-
-		/* digits 8 and 9 ok iff decimal or hex */
-		case '8': case '9':
-			base = basefix[base];
-			if (base <= 8)
-				break;	/* not legal here */
-			flags &= ~(SIGNOK | PFXOK | NDIGITS);
-			goto ok;
-
-		/* letters ok iff hex */
-		case 'A': case 'B': case 'C':
-		case 'D': case 'E': case 'F':
-		case 'a': case 'b': case 'c':
-		case 'd': case 'e': case 'f':
-			/* no need to fix base here */
-			if (base <= 10)
-				break;	/* not legal here */
-			flags &= ~(SIGNOK | PFXOK | NDIGITS);
-			goto ok;
-
-		/* sign ok only as first character */
-		case '+': case '-':
-			if (flags & SIGNOK) {
-				flags &= ~SIGNOK;
-				flags |= HAVESIGN;
-				goto ok;
-			}
+		c = __sgetc(fp);
+		if (c == EOF)
 			break;
-
-		/*
-		 * x ok iff flag still set & 2nd char (or 3rd char if
-		 * we have a sign).
-		 */
-		case 'x': case 'X':
-			if (flags & PFXOK && p ==
-			    buf + 1 + !!(flags & HAVESIGN)) {
-				base = 16;	/* if %i */
-				flags &= ~PFXOK;
-				goto ok;
-			}
+		if (!parseint_fsm(c, &state, &base))
 			break;
-		}
-
-		/*
-		 * If we got here, c is not a legal character for a
-		 * number.  Stop accumulating digits.
-		 */
-		break;
-	ok:
-		/*
-		 * c is legal: store it and look at the next.
-		 */
 		*p++ = c;
-		if (--fp->_r > 0)
-			fp->_p++;
-		else if (__srefill(fp))
-			break;		/* EOF */
 	}
 	/*
-	 * If we had only a sign, it is no good; push back the sign.
-	 * If the number ends in `x', it was [sign] '0' 'x', so push
-	 * back the x and treat it as [sign] '0'.
+	 * If we only had a sign, push it back.  If we only had a 0b or 0x
+	 * prefix (possibly preceded by a sign), we view it as "0" and
+	 * push back the letter.  In all other cases, if we stopped
+	 * because we read a non-number character, push it back.
 	 */
-	if (flags & NDIGITS) {
-		if (p > buf)
-			(void) __ungetc(*(u_char *)--p, fp);
-		return (0);
-	}
-	c = ((u_char *)p)[-1];
-	if (c == 'x' || c == 'X') {
-		--p;
+	if (state == havesign) {
+		p--;
+		(void) __ungetc(*(u_char *)p, fp);
+	} else if (state == haveprefix) {
+		p--;
+		(void) __ungetc(c, fp);
+	} else if (width && c != EOF) {
 		(void) __ungetc(c, fp);
 	}
 	return (p - buf);
@@ -554,6 +577,13 @@ literal:
 		/*
 		 * Conversions.
 		 */
+		case 'B':
+		case 'b':
+			c = CT_INT;
+			flags |= UNSIGNED;
+			base = 2;
+			break;
+
 		case 'd':
 			c = CT_INT;
 			base = 10;
@@ -578,7 +608,6 @@ literal:
 
 		case 'X':
 		case 'x':
-			flags |= PFXOK;	/* enable 0x prefixing */
 			c = CT_INT;
 			flags |= UNSIGNED;
 			base = 16;
@@ -613,7 +642,7 @@ literal:
 			break;
 
 		case 'p':	/* pointer format is like hex */
-			flags |= POINTER | PFXOK;
+			flags |= POINTER;
 			c = CT_INT;		/* assumes sizeof(uintmax_t) */
 			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
 			base = 16;
@@ -738,7 +767,7 @@ literal:
 				width = sizeof(buf) - 2;
 			width++;
 #endif
-			nr = parseint(fp, buf, width, base, flags);
+			nr = parseint(fp, buf, width, base);
 			if (nr == 0)
 				goto match_failure;
 			if ((flags & SUPPRESS) == 0) {
diff --git a/lib/libc/stdio/vfwprintf.c b/lib/libc/stdio/vfwprintf.c
index fc681e8d0575..259a86467ea7 100644
--- a/lib/libc/stdio/vfwprintf.c
+++ b/lib/libc/stdio/vfwprintf.c
@@ -684,6 +684,19 @@ reswitch:	switch (ch) {
 		case 'z':
 			flags |= SIZET;
 			goto rflag;
+		case 'B':
+		case 'b':
+			if (flags & INTMAX_SIZE)
+				ujval = UJARG();
+			else
+				ulval = UARG();
+			base = 2;
+			/* leading 0b/B only if non-zero */
+			if (flags & ALT &&
+			    (flags & INTMAX_SIZE ? ujval != 0 : ulval != 0))
+				ox[1] = ch;
+			goto nosign;
+			break;
 		case 'C':
 			flags |= LONGINT;
 			/*FALLTHROUGH*/
diff --git a/lib/libc/stdio/vfwscanf.c b/lib/libc/stdio/vfwscanf.c
index 1a28ff665247..b03c9dba0699 100644
--- a/lib/libc/stdio/vfwscanf.c
+++ b/lib/libc/stdio/vfwscanf.c
@@ -9,6 +9,8 @@
  *
  * Copyright (c) 2011 The FreeBSD Foundation
  *
+ * Copyright (c) 2023 Dag-Erling Smørgrav
+ *
  * Portions of this software were developed by David Chisnall
  * under sponsorship from the FreeBSD Foundation.
  *
@@ -78,16 +80,6 @@ static char sccsid[] = "@(#)vfscanf.c	8.1 (Berkeley) 6/4/93";
 #define	SHORTSHORT	0x4000	/* hh: char */
 #define	UNSIGNED	0x8000	/* %[oupxX] conversions */
 
-/*
- * The following are used in integral conversions only:
- * SIGNOK, NDIGITS, PFXOK, and NZDIGITS
- */
-#define	SIGNOK		0x40	/* +/- is (still) legal */
-#define	NDIGITS		0x80	/* no digits detected */
-#define	PFXOK		0x100	/* 0x prefix is (still) legal */
-#define	NZDIGITS	0x200	/* no zero digits detected */
-#define	HAVESIGN	0x10000	/* sign detected */
-
 /*
  * Conversion types.
  */
@@ -289,128 +281,161 @@ convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
 	return (nread);
 }
 
+enum parseint_state {
+	begin,
+	havesign,
+	havezero,
+	haveprefix,
+	any,
+};
+
+static __inline int
+parseint_fsm(wchar_t c, enum parseint_state *state, int *base)
+{
+	switch (c) {
+	case '+':
+	case '-':
+		if (*state == begin) {
+			*state = havesign;
+			return 1;
+		}
+		break;
+	case '0':
+		if (*state == begin || *state == havesign) {
+			*state = havezero;
+		} else {
+			*state = any;
+		}
+		return 1;
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+		if (*state == havezero && *base == 0) {
+			*base = 8;
+		}
+		/* FALL THROUGH */
+	case '8':
+	case '9':
+		if (*state == begin ||
+		    *state == havesign) {
+			if (*base == 0) {
+				*base = 10;
+			}
+		}
+		if (*state == begin ||
+		    *state == havesign ||
+		    *state == havezero ||
+		    *state == haveprefix ||
+		    *state == any) {
+			if (*base > c - '0') {
+				*state = any;
+				return 1;
+			}
+		}
+		break;
+	case 'b':
+		if (*state == havezero) {
+			if (*base == 0 || *base == 2) {
+				*state = haveprefix;
+				*base = 2;
+				return 1;
+			}
+		}
+		/* FALL THROUGH */
+	case 'a':
+	case 'c':
+	case 'd':
+	case 'e':
+	case 'f':
+		if (*state == begin ||
+		    *state == havesign ||
+		    *state == havezero ||
+		    *state == haveprefix ||
+		    *state == any) {
+			if (*base > c - 'a' + 10) {
+				*state = any;
+				return 1;
+			}
+		}
+		break;
+	case 'B':
+		if (*state == havezero) {
+			if (*base == 0 || *base == 2) {
+				*state = haveprefix;
*** 1248 LINES SKIPPED ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202309072028.387KSjrr064522>