Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 8 Apr 2001 22:53:20 -0700
From:      "Chris L. Bond" <cbond@twistedcircus.org>
To:        freebsd-hackers@freebsd.org
Subject:   [PATCH] Automatic string allocation for scanf() family
Message-ID:  <20010408225320.A8157@twistedcircus.org>

next in thread | raw e-mail | index | archive | help

--9jxsPFA5p3P2qPhR
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

Although it is perhaps not in the true Unix spirit of fixed-size
buffers, I've written a patch that adds automatic storage allocation
to the scanf() family of functions for string conversions.  I later
found out that GNU has a very similar interface in glibc's scanf()
functions, so I made sure that the behavior was similar in an effort
to maximize portability.  The result is attached.

With this patch, something like this is now possible:

  char *buf;
  sscanf(string, "%as bar", &buf);
or
  sscanf(string, "%a[^ ]", &buf);

  (provided that string was "foo bar", buf would now point to "foo").

Resulting strings can be freed by free(3).

The only difference between my implementation and the one in
GNU libc is that glibc will return with an error if one of the
stdlib memory allocation functions fails, whereas this patch will
continue to scan through the input and simply store NULL at the
address where the pointer to the resulting string would normally
have gone.  This could be changed if one were so inclined.

If this sort of thing is frowned upon for whatever reason, feel
free to ignore this message -- I just thought it may be useful.

(By the way, this patch is meant for RELENG_4, not HEAD.)

--9jxsPFA5p3P2qPhR
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="vfscanf-as.patch"

--- /usr/src/lib/libc/stdio/vfscanf.c.orig	Sun Apr  8 21:19:16 2001
+++ /usr/src/lib/libc/stdio/vfscanf.c	Sun Apr  8 21:19:21 2001
@@ -69,6 +69,7 @@
 #define	SUPPRESS	0x08	/* suppress assignment */
 #define	POINTER		0x10	/* weird %p pointer (`fake hex') */
 #define	NOSKIP		0x20	/* do not skip blanks */
+#define	ALLOC		0x800	/* allocate space for string assignment */
 #define	QUAD		0x400
 
 /*
@@ -170,6 +171,9 @@
 		case 'L':
 			flags |= LONGDBL;
 			goto again;
+		case 'a':
+			flags |= ALLOC;
+			goto again;
 		case 'h':
 			flags |= SHORT;
 			goto again;
@@ -361,7 +365,7 @@
 			if (flags & SUPPRESS) {
 				n = 0;
 				while (ccltab[*fp->_p]) {
-					n++, fp->_r--, fp->_p++;
+					n++, nread++, fp->_r--, fp->_p++;
 					if (--width == 0)
 						break;
 					if (fp->_r <= 0 && __srefill(fp)) {
@@ -373,10 +377,32 @@
 				if (n == 0)
 					goto match_failure;
 			} else {
-				p0 = p = va_arg(ap, char *);
+				if (flags & ALLOC) {
+					/*
+					 * 64 is optimistic.  If malloc fails,
+					 * we'll skip this string as if SUPPRESS
+					 * was set in ``flags."
+					 */
+					n = 64;
+					p0 = p = (char *)malloc(n);
+				} else
+					p0 = p = va_arg(ap, char *);
+
 				while (ccltab[*fp->_p]) {
 					fp->_r--;
-					*p++ = *fp->_p++;
+					if (p0)
+						*p++ = *fp->_p;
+					fp->_p++, nread++;
+					if (flags & ALLOC && p0 && p == p0 + n){
+						size_t p0_off = p - p0;
+						/*
+						 * Retain the offset of p, and
+						 * reallocate p0.
+						 */
+						n += 64;
+						p0 = (char *)reallocf(p0, n);
+						p  = p0 + p0_off;
+					}
 					if (--width == 0)
 						break;
 					if (fp->_r <= 0 && __srefill(fp)) {
@@ -385,13 +411,14 @@
 						break;
 					}
 				}
-				n = p - p0;
-				if (n == 0)
+				if (p == p0)
 					goto match_failure;
-				*p = 0;
+				if (p0)
+					*p = 0;
+				if (flags & ALLOC)
+					*va_arg(ap, char **) = p0;
 				nassigned++;
 			}
-			nread += n;
 			nconversions++;
 			break;
 
@@ -410,17 +437,36 @@
 				}
 				nread += n;
 			} else {
-				p0 = p = va_arg(ap, char *);
+				if (flags & ALLOC) {
+					n = 64;
+					p0 = p = (char *)malloc(n);
+				} else
+					p0 = p = va_arg(ap, char *);
+
 				while (!isspace(*fp->_p)) {
 					fp->_r--;
-					*p++ = *fp->_p++;
+					if (p0)
+						*p++ = *fp->_p;
+					fp->_p++, nread++;
+					if (flags & ALLOC && p0 && p == p0 + n){
+						size_t p0_off = p - p0;
+						/*
+						 * We ran out of buffer;
+						 * reallocate.
+						 */
+						n += 64;
+						p0 = (char *)reallocf(p0, n);
+						p  = p0 + p0_off;
+					}
 					if (--width == 0)
 						break;
 					if (fp->_r <= 0 && __srefill(fp))
 						break;
 				}
-				*p = 0;
-				nread += p - p0;
+				if (p0)
+					*p = 0;
+				if (flags & ALLOC)
+					*va_arg(ap, char **) = p0;
 				nassigned++;
 			}
 			nconversions++;
--- /usr/src/lib/libc/stdio/scanf.3.orig	Sun Apr  8 22:37:14 2001
+++ /usr/src/lib/libc/stdio/scanf.3	Sun Apr  8 22:26:18 2001
@@ -174,6 +174,12 @@
 (This type is not implemented; the
 .Cm L
 flag is currently ignored.)
+.It Cm a
+(Applicable only to string conversions; see below.)  Indicates that storage for
+any resulting string should be allocated automatically.  The next pointer must
+be a pointer to a string
+.Em ( "char *" ) ;
+a pointer to the newly allocated storage will be placed at this address.
 .It Cm q
 Indicates either that the conversion will be one of
 .Cm dioux

--9jxsPFA5p3P2qPhR--

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-hackers" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20010408225320.A8157>