Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 4 Apr 2009 13:06:33 +0200
From:      Jilles Tjoelker <jilles@stack.nl>
To:        bug-followup@FreeBSD.org, vsevolod@highsecure.ru, freebsd-standards@freebsd.org, stefanf@freebsd.org
Subject:   Re: standards/79067: /bin/sh should be more intelligent about IFS
Message-ID:  <20090404110633.GA25666@stack.nl>

next in thread | raw e-mail | index | archive | help

--3V7upXqbjpZ4EhLz
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

A patch similar to the 'read' part of this one was committed to
-CURRENT.

The other part of the patch in the PR is a bit different from NetBSD,
and is wrong: it drops a final empty argument in "$@". For example,
set -- a ''; set -- "$@"; echo $#  should give 2, but gives 1.

So I put in the NetBSD code, which does not have this issue
(sh-ifs-expand-netbsd.patch). Additionally, it cleans up the code a bit.

However, the NetBSD code has a related issue: it drops a final empty
argument in "$@"$s where $s contains one or more characters of IFS
whitespace.  For example,  s=' '; set -- a ''; set -- "$@"; echo $#
should give 2, but gives 1. A fix for this is in sh-ifs-expand.patch.

With the read change and both patches, the tests from
http://www.research.att.com/~gsf/public/ifs.sh all pass. Some more tests
with "$@" and IFS are in moreifs.sh.

-- 
Jilles Tjoelker

--3V7upXqbjpZ4EhLz
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="sh-ifs-expand-netbsd.patch"

--- src/bin/sh/expand.c.orig	2008-09-04 19:34:53.000000000 +0200
+++ src/bin/sh/expand.c	2009-04-04 01:32:07.000000000 +0200
@@ -82,7 +82,7 @@
 	struct ifsregion *next;	/* next region in list */
 	int begoff;		/* offset of start of region */
 	int endoff;		/* offset of end of region */
-	int nulonly;		/* search for nul bytes only */
+	int inquotes;		/* search for nul bytes only */
 };
 
 
@@ -936,13 +936,19 @@
  */
 
 STATIC void
-recordregion(int start, int end, int nulonly)
+recordregion(int start, int end, int inquotes)
 {
 	struct ifsregion *ifsp;
 
 	if (ifslastp == NULL) {
 		ifsp = &ifsfirst;
 	} else {
+		if (ifslastp->endoff == start
+		    && ifslastp->inquotes == inquotes) {
+			/* extend previous area */
+			ifslastp->endoff = end;
+			return;
+		}
 		ifsp = (struct ifsregion *)ckmalloc(sizeof (struct ifsregion));
 		ifslastp->next = ifsp;
 	}
@@ -950,7 +956,7 @@
 	ifslastp->next = NULL;
 	ifslastp->begoff = start;
 	ifslastp->endoff = end;
-	ifslastp->nulonly = nulonly;
+	ifslastp->inquotes = inquotes;
 }
 
 
@@ -969,75 +975,88 @@
 	char *p;
 	char *q;
 	char *ifs;
-	int ifsspc;
-	int nulonly;
-
+	const char *ifsspc;
+	int had_param_ch = 0;
 
 	start = string;
-	ifsspc = 0;
-	nulonly = 0;
-	if (ifslastp != NULL) {
-		ifsp = &ifsfirst;
-		do {
-			p = string + ifsp->begoff;
-			nulonly = ifsp->nulonly;
-			ifs = nulonly ? nullstr :
-				( ifsset() ? ifsval() : " \t\n" );
-			ifsspc = 0;
-			while (p < string + ifsp->endoff) {
-				q = p;
-				if (*p == CTLESC)
+
+	if (ifslastp == NULL) {
+		/* Return entire argument, IFS doesn't apply to any of it */
+		sp = (struct strlist *)stalloc(sizeof *sp);
+		sp->text = start;
+		*arglist->lastp = sp;
+		arglist->lastp = &sp->next;
+		return;
+	}
+
+	ifs = ifsset() ? ifsval() : " \t\n";
+
+	for (ifsp = &ifsfirst; ifsp != NULL; ifsp = ifsp->next) {
+		p = string + ifsp->begoff;
+		while (p < string + ifsp->endoff) {
+			had_param_ch = 1;
+			q = p;
+			if (*p == CTLESC)
+				p++;
+			if (ifsp->inquotes) {
+				/* Only NULs (should be from "$@") end args */
+				if (*p != 0) {
 					p++;
-				if (strchr(ifs, *p)) {
-					if (!nulonly)
-						ifsspc = (strchr(" \t\n", *p) != NULL);
-					/* Ignore IFS whitespace at start */
-					if (q == start && ifsspc) {
-						p++;
-						start = p;
-						continue;
-					}
-					*q = '\0';
-					sp = (struct strlist *)stalloc(sizeof *sp);
-					sp->text = start;
-					*arglist->lastp = sp;
-					arglist->lastp = &sp->next;
+					continue;
+				}
+				ifsspc = NULL;
+			} else {
+				if (!strchr(ifs, *p)) {
 					p++;
-					if (!nulonly) {
-						for (;;) {
-							if (p >= string + ifsp->endoff) {
-								break;
-							}
-							q = p;
-							if (*p == CTLESC)
-								p++;
-							if (strchr(ifs, *p) == NULL ) {
-								p = q;
-								break;
-							} else if (strchr(" \t\n",*p) == NULL) {
-								if (ifsspc) {
-									p++;
-									ifsspc = 0;
-								} else {
-									p = q;
-									break;
-								}
-							} else
-								p++;
-						}
-					}
-					start = p;
-				} else
+					continue;
+				}
+				had_param_ch = 0;
+				ifsspc = strchr(" \t\n", *p);
+
+				/* Ignore IFS whitespace at start */
+				if (q == start && ifsspc != NULL) {
 					p++;
+					start = p;
+					continue;
+				}
 			}
-		} while ((ifsp = ifsp->next) != NULL);
-		if (*start || (!ifsspc && start > string)) {
+
+			/* Save this argument... */
+			*q = '\0';
 			sp = (struct strlist *)stalloc(sizeof *sp);
 			sp->text = start;
 			*arglist->lastp = sp;
 			arglist->lastp = &sp->next;
+			p++;
+
+			if (ifsspc != NULL) {
+				/* Ignore further trailing IFS whitespace */
+				for (; p < string + ifsp->endoff; p++) {
+					q = p;
+					if (*p == CTLESC)
+						p++;
+					if (strchr(ifs, *p) == NULL) {
+						p = q;
+						break;
+					}
+					if (strchr(" \t\n", *p) == NULL) {
+						p++;
+						break;
+					}
+				}
+			}
+			start = p;
 		}
-	} else {
+	}
+
+	/*
+	 * Save anything left as an argument.
+	 * Traditionally we have treated 'IFS=':'; set -- x$IFS' as
+	 * generating 2 arguments, the second of which is empty.
+	 * Some recent clarification of the Posix spec say that it
+	 * should only generate one....
+	 */
+	if (had_param_ch || *start != 0) {
 		sp = (struct strlist *)stalloc(sizeof *sp);
 		sp->text = start;
 		*arglist->lastp = sp;

--3V7upXqbjpZ4EhLz
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="sh-ifs-expand.patch"

--- src/bin/sh/expand.c.netbsd	2009-04-04 01:32:07.000000000 +0200
+++ src/bin/sh/expand.c	2009-04-04 02:01:54.000000000 +0200
@@ -994,12 +994,12 @@
 	for (ifsp = &ifsfirst; ifsp != NULL; ifsp = ifsp->next) {
 		p = string + ifsp->begoff;
 		while (p < string + ifsp->endoff) {
-			had_param_ch = 1;
 			q = p;
 			if (*p == CTLESC)
 				p++;
 			if (ifsp->inquotes) {
 				/* Only NULs (should be from "$@") end args */
+				had_param_ch = 1;
 				if (*p != 0) {
 					p++;
 					continue;
@@ -1007,10 +1007,10 @@
 				ifsspc = NULL;
 			} else {
 				if (!strchr(ifs, *p)) {
+					had_param_ch = 1;
 					p++;
 					continue;
 				}
-				had_param_ch = 0;
 				ifsspc = strchr(" \t\n", *p);
 
 				/* Ignore IFS whitespace at start */
@@ -1019,6 +1019,7 @@
 					start = p;
 					continue;
 				}
+				had_param_ch = 0;
 			}
 
 			/* Save this argument... */

--3V7upXqbjpZ4EhLz
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="moreifs.sh"

#!/bin/sh

# Copyright (c) 2009 Jilles Tjoelker
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#

c=: e= s=' '
failures=''
ok=''

check_result() {
	if [ "x$2" = "x$3" ]; then
		ok=x$ok
	else
		failures=x$failures
		echo "For $1, expected $3 actual $2"
	fi
}

IFS=' 	
'
set -- a ''
set -- "$@"
check_result 'set -- "$@"' "($#)($1)($2)" "(2)(a)()"

set -- a ''
set -- "$@"$e
check_result 'set -- "$@"$e' "($#)($1)($2)" "(2)(a)()"

set -- a ''
set -- "$@"$s
check_result 'set -- "$@"$s' "($#)($1)($2)" "(2)(a)()"

IFS="$c"
set -- a ''
set -- "$@"$c
check_result 'set -- "$@"$c' "($#)($1)($2)" "(2)(a)()"

test "x$failures" = x

--3V7upXqbjpZ4EhLz--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20090404110633.GA25666>