Date: Sat, 4 Apr 2009 13:06:33 +0200 From: Jilles Tjoelker <jilles@stack.nl> To: bug-followup@FreeBSD.org, vsevolod@highsecure.ru, freebsd-standards@freebsd.org, stefanf@freebsd.org Subject: Re: standards/79067: /bin/sh should be more intelligent about IFS Message-ID: <20090404110633.GA25666@stack.nl>
next in thread | raw e-mail | index | archive | help
--3V7upXqbjpZ4EhLz Content-Type: text/plain; charset=us-ascii Content-Disposition: inline A patch similar to the 'read' part of this one was committed to -CURRENT. The other part of the patch in the PR is a bit different from NetBSD, and is wrong: it drops a final empty argument in "$@". For example, set -- a ''; set -- "$@"; echo $# should give 2, but gives 1. So I put in the NetBSD code, which does not have this issue (sh-ifs-expand-netbsd.patch). Additionally, it cleans up the code a bit. However, the NetBSD code has a related issue: it drops a final empty argument in "$@"$s where $s contains one or more characters of IFS whitespace. For example, s=' '; set -- a ''; set -- "$@"; echo $# should give 2, but gives 1. A fix for this is in sh-ifs-expand.patch. With the read change and both patches, the tests from http://www.research.att.com/~gsf/public/ifs.sh all pass. Some more tests with "$@" and IFS are in moreifs.sh. -- Jilles Tjoelker --3V7upXqbjpZ4EhLz Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="sh-ifs-expand-netbsd.patch" --- src/bin/sh/expand.c.orig 2008-09-04 19:34:53.000000000 +0200 +++ src/bin/sh/expand.c 2009-04-04 01:32:07.000000000 +0200 @@ -82,7 +82,7 @@ struct ifsregion *next; /* next region in list */ int begoff; /* offset of start of region */ int endoff; /* offset of end of region */ - int nulonly; /* search for nul bytes only */ + int inquotes; /* search for nul bytes only */ }; @@ -936,13 +936,19 @@ */ STATIC void -recordregion(int start, int end, int nulonly) +recordregion(int start, int end, int inquotes) { struct ifsregion *ifsp; if (ifslastp == NULL) { ifsp = &ifsfirst; } else { + if (ifslastp->endoff == start + && ifslastp->inquotes == inquotes) { + /* extend previous area */ + ifslastp->endoff = end; + return; + } ifsp = (struct ifsregion *)ckmalloc(sizeof (struct ifsregion)); ifslastp->next = ifsp; } @@ -950,7 +956,7 @@ ifslastp->next = NULL; ifslastp->begoff = start; ifslastp->endoff = end; - ifslastp->nulonly = nulonly; + ifslastp->inquotes = inquotes; } @@ -969,75 +975,88 @@ char *p; char *q; char *ifs; - int ifsspc; - int nulonly; - + const char *ifsspc; + int had_param_ch = 0; start = string; - ifsspc = 0; - nulonly = 0; - if (ifslastp != NULL) { - ifsp = &ifsfirst; - do { - p = string + ifsp->begoff; - nulonly = ifsp->nulonly; - ifs = nulonly ? nullstr : - ( ifsset() ? ifsval() : " \t\n" ); - ifsspc = 0; - while (p < string + ifsp->endoff) { - q = p; - if (*p == CTLESC) + + if (ifslastp == NULL) { + /* Return entire argument, IFS doesn't apply to any of it */ + sp = (struct strlist *)stalloc(sizeof *sp); + sp->text = start; + *arglist->lastp = sp; + arglist->lastp = &sp->next; + return; + } + + ifs = ifsset() ? ifsval() : " \t\n"; + + for (ifsp = &ifsfirst; ifsp != NULL; ifsp = ifsp->next) { + p = string + ifsp->begoff; + while (p < string + ifsp->endoff) { + had_param_ch = 1; + q = p; + if (*p == CTLESC) + p++; + if (ifsp->inquotes) { + /* Only NULs (should be from "$@") end args */ + if (*p != 0) { p++; - if (strchr(ifs, *p)) { - if (!nulonly) - ifsspc = (strchr(" \t\n", *p) != NULL); - /* Ignore IFS whitespace at start */ - if (q == start && ifsspc) { - p++; - start = p; - continue; - } - *q = '\0'; - sp = (struct strlist *)stalloc(sizeof *sp); - sp->text = start; - *arglist->lastp = sp; - arglist->lastp = &sp->next; + continue; + } + ifsspc = NULL; + } else { + if (!strchr(ifs, *p)) { p++; - if (!nulonly) { - for (;;) { - if (p >= string + ifsp->endoff) { - break; - } - q = p; - if (*p == CTLESC) - p++; - if (strchr(ifs, *p) == NULL ) { - p = q; - break; - } else if (strchr(" \t\n",*p) == NULL) { - if (ifsspc) { - p++; - ifsspc = 0; - } else { - p = q; - break; - } - } else - p++; - } - } - start = p; - } else + continue; + } + had_param_ch = 0; + ifsspc = strchr(" \t\n", *p); + + /* Ignore IFS whitespace at start */ + if (q == start && ifsspc != NULL) { p++; + start = p; + continue; + } } - } while ((ifsp = ifsp->next) != NULL); - if (*start || (!ifsspc && start > string)) { + + /* Save this argument... */ + *q = '\0'; sp = (struct strlist *)stalloc(sizeof *sp); sp->text = start; *arglist->lastp = sp; arglist->lastp = &sp->next; + p++; + + if (ifsspc != NULL) { + /* Ignore further trailing IFS whitespace */ + for (; p < string + ifsp->endoff; p++) { + q = p; + if (*p == CTLESC) + p++; + if (strchr(ifs, *p) == NULL) { + p = q; + break; + } + if (strchr(" \t\n", *p) == NULL) { + p++; + break; + } + } + } + start = p; } - } else { + } + + /* + * Save anything left as an argument. + * Traditionally we have treated 'IFS=':'; set -- x$IFS' as + * generating 2 arguments, the second of which is empty. + * Some recent clarification of the Posix spec say that it + * should only generate one.... + */ + if (had_param_ch || *start != 0) { sp = (struct strlist *)stalloc(sizeof *sp); sp->text = start; *arglist->lastp = sp; --3V7upXqbjpZ4EhLz Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="sh-ifs-expand.patch" --- src/bin/sh/expand.c.netbsd 2009-04-04 01:32:07.000000000 +0200 +++ src/bin/sh/expand.c 2009-04-04 02:01:54.000000000 +0200 @@ -994,12 +994,12 @@ for (ifsp = &ifsfirst; ifsp != NULL; ifsp = ifsp->next) { p = string + ifsp->begoff; while (p < string + ifsp->endoff) { - had_param_ch = 1; q = p; if (*p == CTLESC) p++; if (ifsp->inquotes) { /* Only NULs (should be from "$@") end args */ + had_param_ch = 1; if (*p != 0) { p++; continue; @@ -1007,10 +1007,10 @@ ifsspc = NULL; } else { if (!strchr(ifs, *p)) { + had_param_ch = 1; p++; continue; } - had_param_ch = 0; ifsspc = strchr(" \t\n", *p); /* Ignore IFS whitespace at start */ @@ -1019,6 +1019,7 @@ start = p; continue; } + had_param_ch = 0; } /* Save this argument... */ --3V7upXqbjpZ4EhLz Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="moreifs.sh" #!/bin/sh # Copyright (c) 2009 Jilles Tjoelker # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # c=: e= s=' ' failures='' ok='' check_result() { if [ "x$2" = "x$3" ]; then ok=x$ok else failures=x$failures echo "For $1, expected $3 actual $2" fi } IFS=' ' set -- a '' set -- "$@" check_result 'set -- "$@"' "($#)($1)($2)" "(2)(a)()" set -- a '' set -- "$@"$e check_result 'set -- "$@"$e' "($#)($1)($2)" "(2)(a)()" set -- a '' set -- "$@"$s check_result 'set -- "$@"$s' "($#)($1)($2)" "(2)(a)()" IFS="$c" set -- a '' set -- "$@"$c check_result 'set -- "$@"$c' "($#)($1)($2)" "(2)(a)()" test "x$failures" = x --3V7upXqbjpZ4EhLz--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20090404110633.GA25666>