Skip site navigation (1) Skip section navigation (2)
Date:      Sat, 3 Apr 2010 21:00:17 GMT
From:      dfilter@FreeBSD.ORG (dfilter service)
To:        freebsd-bugs@FreeBSD.org
Subject:   Re: bin/57554: commit references a PR
Message-ID:  <201004032100.o33L0H68084939@freefall.freebsd.org>

next in thread | raw e-mail | index | archive | help
The following reply was made to PR bin/57554; it has been noted by GNATS.

From: dfilter@FreeBSD.ORG (dfilter service)
To: bug-followup@FreeBSD.org
Cc:  
Subject: Re: bin/57554: commit references a PR
Date: Sat,  3 Apr 2010 20:56:11 +0000 (UTC)

 Author: jilles
 Date: Sat Apr  3 20:55:56 2010
 New Revision: 206145
 URL: http://svn.freebsd.org/changeset/base/206145
 
 Log:
   sh: Fix various things about expansions:
   * remove the backslash from \} inside double quotes inside +-=?
     substitutions, e.g. "${$+\}a}"
   * maintain separate double-quote state for ${v#...} and ${v%...};
     single and double quotes are special inside, even in a double-quoted
     string or here document
   * keep track of the correct order of substitutions and arithmetic
   
   This is different from dash's approach, which does not track individual
   double quotes in the parser, trying to fix this up during expansion.
   Dash's approach treats single quotes inside "${v#...}" incorrectly, however.
   
   This is similar to NetBSD's approach (as submitted in PR bin/57554), but
   recognizes the difference between +-=? and #% substitutions hinted at in
   POSIX and is more refined for arithmetic expansion and here documents.
   
   PR:		bin/57554
   Exp-run done by:	erwin (with some other sh(1) changes)
 
 Added:
   head/tools/regression/bin/sh/expansion/plus-minus2.0   (contents, props changed)
   head/tools/regression/bin/sh/parser/heredoc2.0   (contents, props changed)
 Modified:
   head/bin/sh/parser.c
 
 Modified: head/bin/sh/parser.c
 ==============================================================================
 --- head/bin/sh/parser.c	Sat Apr  3 20:35:39 2010	(r206144)
 +++ head/bin/sh/parser.c	Sat Apr  3 20:55:56 2010	(r206145)
 @@ -79,6 +79,10 @@ struct heredoc {
  	int striptabs;		/* if set, strip leading tabs */
  };
  
 +struct parser_temp {
 +	struct parser_temp *next;
 +	void *data;
 +};
  
  
  STATIC struct heredoc *heredoclist;	/* list of here documents to read */
 @@ -94,6 +98,7 @@ STATIC struct heredoc *heredoc;
  STATIC int quoteflag;		/* set if (part of) last token was quoted */
  STATIC int startlinno;		/* line # where last token started */
  STATIC int funclinno;		/* line # where the current function started */
 +STATIC struct parser_temp *parser_temp;
  
  /* XXX When 'noaliases' is set to one, no alias expansion takes place. */
  static int noaliases = 0;
 @@ -117,6 +122,73 @@ STATIC void synerror(const char *);
  STATIC void setprompt(int);
  
  
 +STATIC void *
 +parser_temp_alloc(size_t len)
 +{
 +	struct parser_temp *t;
 +
 +	INTOFF;
 +	t = ckmalloc(sizeof(*t));
 +	t->data = NULL;
 +	t->next = parser_temp;
 +	parser_temp = t;
 +	t->data = ckmalloc(len);
 +	INTON;
 +	return t->data;
 +}
 +
 +
 +STATIC void *
 +parser_temp_realloc(void *ptr, size_t len)
 +{
 +	struct parser_temp *t;
 +
 +	INTOFF;
 +	t = parser_temp;
 +	if (ptr != t->data)
 +		error("bug: parser_temp_realloc misused");
 +	t->data = ckrealloc(t->data, len);
 +	INTON;
 +	return t->data;
 +}
 +
 +
 +STATIC void
 +parser_temp_free_upto(void *ptr)
 +{
 +	struct parser_temp *t;
 +	int done = 0;
 +
 +	INTOFF;
 +	while (parser_temp != NULL && !done) {
 +		t = parser_temp;
 +		parser_temp = t->next;
 +		done = t->data == ptr;
 +		ckfree(t->data);
 +		ckfree(t);
 +	}
 +	INTON;
 +	if (!done)
 +		error("bug: parser_temp_free_upto misused");
 +}
 +
 +
 +STATIC void
 +parser_temp_free_all(void)
 +{
 +	struct parser_temp *t;
 +
 +	INTOFF;
 +	while (parser_temp != NULL) {
 +		t = parser_temp;
 +		parser_temp = t->next;
 +		ckfree(t->data);
 +		ckfree(t);
 +	}
 +	INTON;
 +}
 +
 +
  /*
   * Read and parse a command.  Returns NEOF on end of file.  (NULL is a
   * valid parse tree indicating a blank line.)
 @@ -127,6 +199,11 @@ parsecmd(int interact)
  {
  	int t;
  
 +	/* This assumes the parser is not re-entered,
 +	 * which could happen if we add command substitution on PS1/PS2.
 +	 */
 +	parser_temp_free_all();
 +
  	tokpushback = 0;
  	doprompt = interact;
  	if (doprompt)
 @@ -863,6 +940,21 @@ breakloop:
  }
  
  
 +#define MAXNEST_STATIC 8
 +struct tokenstate
 +{
 +	const char *syntax; /* *SYNTAX */
 +	int parenlevel; /* levels of parentheses in arithmetic */
 +	enum tokenstate_category
 +	{
 +		TSTATE_TOP,
 +		TSTATE_VAR_OLD, /* ${var+-=?}, inherits dquotes */
 +		TSTATE_VAR_NEW, /* other ${var...}, own dquote state */
 +		TSTATE_ARITH
 +	} category;
 +};
 +
 +
  /*
   * Called to parse command substitutions.
   */
 @@ -1040,7 +1132,7 @@ done:
  #define	PARSEARITH()	{goto parsearith; parsearith_return:;}
  
  STATIC int
 -readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
 +readtoken1(int firstc, char const *initialsyntax, char *eofmark, int striptabs)
  {
  	int c = firstc;
  	char *out;
 @@ -1048,22 +1140,21 @@ readtoken1(int firstc, char const *synta
  	char line[EOFMARKLEN + 1];
  	struct nodelist *bqlist;
  	int quotef;
 -	int dblquote;
 -	int varnest;	/* levels of variables expansion */
 -	int arinest;	/* levels of arithmetic expansion */
 -	int parenlevel;	/* levels of parens in arithmetic */
 -	char const *prevsyntax;	/* syntax before arithmetic */
 +	int newvarnest;
 +	int level;
  	int synentry;
 +	struct tokenstate state_static[MAXNEST_STATIC];
 +	int maxnest = MAXNEST_STATIC;
 +	struct tokenstate *state = state_static;
  
  	startlinno = plinno;
 -	dblquote = 0;
 -	if (syntax == DQSYNTAX)
 -		dblquote = 1;
  	quotef = 0;
  	bqlist = NULL;
 -	varnest = 0;
 -	arinest = 0;
 -	parenlevel = 0;
 +	newvarnest = 0;
 +	level = 0;
 +	state[level].syntax = initialsyntax;
 +	state[level].parenlevel = 0;
 +	state[level].category = TSTATE_TOP;
  
  	STARTSTACKSTR(out);
  	loop: {	/* for each line, until end of word */
 @@ -1071,11 +1162,11 @@ readtoken1(int firstc, char const *synta
  		for (;;) {	/* until end of line or end of word */
  			CHECKSTRSPACE(3, out);	/* permit 3 calls to USTPUTC */
  
 -			synentry = syntax[c];
 +			synentry = state[level].syntax[c];
  
  			switch(synentry) {
  			case CNL:	/* '\n' */
 -				if (syntax == BASESYNTAX)
 +				if (state[level].syntax == BASESYNTAX)
  					goto endword;	/* exit outer loop */
  				USTPUTC(c, out);
  				plinno++;
 @@ -1089,7 +1180,7 @@ readtoken1(int firstc, char const *synta
  				USTPUTC(c, out);
  				break;
  			case CCTL:
 -				if (eofmark == NULL || dblquote)
 +				if (eofmark == NULL || initialsyntax != SQSYNTAX)
  					USTPUTC(CTLESC, out);
  				USTPUTC(c, out);
  				break;
 @@ -1105,41 +1196,37 @@ readtoken1(int firstc, char const *synta
  					else
  						setprompt(0);
  				} else {
 -					if (dblquote && c != '\\' &&
 -					    c != '`' && c != '$' &&
 -					    (c != '"' || eofmark != NULL))
 +					if (state[level].syntax == DQSYNTAX &&
 +					    c != '\\' && c != '`' && c != '$' &&
 +					    (c != '"' || (eofmark != NULL &&
 +						newvarnest == 0)) &&
 +					    (c != '}' || state[level].category != TSTATE_VAR_OLD))
  						USTPUTC('\\', out);
  					if (SQSYNTAX[c] == CCTL)
  						USTPUTC(CTLESC, out);
 -					else if (eofmark == NULL)
 +					else if (eofmark == NULL ||
 +					    newvarnest > 0)
  						USTPUTC(CTLQUOTEMARK, out);
  					USTPUTC(c, out);
  					quotef++;
  				}
  				break;
  			case CSQUOTE:
 -				if (eofmark == NULL)
 -					USTPUTC(CTLQUOTEMARK, out);
 -				syntax = SQSYNTAX;
 +				USTPUTC(CTLQUOTEMARK, out);
 +				state[level].syntax = SQSYNTAX;
  				break;
  			case CDQUOTE:
 -				if (eofmark == NULL)
 -					USTPUTC(CTLQUOTEMARK, out);
 -				syntax = DQSYNTAX;
 -				dblquote = 1;
 +				USTPUTC(CTLQUOTEMARK, out);
 +				state[level].syntax = DQSYNTAX;
  				break;
  			case CENDQUOTE:
 -				if (eofmark != NULL && arinest == 0 &&
 -				    varnest == 0) {
 +				if (eofmark != NULL && newvarnest == 0)
  					USTPUTC(c, out);
 -				} else {
 -					if (arinest) {
 -						syntax = ARISYNTAX;
 -						dblquote = 0;
 -					} else if (eofmark == NULL) {
 -						syntax = BASESYNTAX;
 -						dblquote = 0;
 -					}
 +				else {
 +					if (state[level].category == TSTATE_ARITH)
 +						state[level].syntax = ARISYNTAX;
 +					else
 +						state[level].syntax = BASESYNTAX;
  					quotef++;
  				}
  				break;
 @@ -1147,30 +1234,33 @@ readtoken1(int firstc, char const *synta
  				PARSESUB();		/* parse substitution */
  				break;
  			case CENDVAR:	/* '}' */
 -				if (varnest > 0) {
 -					varnest--;
 +				if (level > 0 &&
 +				    (state[level].category == TSTATE_VAR_OLD ||
 +				    state[level].category == TSTATE_VAR_NEW)) {
 +					if (state[level].category == TSTATE_VAR_OLD)
 +						state[level - 1].syntax = state[level].syntax;
 +					else
 +						newvarnest--;
 +					level--;
  					USTPUTC(CTLENDVAR, out);
  				} else {
  					USTPUTC(c, out);
  				}
  				break;
  			case CLP:	/* '(' in arithmetic */
 -				parenlevel++;
 +				state[level].parenlevel++;
  				USTPUTC(c, out);
  				break;
  			case CRP:	/* ')' in arithmetic */
 -				if (parenlevel > 0) {
 +				if (state[level].parenlevel > 0) {
  					USTPUTC(c, out);
 -					--parenlevel;
 +					--state[level].parenlevel;
  				} else {
  					if (pgetc() == ')') {
 -						if (--arinest == 0) {
 +						if (level > 0 &&
 +						    state[level].category == TSTATE_ARITH) {
 +							level--;
  							USTPUTC(CTLENDARI, out);
 -							syntax = prevsyntax;
 -							if (syntax == DQSYNTAX)
 -								dblquote = 1;
 -							else
 -								dblquote = 0;
  						} else
  							USTPUTC(')', out);
  					} else {
 @@ -1184,13 +1274,15 @@ readtoken1(int firstc, char const *synta
  				}
  				break;
  			case CBQUOTE:	/* '`' */
 -				out = parsebackq(out, &bqlist, 1, dblquote,
 -						arinest || dblquote);
 +				out = parsebackq(out, &bqlist, 1,
 +				    state[level].syntax == DQSYNTAX &&
 +				    (eofmark == NULL || newvarnest > 0),
 +				    state[level].syntax == DQSYNTAX || state[level].syntax == ARISYNTAX);
  				break;
  			case CEOF:
  				goto endword;		/* exit outer loop */
  			default:
 -				if (varnest == 0)
 +				if (level == 0)
  					goto endword;	/* exit outer loop */
  				USTPUTC(c, out);
  			}
 @@ -1198,14 +1290,17 @@ readtoken1(int firstc, char const *synta
  		}
  	}
  endword:
 -	if (syntax == ARISYNTAX)
 +	if (state[level].syntax == ARISYNTAX)
  		synerror("Missing '))'");
 -	if (syntax != BASESYNTAX && eofmark == NULL)
 +	if (state[level].syntax != BASESYNTAX && eofmark == NULL)
  		synerror("Unterminated quoted string");
 -	if (varnest != 0) {
 +	if (state[level].category == TSTATE_VAR_OLD ||
 +	    state[level].category == TSTATE_VAR_NEW) {
  		startlinno = plinno;
  		synerror("Missing '}'");
  	}
 +	if (state != state_static)
 +		parser_temp_free_upto(state);
  	USTPUTC('\0', out);
  	len = out - stackblock();
  	out = stackblock();
 @@ -1228,7 +1323,6 @@ endword:
  /* end of readtoken routine */
  
  
 -
  /*
   * Check to see whether we are at the end of the here document.  When this
   * is called, c is set to the first character of the next input line.  If
 @@ -1345,8 +1439,11 @@ parsesub: {
  			PARSEARITH();
  		} else {
  			pungetc();
 -			out = parsebackq(out, &bqlist, 0, dblquote,
 -					arinest || dblquote);
 +			out = parsebackq(out, &bqlist, 0,
 +			    state[level].syntax == DQSYNTAX &&
 +			    (eofmark == NULL || newvarnest > 0),
 +			    state[level].syntax == DQSYNTAX ||
 +			    state[level].syntax == ARISYNTAX);
  		}
  	} else {
  		USTPUTC(CTLVAR, out);
 @@ -1446,11 +1543,44 @@ parsesub: {
  			pungetc();
  		}
  		STPUTC('=', out);
 -		if (subtype != VSLENGTH && (dblquote || arinest))
 +		if (subtype != VSLENGTH && (state[level].syntax == DQSYNTAX ||
 +		    state[level].syntax == ARISYNTAX))
  			flags |= VSQUOTE;
  		*(stackblock() + typeloc) = subtype | flags;
 -		if (subtype != VSNORMAL)
 -			varnest++;
 +		if (subtype != VSNORMAL) {
 +			if (level + 1 >= maxnest) {
 +				maxnest *= 2;
 +				if (state == state_static) {
 +					state = parser_temp_alloc(
 +					    maxnest * sizeof(*state));
 +					memcpy(state, state_static,
 +					    MAXNEST_STATIC * sizeof(*state));
 +				} else
 +					state = parser_temp_realloc(state,
 +					    maxnest * sizeof(*state));
 +			}
 +			level++;
 +			state[level].parenlevel = 0;
 +			if (subtype == VSMINUS || subtype == VSPLUS ||
 +			    subtype == VSQUESTION || subtype == VSASSIGN) {
 +				/*
 +				 * For operators that were in the Bourne shell,
 +				 * inherit the double-quote state.
 +				 */
 +				state[level].syntax = state[level - 1].syntax;
 +				state[level].category = TSTATE_VAR_OLD;
 +			} else {
 +				/*
 +				 * The other operators take a pattern,
 +				 * so go to BASESYNTAX.
 +				 * Also, ' and " are now special, even
 +				 * in here documents.
 +				 */
 +				state[level].syntax = BASESYNTAX;
 +				state[level].category = TSTATE_VAR_NEW;
 +				newvarnest++;
 +			}
 +		}
  	}
  	goto parsesub_return;
  }
 @@ -1461,21 +1591,26 @@ parsesub: {
   */
  parsearith: {
  
 -	if (++arinest == 1) {
 -		prevsyntax = syntax;
 -		syntax = ARISYNTAX;
 -		USTPUTC(CTLARI, out);
 -		if (dblquote)
 -			USTPUTC('"',out);
 -		else
 -			USTPUTC(' ',out);
 -	} else {
 -		/*
 -		 * we collapse embedded arithmetic expansion to
 -		 * parenthesis, which should be equivalent
 -		 */
 -		USTPUTC('(', out);
 +	if (level + 1 >= maxnest) {
 +		maxnest *= 2;
 +		if (state == state_static) {
 +			state = parser_temp_alloc(
 +			    maxnest * sizeof(*state));
 +			memcpy(state, state_static,
 +			    MAXNEST_STATIC * sizeof(*state));
 +		} else
 +			state = parser_temp_realloc(state,
 +			    maxnest * sizeof(*state));
  	}
 +	level++;
 +	state[level].syntax = ARISYNTAX;
 +	state[level].parenlevel = 0;
 +	state[level].category = TSTATE_ARITH;
 +	USTPUTC(CTLARI, out);
 +	if (state[level - 1].syntax == DQSYNTAX)
 +		USTPUTC('"',out);
 +	else
 +		USTPUTC(' ',out);
  	goto parsearith_return;
  }
  
 
 Added: head/tools/regression/bin/sh/expansion/plus-minus2.0
 ==============================================================================
 --- /dev/null	00:00:00 1970	(empty, because file is newly added)
 +++ head/tools/regression/bin/sh/expansion/plus-minus2.0	Sat Apr  3 20:55:56 2010	(r206145)
 @@ -0,0 +1,4 @@
 +# $FreeBSD$
 +
 +e=
 +test "${e:-\}}" = '}'
 
 Added: head/tools/regression/bin/sh/parser/heredoc2.0
 ==============================================================================
 --- /dev/null	00:00:00 1970	(empty, because file is newly added)
 +++ head/tools/regression/bin/sh/parser/heredoc2.0	Sat Apr  3 20:55:56 2010	(r206145)
 @@ -0,0 +1,44 @@
 +# $FreeBSD$
 +
 +failures=0
 +
 +check() {
 +	if ! eval "[ $* ]"; then
 +		echo "Failed: $*"
 +		: $((failures += 1))
 +	fi
 +}
 +
 +s='ast*que?non' sq=\' dq=\"
 +
 +check '"$(cat <<EOF
 +${s}
 +EOF
 +)" = "ast*que?non"'
 +
 +check '"$(cat <<EOF
 +${s+"x"}
 +EOF
 +)" = ${dq}x${dq}'
 +
 +check '"$(cat <<EOF
 +${s+'$sq'x'$sq'}
 +EOF
 +)" = ${sq}x${sq}'
 +
 +check '"$(cat <<EOF
 +${s#ast}
 +EOF
 +)" = "*que?non"'
 +
 +check '"$(cat <<EOF
 +${s##"ast"}
 +EOF
 +)" = "*que?non"'
 +
 +check '"$(cat <<EOF
 +${s##'$sq'ast'$sq'}
 +EOF
 +)" = "*que?non"'
 +
 +exit $((failures != 0))
 _______________________________________________
 svn-src-all@freebsd.org mailing list
 http://lists.freebsd.org/mailman/listinfo/svn-src-all
 To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201004032100.o33L0H68084939>