Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 7 Jun 2020 06:31:04 -0700 (PDT)
From:      "Rodney W. Grimes" <freebsd@gndrsh.dnsmgr.net>
To:        Kyle Evans <kevans@freebsd.org>
Cc:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   Re: svn commit: r361884 - in head/usr.bin/sed: . tests
Message-ID:  <202006071331.057DV4Vo040383@gndrsh.dnsmgr.net>
In-Reply-To: <202006070432.0574Wc1L063319@repo.freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
> Author: kevans
> Date: Sun Jun  7 04:32:38 2020
> New Revision: 361884
> URL: https://svnweb.freebsd.org/changeset/base/361884
> 
> Log:
>   sed: attempt to learn about hex escapes (e.g. \x27)
>   
>   Somewhat predictably, software often wants to use \x27/\x24 among others so
>   that they can decline worrying about ugly escaping, if said escaping is even
>   possible. Right now, this software is using these and getting the wrong
>   results, as we'll interpret those as x27 and x24 respectively. Some examples
>   of this, when an exp-run was run, were science/octopus and misc/vifm.
>   
>   Go ahead and process these at all times.  We allow either one or two digits,
>   and the tests account for both.  If extra digits are specified, e.g. \x2727,
>   then the third and fourth digits are interpreted literally as one might
>   expect.

Does it work to write \\x27? That is, I want sed to NOT interpret \x27 as a hex
escape, so that I can run sed on files that contain escape sequences.

>   
>   PR:		229925
>   MFC after:	2 weeks
> 
> Modified:
>   head/usr.bin/sed/compile.c
>   head/usr.bin/sed/tests/sed2_test.sh
> 
> Modified: head/usr.bin/sed/compile.c
> ==============================================================================
> --- head/usr.bin/sed/compile.c	Sun Jun  7 03:11:34 2020	(r361883)
> +++ head/usr.bin/sed/compile.c	Sun Jun  7 04:32:38 2020	(r361884)
> @@ -49,6 +49,7 @@ static const char sccsid[] = "@(#)compile.c	8.1 (Berke
>  #include <fcntl.h>
>  #include <limits.h>
>  #include <regex.h>
> +#include <stdbool.h>
>  #include <stdio.h>
>  #include <stdlib.h>
>  #include <string.h>
> @@ -365,6 +366,51 @@ nonsel:		/* Now parse the command */
>  	}
>  }
>  
> +static int
> +hex2char(const char *in, char *out, int len)
> +{
> +	long ord;
> +	char *endptr, hexbuf[3];
> +
> +	hexbuf[0] = in[0];
> +	hexbuf[1] = len > 1 ? in[1] : '\0';
> +	hexbuf[2] = '\0';
> +
> +	errno = 0;
> +	ord = strtol(hexbuf, &endptr, 16);
> +	if (*endptr != '\0' || errno != 0)
> +		return (ERANGE);
> +	*out = (char)ord;
> +	return (0);
> +}
> +
> +static bool
> +hexdigit(char c)
> +{
> +	int lc;
> +
> +	lc = tolower(c);
> +	return isdigit(lc) || (lc >= 'a' && lc <= 'f');
> +}
> +
> +static bool
> +dohex(const char *in, char *out, int *len)
> +{
> +	int tmplen;
> +
> +	if (!hexdigit(in[0]))
> +		return (false);
> +	tmplen = 1;
> +	if (hexdigit(in[1]))
> +		++tmplen;
> +	if (hex2char(in, out, tmplen) == 0) {
> +		*len = tmplen;
> +		return (true);
> +	}
> +
> +	return (false);
> +}
> +
>  /*
>   * Get a delimited string.  P points to the delimiter of the string; d points
>   * to a buffer area.  Newline and delimiter escapes are processed; other
> @@ -377,6 +423,7 @@ nonsel:		/* Now parse the command */
>  static char *
>  compile_delimited(char *p, char *d, int is_tr)
>  {
> +	int hexlen;
>  	char c;
>  
>  	c = *p++;
> @@ -412,6 +459,12 @@ compile_delimited(char *p, char *d, int is_tr)
>  			}
>  			p += 2;
>  			continue;
> +		} else if (*p == '\\' && p[1] == 'x') {
> +			if (dohex(&p[2], d, &hexlen)) {
> +				++d;
> +				p += hexlen + 2;
> +				continue;
> +			}
>  		} else if (*p == '\\' && p[1] == '\\') {
>  			if (is_tr)
>  				p++;
> @@ -431,7 +484,7 @@ compile_delimited(char *p, char *d, int is_tr)
>  static char *
>  compile_ccl(char **sp, char *t)
>  {
> -	int c, d;
> +	int c, d, hexlen;
>  	char *s = *sp;
>  
>  	*t++ = *s++;
> @@ -459,6 +512,10 @@ compile_ccl(char **sp, char *t)
>  				*t = '\t';
>  				s++;
>  				break;
> +			case 'x':
> +				if (dohex(&s[2], t, &hexlen))
> +					s += hexlen + 1;
> +				break;
>  			}
>  		}
>  	}
> @@ -499,7 +556,7 @@ static char *
>  compile_subst(char *p, struct s_subst *s)
>  {
>  	static char lbuf[_POSIX2_LINE_MAX + 1];
> -	int asize, size;
> +	int asize, hexlen, size;
>  	u_char ref;
>  	char c, *text, *op, *sp;
>  	int more = 1, sawesc = 0;
> @@ -562,6 +619,21 @@ compile_subst(char *p, struct s_subst *s)
>  						break;
>  					case 't':
>  						*p = '\t';
> +						break;
> +					case 'x':
> +#define	ADVANCE_N(s, n)					\
> +	do {						\
> +		char *adv = (s);			\
> +		while (*(adv + (n) - 1) != '\0') {	\
> +			*adv = *(adv + (n));		\
> +			++adv;				\
> +		}					\
> +		*adv = '\0';				\
> +	} while (0);
> +						if (dohex(&p[1], p, &hexlen)) {
> +							ADVANCE_N(p + 1,
> +							    hexlen);
> +						}
>  						break;
>  					}
>  				}
> 
> Modified: head/usr.bin/sed/tests/sed2_test.sh
> ==============================================================================
> --- head/usr.bin/sed/tests/sed2_test.sh	Sun Jun  7 03:11:34 2020	(r361883)
> +++ head/usr.bin/sed/tests/sed2_test.sh	Sun Jun  7 04:32:38 2020	(r361884)
> @@ -88,10 +88,39 @@ escape_subst_body()
>  	atf_check -o 'inline:abcx\n' sed 's/[ \r\t]//g' c
>  }
>  
> +atf_test_case hex_subst
> +hex_subst_head()
> +{
> +	atf_set "descr" "Verify proper conversion of hex escapes"
> +}
> +hex_subst_body()
> +{
> +	printf "test='foo'" > a
> +	printf "test='27foo'" > b
> +	printf "\rn" > c
> +	printf "xx" > d
> +
> +	atf_check -o 'inline:test="foo"' sed 's/\x27/"/g' a
> +	atf_check -o "inline:'test'='foo'" sed 's/test/\x27test\x27/g' a
> +
> +	# Make sure we take trailing digits literally.
> +	atf_check -o "inline:test=\"foo'" sed 's/\x2727/"/g' b
> +
> +	# Single digit \x should work as well.
> +	atf_check -o "inline:xn" sed 's/\xd/x/' c
> +
> +	# Invalid digit should cause us to ignore the sequence.  This test
> +	# invokes UB, escapes of an ordinary character.  A future change will
> +	# make regex(3) no longer tolerate this and we'll need to adjust what
> +	# we're doing, but for now this will suffice.
> +	atf_check -o "inline:" sed 's/\xx//' d
> +}
> +
>  atf_init_test_cases()
>  {
>  	atf_add_test_case inplace_command_q
>  	atf_add_test_case inplace_hardlink_src
>  	atf_add_test_case inplace_symlink_src
>  	atf_add_test_case escape_subst
> +	atf_add_test_case hex_subst
>  }
> 

-- 
Rod Grimes                                                 rgrimes@freebsd.org



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202006071331.057DV4Vo040383>