Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 28 Jun 2020 02:38:07 +0000 (UTC)
From:      Kyle Evans <kevans@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org
Subject:   svn commit: r362715 - in stable/12/usr.bin/sed: . tests
Message-ID:  <202006280238.05S2c7hA027252@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kevans
Date: Sun Jun 28 02:38:07 2020
New Revision: 362715
URL: https://svnweb.freebsd.org/changeset/base/362715

Log:
  MFC r361884: sed: attempt to learn about hex escapes (e.g. \x27)
  
  Somewhat predictably, software often wants to use \x27/\x24 among others so
  that they can decline worrying about ugly escaping, if said escaping is even
  possible. Right now, this software is using these and getting the wrong
  results, as we'll interpret those as x27 and x24 respectively. Some examples
  of this, when an exp-run was run, were science/octopus and misc/vifm.
  
  Go ahead and process these at all times.  We allow either one or two digits,
  and the tests account for both.  If extra digits are specified, e.g. \x2727,
  then the third and fourth digits are interpreted literally as one might
  expect.

Modified:
  stable/12/usr.bin/sed/compile.c
  stable/12/usr.bin/sed/tests/sed2_test.sh
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/usr.bin/sed/compile.c
==============================================================================
--- stable/12/usr.bin/sed/compile.c	Sun Jun 28 02:29:53 2020	(r362714)
+++ stable/12/usr.bin/sed/compile.c	Sun Jun 28 02:38:07 2020	(r362715)
@@ -49,6 +49,7 @@ static const char sccsid[] = "@(#)compile.c	8.1 (Berke
 #include <fcntl.h>
 #include <limits.h>
 #include <regex.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -365,6 +366,51 @@ nonsel:		/* Now parse the command */
 	}
 }
 
+static int
+hex2char(const char *in, char *out, int len)
+{
+	long ord;
+	char *endptr, hexbuf[3];
+
+	hexbuf[0] = in[0];
+	hexbuf[1] = len > 1 ? in[1] : '\0';
+	hexbuf[2] = '\0';
+
+	errno = 0;
+	ord = strtol(hexbuf, &endptr, 16);
+	if (*endptr != '\0' || errno != 0)
+		return (ERANGE);
+	*out = (char)ord;
+	return (0);
+}
+
+static bool
+hexdigit(char c)
+{
+	int lc;
+
+	lc = tolower(c);
+	return isdigit(lc) || (lc >= 'a' && lc <= 'f');
+}
+
+static bool
+dohex(const char *in, char *out, int *len)
+{
+	int tmplen;
+
+	if (!hexdigit(in[0]))
+		return (false);
+	tmplen = 1;
+	if (hexdigit(in[1]))
+		++tmplen;
+	if (hex2char(in, out, tmplen) == 0) {
+		*len = tmplen;
+		return (true);
+	}
+
+	return (false);
+}
+
 /*
  * Get a delimited string.  P points to the delimiter of the string; d points
  * to a buffer area.  Newline and delimiter escapes are processed; other
@@ -377,6 +423,7 @@ nonsel:		/* Now parse the command */
 static char *
 compile_delimited(char *p, char *d, int is_tr)
 {
+	int hexlen;
 	char c;
 
 	c = *p++;
@@ -412,6 +459,12 @@ compile_delimited(char *p, char *d, int is_tr)
 			}
 			p += 2;
 			continue;
+		} else if (*p == '\\' && p[1] == 'x') {
+			if (dohex(&p[2], d, &hexlen)) {
+				++d;
+				p += hexlen + 2;
+				continue;
+			}
 		} else if (*p == '\\' && p[1] == '\\') {
 			if (is_tr)
 				p++;
@@ -431,7 +484,7 @@ compile_delimited(char *p, char *d, int is_tr)
 static char *
 compile_ccl(char **sp, char *t)
 {
-	int c, d;
+	int c, d, hexlen;
 	char *s = *sp;
 
 	*t++ = *s++;
@@ -459,6 +512,10 @@ compile_ccl(char **sp, char *t)
 				*t = '\t';
 				s++;
 				break;
+			case 'x':
+				if (dohex(&s[2], t, &hexlen))
+					s += hexlen + 1;
+				break;
 			}
 		}
 	}
@@ -499,7 +556,7 @@ static char *
 compile_subst(char *p, struct s_subst *s)
 {
 	static char lbuf[_POSIX2_LINE_MAX + 1];
-	int asize, size;
+	int asize, hexlen, size;
 	u_char ref;
 	char c, *text, *op, *sp;
 	int more = 1, sawesc = 0;
@@ -562,6 +619,21 @@ compile_subst(char *p, struct s_subst *s)
 						break;
 					case 't':
 						*p = '\t';
+						break;
+					case 'x':
+#define	ADVANCE_N(s, n)					\
+	do {						\
+		char *adv = (s);			\
+		while (*(adv + (n) - 1) != '\0') {	\
+			*adv = *(adv + (n));		\
+			++adv;				\
+		}					\
+		*adv = '\0';				\
+	} while (0);
+						if (dohex(&p[1], p, &hexlen)) {
+							ADVANCE_N(p + 1,
+							    hexlen);
+						}
 						break;
 					}
 				}

Modified: stable/12/usr.bin/sed/tests/sed2_test.sh
==============================================================================
--- stable/12/usr.bin/sed/tests/sed2_test.sh	Sun Jun 28 02:29:53 2020	(r362714)
+++ stable/12/usr.bin/sed/tests/sed2_test.sh	Sun Jun 28 02:38:07 2020	(r362715)
@@ -88,6 +88,34 @@ escape_subst_body()
 	atf_check -o 'inline:abcx\n' sed 's/[ \r\t]//g' c
 }
 
+atf_test_case hex_subst
+hex_subst_head()
+{
+	atf_set "descr" "Verify proper conversion of hex escapes"
+}
+hex_subst_body()
+{
+	printf "test='foo'" > a
+	printf "test='27foo'" > b
+	printf "\rn" > c
+	printf "xx" > d
+
+	atf_check -o 'inline:test="foo"' sed 's/\x27/"/g' a
+	atf_check -o "inline:'test'='foo'" sed 's/test/\x27test\x27/g' a
+
+	# Make sure we take trailing digits literally.
+	atf_check -o "inline:test=\"foo'" sed 's/\x2727/"/g' b
+
+	# Single digit \x should work as well.
+	atf_check -o "inline:xn" sed 's/\xd/x/' c
+
+	# Invalid digit should cause us to ignore the sequence.  This test
+	# invokes UB (escaping an ordinary character).  A future change will
+	# make regex(3) no longer tolerate this and we'll need to adjust what
+	# we're doing, but for now this will suffice.
+	atf_check -o "inline:" sed 's/\xx//' d
+}
+
 atf_test_case commands_on_stdin
 commands_on_stdin_head()
 {
@@ -112,5 +140,6 @@ atf_init_test_cases()
 	atf_add_test_case inplace_hardlink_src
 	atf_add_test_case inplace_symlink_src
 	atf_add_test_case escape_subst
+	atf_add_test_case hex_subst
 	atf_add_test_case commands_on_stdin
 }



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202006280238.05S2c7hA027252>