Skip site navigation (1) | Skip section navigation (2)
Date:      Thu, 24 Aug 2017 01:23:33 +0000 (UTC)
From:      Kyle Evans <kevans@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r322826 - head/usr.bin/grep
Message-ID:  <201708240123.v7O1NXTx000502@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kevans
Date: Thu Aug 24 01:23:33 2017
New Revision: 322826
URL: https://svnweb.freebsd.org/changeset/base/322826

Log:
  bsdgrep: add a primitive literal matcher
  
  fgrep/grep -F will error out at runtime if compiled with a regex(3)
  that does not define REG_NOSPEC or REG_LITERAL. glibc is one such regex(3)
  implementation, and as it turns out it doesn't support literal matching at
  all.
  
  Provide a primitive literal matcher for use with glibc and other
  implementations that don't support literal matching so that we don't
  completely lose fgrep/grep -F if compiled against libgnuregex on stable/10,
  stable/11, or other systems that we don't necessarily support.
  
  This is a wholly unoptimized implementation, and there are currently no
  plans to optimize it. Its use case is expected to be primarily unsupported
  systems in the not-too-distant future, and it reinvents a wheel that we
  already have available as a feature of regex(3).
  
  Reviewed by:	cem, emaste, ngie
  Approved by:	emaste (mentor)
  MFC after:	2 weeks
  Differential Revision:	https://reviews.freebsd.org/D12056

Modified:
  head/usr.bin/grep/grep.c
  head/usr.bin/grep/grep.h
  head/usr.bin/grep/util.c

Modified: head/usr.bin/grep/grep.c
==============================================================================
--- head/usr.bin/grep/grep.c	Thu Aug 24 01:20:52 2017	(r322825)
+++ head/usr.bin/grep/grep.c	Thu Aug 24 01:23:33 2017	(r322826)
@@ -720,12 +720,19 @@ main(int argc, char *argv[])
 	case GREP_BASIC:
 		break;
 	case GREP_FIXED:
+		/*
+		 * regex(3) implementations that support fixed-string searches generally
+		 * define either REG_NOSPEC or REG_LITERAL. Set the appropriate flag
+		 * here. If neither are defined, GREP_FIXED later implies that the
+		 * internal literal matcher should be used. Other cflags that have
+		 * the same interpretation as REG_NOSPEC and REG_LITERAL should be
+		 * similarly added here, and grep.h should be amended to take this into
+		 * consideration when defining WITH_INTERNAL_NOSPEC.
+		 */
 #if defined(REG_NOSPEC)
 		cflags |= REG_NOSPEC;
 #elif defined(REG_LITERAL)
 		cflags |= REG_LITERAL;
-#else
-		errx(2, "literal expressions not supported at compile time");
 #endif
 		break;
 	case GREP_EXTENDED:
@@ -742,7 +749,11 @@ main(int argc, char *argv[])
 	r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
 
 	/* Don't process any patterns if we have a blank one */
+#ifdef WITH_INTERNAL_NOSPEC
+	if (!matchall && grepbehave != GREP_FIXED) {
+#else
 	if (!matchall) {
+#endif
 		/* Check if cheating is allowed (always is for fgrep). */
 		for (i = 0; i < patterns; ++i) {
 #ifndef WITHOUT_FASTMATCH

Modified: head/usr.bin/grep/grep.h
==============================================================================
--- head/usr.bin/grep/grep.h	Thu Aug 24 01:20:52 2017	(r322825)
+++ head/usr.bin/grep/grep.h	Thu Aug 24 01:23:33 2017	(r322826)
@@ -57,6 +57,10 @@ extern const char		*errstr[];
 #define	GREP_BASIC	1
 #define	GREP_EXTENDED	2
 
+#if !defined(REG_NOSPEC) && !defined(REG_LITERAL)
+#define WITH_INTERNAL_NOSPEC
+#endif
+
 #define	BINFILE_BIN	0
 #define	BINFILE_SKIP	1
 #define	BINFILE_TEXT	2

Modified: head/usr.bin/grep/util.c
==============================================================================
--- head/usr.bin/grep/util.c	Thu Aug 24 01:20:52 2017	(r322825)
+++ head/usr.bin/grep/util.c	Thu Aug 24 01:23:33 2017	(r322826)
@@ -70,7 +70,10 @@ struct parsec {
 	bool		binary;				/* Binary file? */
 };
 
-
+#ifdef WITH_INTERNAL_NOSPEC
+static int litexec(const struct pat *pat, const char *string,
+    size_t nmatch, regmatch_t pmatch[]);
+#endif
 static int procline(struct parsec *pc);
 static void printline(struct parsec *pc, int sep);
 static void printline_metadata(struct str *line, int sep);
@@ -350,6 +353,67 @@ procfile(const char *fn)
 	return (c);
 }
 
+#ifdef WITH_INTERNAL_NOSPEC
+/*
+ * Internal implementation of literal string search within a string, modeled
+ * after regexec(3), for use when the regex(3) implementation doesn't offer
+ * either REG_NOSPEC or REG_LITERAL. This does not apply in the default FreeBSD
+ * config, but in other scenarios such as building against libgnuregex or on
+ * some non-FreeBSD OSes.
+ */
+static int
+litexec(const struct pat *pat, const char *string, size_t nmatch,
+    regmatch_t pmatch[])
+{
+	char *(*strstr_fn)(const char *, const char *);
+	char *sub, *subject;
+	const char *search;
+	size_t idx, n, ofs, stringlen;
+
+	if (cflags & REG_ICASE)
+		strstr_fn = strcasestr;
+	else
+		strstr_fn = strstr;
+	idx = 0;
+	ofs = pmatch[0].rm_so;
+	stringlen = pmatch[0].rm_eo;
+	if (ofs >= stringlen)
+		return (REG_NOMATCH);
+	subject = strndup(string, stringlen);
+	if (subject == NULL)
+		return (REG_ESPACE);
+	for (n = 0; ofs < stringlen;) {
+		search = (subject + ofs);
+		if ((unsigned long)pat->len > strlen(search))
+			break;
+		sub = strstr_fn(search, pat->pat);
+		/*
+		 * Ignoring the empty string possibility due to context: grep optimizes
+		 * for empty patterns and will never reach this point.
+		 */
+		if (sub == NULL)
+			break;
+		++n;
+		/* Fill in pmatch if necessary */
+		if (nmatch > 0) {
+			pmatch[idx].rm_so = ofs + (sub - search);
+			pmatch[idx].rm_eo = pmatch[idx].rm_so + pat->len;
+			if (++idx == nmatch)
+				break;
+			ofs = pmatch[idx].rm_so + 1;
+		} else
+			/* We only needed to know if we match or not */
+			break;
+	}
+	free(subject);
+	if (n > 0 && nmatch > 0)
+		for (n = idx; n < nmatch; ++n)
+			pmatch[n].rm_so = pmatch[n].rm_eo = -1;
+
+	return (n > 0 ? 0 : REG_NOMATCH);
+}
+#endif /* WITH_INTERNAL_NOSPEC */
+
 #define iswword(x)	(iswalnum((x)) || (x) == L'_')
 
 /*
@@ -400,6 +464,11 @@ procline(struct parsec *pc)
 		for (i = 0; i < patterns; i++) {
 			pmatch.rm_so = st;
 			pmatch.rm_eo = pc->ln.len;
+#ifdef WITH_INTERNAL_NOSPEC
+			if (grepbehave == GREP_FIXED)
+				r = litexec(&pattern[i], pc->ln.dat, 1, &pmatch);
+			else
+#endif
 #ifndef WITHOUT_FASTMATCH
 			if (fg_pattern[i].pattern)
 				r = fastexec(&fg_pattern[i],



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201708240123.v7O1NXTx000502>