Skip site navigation (1) | Skip section navigation (2)
Date:      Tue, 20 Sep 2011 21:54:43 +0000 (UTC)
From:      Gabor Kovesdan <gabor@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-user@freebsd.org
Subject:   svn commit: r225702 - in user/gabor/grep/trunk: . regex
Message-ID:  <201109202154.p8KLsh89046251@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: gabor
Date: Tue Sep 20 21:54:43 2011
New Revision: 225702
URL: http://svn.freebsd.org/changeset/base/225702

Log:
  - Merge improvements from TRE

Modified:
  user/gabor/grep/trunk/Makefile
  user/gabor/grep/trunk/regex/fastmatch.c
  user/gabor/grep/trunk/regex/glue.h
  user/gabor/grep/trunk/regex/tre-fastmatch.c

Modified: user/gabor/grep/trunk/Makefile
==============================================================================
--- user/gabor/grep/trunk/Makefile	Tue Sep 20 21:53:46 2011	(r225701)
+++ user/gabor/grep/trunk/Makefile	Tue Sep 20 21:54:43 2011	(r225702)
@@ -17,7 +17,7 @@ SRCS=	file.c grep.c queue.c util.c
 
 # Extra files ported backported form some regex improvements
 .PATH: ${.CURDIR}/regex
-SRCS+=	fastmatch.c hashtable.c tre-fastmatch.c xmalloc.c
+SRCS+=	fastmatch.c hashtable.c tre-compile.c tre-fastmatch.c xmalloc.c
 CFLAGS+=-I${.CURDIR}/regex
 
 .if ${MK_BSD_GREP} == "yes"

Modified: user/gabor/grep/trunk/regex/fastmatch.c
==============================================================================
--- user/gabor/grep/trunk/regex/fastmatch.c	Tue Sep 20 21:53:46 2011	(r225701)
+++ user/gabor/grep/trunk/regex/fastmatch.c	Tue Sep 20 21:54:43 2011	(r225702)
@@ -36,63 +36,6 @@
 #include "tre-fastmatch.h"
 #include "xmalloc.h"
 
-/* XXX: avoid duplication */
-#define CONV_PAT							\
-  {									\
-    wregex = xmalloc(sizeof(tre_char_t) * (n + 1));			\
-    if (wregex == NULL)							\
-      return REG_ESPACE;						\
-									\
-    if (TRE_MB_CUR_MAX == 1)						\
-      {									\
-	unsigned int i;							\
-	const unsigned char *str = (const unsigned char *)regex;	\
-	tre_char_t *wstr = wregex;					\
-									\
-	for (i = 0; i < n; i++)						\
-	  *(wstr++) = *(str++);						\
-	wlen = n;							\
-      }									\
-    else								\
-      {									\
-	int consumed;							\
-	tre_char_t *wcptr = wregex;					\
-	mbstate_t state;						\
-	memset(&state, '\0', sizeof(state));				\
-	while (n > 0)							\
-	  {								\
-	    consumed = tre_mbrtowc(wcptr, regex, n, &state);		\
-									\
-	    switch (consumed)						\
-	      {								\
-		case 0:							\
-		  if (*regex == '\0')					\
-		    consumed = 1;					\
-		  else							\
-		    {							\
-		      xfree(wregex);					\
-		      return REG_BADPAT;				\
-		    }							\
-		  break;						\
-		case -1:						\
-		  DPRINT(("mbrtowc: error %d: %s.\n", errno,		\
-		  strerror(errno)));					\
-		  xfree(wregex);					\
-		  return REG_BADPAT;					\
-		case -2:						\
-		  consumed = n;						\
-		  break;						\
-	      }								\
-	    regex += consumed;						\
-	    n -= consumed;						\
-	    wcptr++;							\
-	}								\
-        wlen = wcptr - wregex;						\
-      }									\
-									\
-    wregex[wlen] = L'\0';						\
-  }
-
 int
 tre_fixncomp(fastmatch_t *preg, const char *regex, size_t n, int cflags)
 {
@@ -101,14 +44,17 @@ tre_fixncomp(fastmatch_t *preg, const ch
   size_t wlen;
 
   if (n != 0)
-    CONV_PAT
+    {
+      ret = tre_convert_pattern(regex, n, &wregex, &wlen);
+      if (ret != REG_OK)
+	return ret;
+      else 
+	ret = tre_compile_literal(preg, wregex, wlen, cflags);
+      tre_free_pattern(wregex);
+      return ret;
+    }
   else
     return tre_compile_literal(preg, NULL, 0, cflags);
-
-  ret = tre_compile_literal(preg, wregex, wlen, cflags);
-  xfree(wregex);
-
-  return ret;
 }
 
 int
@@ -119,16 +65,19 @@ tre_fastncomp(fastmatch_t *preg, const c
   size_t wlen;
 
   if (n != 0)
-    CONV_PAT
+    {
+      ret = tre_convert_pattern(regex, n, &wregex, &wlen);
+      if (ret != REG_OK)
+	return ret;
+      else
+	ret = (cflags & REG_LITERAL)
+	      ? tre_compile_literal(preg, wregex, wlen, cflags)
+	      : tre_compile_fast(preg, wregex, wlen, cflags);
+      tre_free_pattern(wregex);
+      return ret;
+    }
   else
     return tre_compile_literal(preg, NULL, 0, cflags);
-
-  ret = (cflags & REG_LITERAL) ?
-    tre_compile_literal(preg, wregex, wlen, cflags) :
-    tre_compile_fast(preg, wregex, wlen, cflags);
-  xfree(wregex);
-
-  return ret;
 }
 
 
@@ -176,30 +125,6 @@ tre_fastfree(fastmatch_t *preg)
   tre_free_fast(preg);
 }
 
-/* XXX: avoid duplication */
-#define ADJUST_OFFSETS							\
-  {									\
-    size_t slen = (size_t)(pmatch[0].rm_eo - pmatch[0].rm_so);		\
-    size_t offset = pmatch[0].rm_so;					\
-    int ret;								\
-									\
-    if ((pmatch[0].rm_so < 0) || (pmatch[0].rm_eo < 0))			\
-      return REG_NOMATCH;						\
-    if ((len != (unsigned)-1) && ((unsigned long)pmatch[0].rm_eo > len))\
-      return REG_NOMATCH;						\
-    if ((long long)pmatch[0].rm_eo - pmatch[0].rm_so < 0)		\
-      return REG_NOMATCH;						\
-    ret = tre_match_fast(preg, &string[offset], slen, type, nmatch,	\
-			 pmatch, eflags);				\
-    for (unsigned i = 0; (i == 0) || (!(eflags & REG_NOSUB) &&		\
-         (i < nmatch)); i++)						\
-      {									\
-        pmatch[i].rm_so += offset;					\
-        pmatch[i].rm_eo += offset;					\
-      }									\
-    return ret;								\
-  }
-
 int
 tre_fastnexec(const fastmatch_t *preg, const char *string, size_t len,
          size_t nmatch, regmatch_t pmatch[], int eflags)
@@ -207,7 +132,8 @@ tre_fastnexec(const fastmatch_t *preg, c
   tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? STR_BYTE : STR_MBS;
 
   if (eflags & REG_STARTEND)
-    ADJUST_OFFSETS
+    CALL_WITH_OFFSET(tre_match_fast(preg, &string[offset], slen,
+		     type, nmatch, pmatch, eflags));
   else
     return tre_match_fast(preg, string, len, type, nmatch,
       pmatch, eflags);
@@ -227,7 +153,8 @@ tre_fastwnexec(const fastmatch_t *preg, 
   tre_str_type_t type = STR_WIDE;
 
   if (eflags & REG_STARTEND)
-    ADJUST_OFFSETS
+    CALL_WITH_OFFSET(tre_match_fast(preg, &string[offset], slen,
+		     type, nmatch, pmatch, eflags));
   else
     return tre_match_fast(preg, string, len, type, nmatch,
       pmatch, eflags);

Modified: user/gabor/grep/trunk/regex/glue.h
==============================================================================
--- user/gabor/grep/trunk/regex/glue.h	Tue Sep 20 21:53:46 2011	(r225701)
+++ user/gabor/grep/trunk/regex/glue.h	Tue Sep 20 21:54:43 2011	(r225702)
@@ -11,6 +11,7 @@
 
 #define TRE_WCHAR			1
 #define TRE_MULTIBYTE			1
+#define HAVE_MBSTATE_T			1
 
 #define TRE_CHAR(n) L##n
 
@@ -37,4 +38,29 @@
 #define MAX(a,b)			((a > b) ? (a) : (b))
 
 typedef enum { STR_WIDE, STR_BYTE, STR_MBS, STR_USER } tre_str_type_t;
+
+#define CALL_WITH_OFFSET(fn)						\
+  do									\
+    {									\
+      size_t slen = (size_t)(pmatch[0].rm_eo - pmatch[0].rm_so);	\
+      size_t offset = pmatch[0].rm_so;					\
+      int ret;								\
+									\
+      if ((long long)pmatch[0].rm_eo - pmatch[0].rm_so < 0)		\
+	return REG_NOMATCH;						\
+      ret = fn;								\
+      for (unsigned i = 0; (!(eflags & REG_NOSUB) && (i < nmatch)); i++)\
+	{								\
+	  pmatch[i].rm_so += offset;					\
+	  pmatch[i].rm_eo += offset;					\
+	}								\
+      return ret;							\
+    } while (0 /*CONSTCOND*/)
+
+int
+tre_convert_pattern(const char *regex, size_t n, tre_char_t **w,
+    size_t *wn);
+
+void
+tre_free_pattern(tre_char_t *wregex);
 #endif

Modified: user/gabor/grep/trunk/regex/tre-fastmatch.c
==============================================================================
--- user/gabor/grep/trunk/regex/tre-fastmatch.c	Tue Sep 20 21:53:46 2011	(r225701)
+++ user/gabor/grep/trunk/regex/tre-fastmatch.c	Tue Sep 20 21:54:43 2011	(r225702)
@@ -42,8 +42,8 @@
 #include "tre-fastmatch.h"
 #include "xmalloc.h"
 
-static int	fastcmp(const void *, const bool *, const void *, size_t,
-			tre_str_type_t, bool, bool);
+static int	fastcmp(const fastmatch_t *fg, const void *data,
+			tre_str_type_t type);
 
 /*
  * Clean up if pattern compilation fails.
@@ -97,24 +97,6 @@ static int	fastcmp(const void *, const b
     fg->pattern[siz] = '\0';						\
   }									\
 
-/*
- * Compares the pattern to the input string at the position
- * stored in startptr.
- */
-#define COMPARE								\
-  switch (type)								\
-    {									\
-      case STR_WIDE:							\
-	mismatch = fastcmp(fg->wpattern, fg->wescmap, startptr,		\
-			   fg->wlen, type,				\
-			   fg->icase, fg->newline);			\
-	break;								\
-      default:								\
-	mismatch = fastcmp(fg->pattern, fg->escmap, startptr,		\
-			   fg->len, type,				\
-			   fg->icase, fg->newline);			\
-      }									\
-
 #define IS_OUT_OF_BOUNDS						\
   ((!fg->reversed							\
     ? ((type == STR_WIDE) ? ((j + fg->wlen) > len)			\
@@ -154,7 +136,7 @@ static int	fastcmp(const void *, const b
 	      gs = fg->bmGs[mismatch];					\
 	    }								\
 	    bc = (r == HASH_OK) ? bc : fg->defBc;			\
-	    DPRINT(("tre_fast_match: mismatch on character %lc, "	\
+	    DPRINT(("tre_fast_match: mismatch on character" CHF ", "	\
 		    "BC %d, GS %d\n",					\
 		    ((const tre_char_t *)startptr)[mismatch + 1],	\
 		    bc, gs));						\
@@ -297,7 +279,7 @@ static int	fastcmp(const void *, const b
       r = hashtable_put(fg->qsBc_table, &fg->wpattern[i], &k);		\
       if ((r == HASH_FAIL) || (r == HASH_FULL))				\
 	FAIL_COMP(REG_ESPACE);						\
-      DPRINT(("BC shift for wide char %lc is %d\n", fg->wpattern[i],	\
+      DPRINT(("BC shift for wide char " CHF " is %d\n", fg->wpattern[i],\
 	     k));							\
       if (fg->icase)							\
 	{								\
@@ -306,7 +288,7 @@ static int	fastcmp(const void *, const b
 	  r = hashtable_put(fg->qsBc_table, &wc, &k);			\
 	  if ((r == HASH_FAIL) || (r == HASH_FULL))			\
 	    FAIL_COMP(REG_ESPACE);					\
-	  DPRINT(("BC shift for wide char %lc is %d\n", wc, k));	\
+	  DPRINT(("BC shift for wide char " CHF " is %d\n", wc, k));	\
 	}								\
     }
 
@@ -327,7 +309,7 @@ static int	fastcmp(const void *, const b
       r = hashtable_put(fg->qsBc_table, &fg->wpattern[i], &k);		\
       if ((r == HASH_FAIL) || (r == HASH_FULL))				\
 	FAIL_COMP(REG_ESPACE);						\
-      DPRINT(("Reverse BC shift for wide char %lc is %d\n",		\
+      DPRINT(("Reverse BC shift for wide char " CHF " is %d\n",		\
 	     fg->wpattern[i], k));					\
       if (fg->icase)							\
 	{								\
@@ -336,7 +318,8 @@ static int	fastcmp(const void *, const b
 	  r = hashtable_put(fg->qsBc_table, &wc, &k);			\
 	  if ((r == HASH_FAIL) || (r == HASH_FULL))			\
 	    FAIL_COMP(REG_ESPACE);					\
-	  DPRINT(("Reverse BC shift for wide char %lc is %d\n", wc, k));\
+	  DPRINT(("Reverse BC shift for wide char " CHF " is %d\n", wc,	\
+		 k));							\
 	}								\
     }
 
@@ -853,7 +836,7 @@ badpat:
  */
 int
 tre_match_fast(const fastmatch_t *fg, const void *data, size_t len,
-    tre_str_type_t type, int nmatch __unused, regmatch_t pmatch[], int eflags)
+    tre_str_type_t type, int nmatch, regmatch_t pmatch[], int eflags)
 {
   unsigned int shift, u = 0, v = 0;
   ssize_t j = 0;
@@ -878,7 +861,7 @@ tre_match_fast(const fastmatch_t *fg, co
   /* Shortcut for empty pattern */
   if (fg->matchall)
     {
-      if (!fg->nosub)
+      if (!fg->nosub && nmatch >= 1)
 	{
 	  pmatch[0].rm_so = 0;
 	  pmatch[0].rm_eo = len;
@@ -932,12 +915,12 @@ tre_match_fast(const fastmatch_t *fg, co
 	  /* Determine where in data to start search at. */
 	  j = fg->eol ? len - (type == STR_WIDE ? fg->wlen : fg->len) : 0;
 	  SKIP_CHARS(j);
-	  COMPARE;
+	  mismatch = fastcmp(fg, startptr, type);
 	  if (mismatch == REG_OK)
 	    {
 	      if (fg->word && !IS_ON_WORD_BOUNDARY)
 		return ret;
-	      if (!fg->nosub)
+	      if (!fg->nosub && nmatch >= 1)
 		{
 		  pmatch[0].rm_so = j;
 		  pmatch[0].rm_eo = j + (type == STR_WIDE ? fg->wlen : fg->len);
@@ -952,7 +935,7 @@ tre_match_fast(const fastmatch_t *fg, co
       do
 	{
 	  SKIP_CHARS(j);
-	  COMPARE;
+	  mismatch = fastcmp(fg, startptr, type);
 	  if (mismatch == REG_OK)
 	    {
 	      if (fg->word)
@@ -961,7 +944,7 @@ tre_match_fast(const fastmatch_t *fg, co
 		CHECK_BOL_ANCHOR;
 	      if (fg->eol)
 		CHECK_EOL_ANCHOR;
-	      if (!fg->nosub)
+	      if (!fg->nosub && nmatch >= 1)
 		{
 		  pmatch[0].rm_so = j;
 		  pmatch[0].rm_eo = j + ((type == STR_WIDE) ? fg->wlen : fg->len);
@@ -1008,14 +991,15 @@ tre_free_fast(fastmatch_t *fg)
  *		REG_OK on success
  */
 static inline int
-fastcmp(const void *pat, const bool *escmap, const void *data, size_t len,
-	tre_str_type_t type, bool icase, bool newline)
+fastcmp(const fastmatch_t *fg, const void *data, tre_str_type_t type)
 {
   const char *str_byte = data;
-  const char *pat_byte = pat;
-  int ret = REG_OK;
+  const char *pat_byte = fg->pattern;
   const tre_char_t *str_wide = data;
-  const tre_char_t *pat_wide = pat;
+  const tre_char_t *pat_wide = fg->wpattern;
+  const bool *escmap = (type == STR_WIDE) ? fg->wescmap : fg->escmap;
+  size_t len = (type == STR_WIDE) ? fg->wlen : fg->len;
+  int ret = REG_OK;
 
   /* Compare the pattern and the input char-by-char from the last position. */
   for (int i = len - 1; i >= 0; i--) {
@@ -1024,23 +1008,25 @@ fastcmp(const void *pat, const bool *esc
 	case STR_WIDE:
 
 	  /* Check dot */
-	  if (pat_wide[i] == TRE_CHAR('.') && (!escmap || !escmap[i]) &&
-	      (!newline || (str_wide[i] != TRE_CHAR('\n'))))
+	  if (fg->hasdot && pat_wide[i] == TRE_CHAR('.') &&
+	      (!escmap || !escmap[i]) &&
+	      (!fg->newline || (str_wide[i] != TRE_CHAR('\n'))))
 	    continue;
 
 	  /* Compare */
-	  if (icase ? (towlower(pat_wide[i]) == towlower(str_wide[i]))
+	  if (fg->icase ? (towlower(pat_wide[i]) == towlower(str_wide[i]))
 		    : (pat_wide[i] == str_wide[i]))
 	    continue;
 	  break;
 	default:
 	  /* Check dot */
-	  if (pat_byte[i] == '.' && (!escmap || !escmap[i]) &&
-	      (!newline || (str_byte[i] != '\n')))
+	  if (fg->hasdot && pat_byte[i] == '.' &&
+	      (!escmap || !escmap[i]) &&
+	      (!fg->newline || (str_byte[i] != '\n')))
 	    continue;
 
 	  /* Compare */
-	  if (icase ? (tolower(pat_byte[i]) == tolower(str_byte[i]))
+	  if (fg->icase ? (tolower(pat_byte[i]) == tolower(str_byte[i]))
 		    : (pat_byte[i] == str_byte[i]))
 	  continue;
       }



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201109202154.p8KLsh89046251>