From owner-svn-src-user@FreeBSD.ORG Sat Oct 22 09:38:03 2011 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 9BC8D106566B; Sat, 22 Oct 2011 09:38:03 +0000 (UTC) (envelope-from gabor@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 8B7A68FC0C; Sat, 22 Oct 2011 09:38:03 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id p9M9c3oh071157; Sat, 22 Oct 2011 09:38:03 GMT (envelope-from gabor@svn.freebsd.org) Received: (from gabor@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id p9M9c3tH071154; Sat, 22 Oct 2011 09:38:03 GMT (envelope-from gabor@svn.freebsd.org) Message-Id: <201110220938.p9M9c3tH071154@svn.freebsd.org> From: Gabor Kovesdan Date: Sat, 22 Oct 2011 09:38:03 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r226627 - user/gabor/tre-integration/contrib/tre/lib X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 22 Oct 2011 09:38:03 -0000 Author: gabor Date: Sat Oct 22 09:38:03 2011 New Revision: 226627 URL: http://svn.freebsd.org/changeset/base/226627 Log: - Drop dots from heuristics because they decrease efficiency - Count pattern length for future usage Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c Sat Oct 22 09:29:44 2011 (r226626) +++ user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c Sat Oct 22 09:38:03 2011 (r226627) @@ -97,24 +97,26 @@ /* * Finishes a segment (fixed-length text fragment). */ -#define END_SEGMENT \ +#define END_SEGMENT(varlen) \ do \ { \ + if (varlen) \ + tlen = -1; \ st = i + 1; \ escaped = false; \ goto end_segment; \ } while (0) -#define STORE_CHAR(esc) \ +#define STORE_CHAR \ do \ { \ - if (esc) \ - heur[pos++] = TRE_CHAR('\\'); \ heur[pos++] = regex[i]; \ escaped = false; \ + tlen = (tlen == -1) ? -1 : tlen + 1; \ continue; \ } while (0) +#define DEC_POS pos = (pos == 0) ? 0 : pos - 1; /* * Parses a regular expression and constructs a heuristic in heur_t and @@ -126,6 +128,7 @@ tre_compile_heur(heur_t *h, const tre_ch { tre_char_t *arr[MAX_FRAGMENTS], *heur; size_t length[MAX_FRAGMENTS]; + ssize_t tlen = 0; int errcode, j = 0, pos = 0, st = 0; bool escaped = false; @@ -154,17 +157,17 @@ tre_compile_heur(heur_t *h, const tre_ch { /* - * Bracketed expression is substituted with a dot or the + * Bracketed expression ends the segment or the * brackets are treated as normal if at least the opening * bracket is escaped. */ case TRE_CHAR('['): if (escaped) - STORE_CHAR(true); + STORE_CHAR; else { PARSE_BRACKETS; - heur[pos++] = TRE_CHAR('.'); + END_SEGMENT(true); } continue; @@ -175,20 +178,20 @@ tre_compile_heur(heur_t *h, const tre_ch */ case TRE_CHAR('{'): if (escaped && (i == 1)) - STORE_CHAR(true); + STORE_CHAR; else if ((i == 0) && !(cflags & REG_EXTENDED)) - STORE_CHAR(true); + STORE_CHAR; else if ((i == 0) && (cflags & REG_EXTENDED)) continue; PARSE_UNIT('{', '}'); if (escaped ^ (cflags & REG_EXTENDED)) { - pos--; - END_SEGMENT; + DEC_POS; + END_SEGMENT(true); } else - STORE_CHAR(cflags & REG_EXTENDED); + STORE_CHAR; continue; /* @@ -199,10 +202,10 @@ tre_compile_heur(heur_t *h, const tre_ch if (escaped ^ (cflags & REG_EXTENDED)) { PARSE_UNIT('(', ')'); - END_SEGMENT; + END_SEGMENT(true); } else - STORE_CHAR(cflags & REG_EXTENDED); + STORE_CHAR; continue; /* @@ -212,7 +215,7 @@ tre_compile_heur(heur_t *h, const tre_ch */ case TRE_CHAR('\\'): if (escaped) - STORE_CHAR(true); + STORE_CHAR; else escaped = true; continue; @@ -225,11 +228,11 @@ tre_compile_heur(heur_t *h, const tre_ch */ case TRE_CHAR('*'): if (escaped || (!(cflags & REG_EXTENDED) && (i == 0))) - STORE_CHAR(true); + STORE_CHAR; else if ((i != 0)) { - pos--; - END_SEGMENT; + DEC_POS; + END_SEGMENT(true); } continue; @@ -245,9 +248,9 @@ tre_compile_heur(heur_t *h, const tre_ch if ((cflags & REG_EXTENDED) && (i == 0)) continue; else if ((cflags & REG_EXTENDED) ^ escaped) - END_SEGMENT; - else - STORE_CHAR(cflags & REG_EXTENDED); + END_SEGMENT(true); + else + STORE_CHAR; continue; /* @@ -262,11 +265,11 @@ tre_compile_heur(heur_t *h, const tre_ch continue; if ((cflags & REG_EXTENDED) ^ escaped) { - pos--; - END_SEGMENT; + DEC_POS; + END_SEGMENT(true); } else - STORE_CHAR(true); + STORE_CHAR; continue; /* @@ -279,17 +282,22 @@ tre_compile_heur(heur_t *h, const tre_ch goto err; } else if (!(cflags & REG_EXTENDED) && escaped) - END_SEGMENT; + { + errcode = REG_BADPAT; + goto err; + } else - STORE_CHAR(cflags & REG_EXTENDED); + STORE_CHAR; continue; - /* - * Cut the segment at an escaped dot because the fast matcher - * cannot handle it. - */ case TRE_CHAR('.'): - STORE_CHAR(escaped); + if (escaped) + STORE_CHAR; + else + { + tlen = (tlen == -1) ? -1 : tlen + 1; + END_SEGMENT(false); + } continue; /* @@ -299,9 +307,9 @@ tre_compile_heur(heur_t *h, const tre_ch */ default: if (escaped) - END_SEGMENT; + END_SEGMENT(true); else - STORE_CHAR(false); + STORE_CHAR; continue; } } @@ -321,6 +329,8 @@ end_segment: h->type = HEUR_PREFIX_ARRAY; goto ok; } + else if (pos == 0) + continue; if (j == MAX_FRAGMENTS) { @@ -346,6 +356,8 @@ ok: size_t m = 1; int ret; + h->tlen = tlen; + for (int i = 1; i < j; i++) m = (length[i] > length[m]) ? i : m; @@ -368,13 +380,13 @@ ok: if (cflags & REG_NEWLINE) { - ret = tre_compile_fast(h->heurs[0], arr[m], length[m], 0); + ret = tre_compile_literal(h->heurs[0], arr[m], length[m], 0); CHECK_ERR h->type = HEUR_LONGEST; } else { - ret = tre_compile_fast(h->heurs[0], arr[0], length[0], 0); + ret = tre_compile_literal(h->heurs[0], arr[0], length[0], 0); CHECK_ERR if (j == 1) { @@ -384,7 +396,7 @@ ok: goto finish; } else - ret = tre_compile_fast(h->heurs[1], arr[m], length[m], 0); + ret = tre_compile_literal(h->heurs[1], arr[m], length[m], 0); CHECK_ERR if ((h->type == HEUR_PREFIX_ARRAY) || (m == j - 1)) { @@ -394,7 +406,7 @@ ok: goto finish; } else - ret = tre_compile_fast(h->heurs[2], arr[j - 1], length[j - 1], 0); + ret = tre_compile_literal(h->heurs[2], arr[j - 1], length[j - 1], 0); CHECK_ERR h->heurs[3] = NULL; } Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h Sat Oct 22 09:29:44 2011 (r226626) +++ user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h Sat Oct 22 09:38:03 2011 (r226627) @@ -13,6 +13,7 @@ typedef struct { fastmatch_t *heurs[4]; + ssize_t tlen; int type; } heur_t;