Date: Thu, 20 Oct 2011 22:38:24 +0000 (UTC)
From: Gabor Kovesdan <gabor@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-user@freebsd.org
Subject: svn commit: r226590 - user/gabor/tre-integration/contrib/tre/lib
Message-ID: <201110202238.p9KMcOu6094809@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: gabor Date: Thu Oct 20 22:38:24 2011 New Revision: 226590 URL: http://svn.freebsd.org/changeset/base/226590 Log: - Fix return value in heuristic compiler - Implement longest fragment heuristic for REG_NEWLINE behavior Modified: user/gabor/tre-integration/contrib/tre/lib/regexec.c user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h Modified: user/gabor/tre-integration/contrib/tre/lib/regexec.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/regexec.c Thu Oct 20 21:49:26 2011 (r226589) +++ user/gabor/tre-integration/contrib/tre/lib/regexec.c Thu Oct 20 22:38:24 2011 (r226590) @@ -165,10 +165,10 @@ tre_match(const tre_tnfa_t *tnfa, const pmatch, eflags); } -#define FIX_OFFSETS \ +#define FIX_OFFSETS(adj) \ if (ret == REG_NOMATCH) \ { \ - st += n; \ + adj; \ continue; \ } \ else if ((ret == REG_OK) && !(tnfa->cflags & REG_NOSUB)) \ @@ -191,57 +191,94 @@ tre_match(const tre_tnfa_t *tnfa, const const char *data_byte = string; const tre_char_t *data_wide = string; - DPRINT(("tre_match: using a heuristic [%s/%s] to speed up the " - "search\n", heur->start->pattern, heur->end->pattern)); - - while (st < len) + if (heur->type == HEUR_LONGEST) { - SEEK_TO(st); - - /* Prefix heuristic */ - ret = tre_match_fast(heur->heurs[0], string, len - st, type, nmatch, - pmatch, eflags); - if (ret != REG_OK) - return ret; - st += pmatch[0].rm_so; - n = pmatch[0].rm_eo; - - /* Intermediate heuristics */ - while (!((heur->heurs[i] == NULL) || - (heur->prefix && heur->heurs[i + 1] == NULL))) + while (st < len) { - SEEK_TO(st + n); - ret = tre_match_fast(heur->heurs[i], string, len - st - n, type, - nmatch, pmatch, eflags); - if (ret != REG_OK) - return ret; - n += pmatch[0].rm_eo; - i++; - } + size_t eo, so; - /* Suffix heuristic available */ - if (heur->prefix && heur->heurs[i] != NULL) - { - SEEK_TO(st + n); - ret = 
tre_match_fast(heur->heurs[i], string, len - st - n, type, - nmatch, pmatch, eflags); + SEEK_TO(st); + ret = tre_match_fast(heur->heurs[0], string, len - st, type, nmatch, + pmatch, eflags); if (ret != REG_OK) return ret; - n += pmatch[0].rm_eo; - SEEK_TO(st); - ret = tre_match(tnfa, string, n, type, nmatch, pmatch, - eflags, NULL, NULL); - FIX_OFFSETS; - } - /* Suffix heuristic not available */ - else + for (so = st + pmatch[0].rm_so - 1; ; so--) + { + if ((type == STR_WIDE) ? (data_wide[so] == TRE_CHAR('\n')) : + (data_byte[so] == '\n')) + break; + if (so == 0) + break; + } + + for (eo = st + pmatch[0].rm_eo; st + eo < len; eo++) + { + if ((type == STR_WIDE) ? (data_wide[eo] == TRE_CHAR('\n')) : + (data_byte[eo] == '\n')) + break; + } + + SEEK_TO(so); + ret = tre_match(tnfa, string, eo - so, type, nmatch, pmatch, eflags, NULL, NULL); + FIX_OFFSETS(st = eo); + + } + return REG_NOMATCH; + } + else + { + while (st < len) { SEEK_TO(st); - ret = tre_match(tnfa, string, len - st, type, nmatch, pmatch, - eflags, NULL, NULL); - FIX_OFFSETS; - } + + /* Prefix heuristic */ + ret = tre_match_fast(heur->heurs[0], string, len - st, + type, nmatch, pmatch, eflags); + if (ret != REG_OK) + return ret; + st += pmatch[0].rm_so; + n = pmatch[0].rm_eo; + + /* Intermediate heuristics */ + while (!((heur->heurs[i] == NULL) || + ((heur->type == HEUR_PREFIX_ARRAY) && + heur->heurs[i + 1] == NULL))) + { + SEEK_TO(st + n); + ret = tre_match_fast(heur->heurs[i], string, len - st - n, + type, nmatch, pmatch, eflags); + if (ret != REG_OK) + return ret; + n += pmatch[0].rm_eo; + i++; + } + + /* Suffix heuristic available */ + if ((heur->type == HEUR_ARRAY) && heur->heurs[i] != NULL) + { + SEEK_TO(st + n); + ret = tre_match_fast(heur->heurs[i], string, len - st - n, + type, nmatch, pmatch, eflags); + if (ret != REG_OK) + return ret; + n += pmatch[0].rm_eo; + + SEEK_TO(st); + ret = tre_match(tnfa, string, n, type, nmatch, pmatch, + eflags, NULL, NULL); + FIX_OFFSETS(st += n); + } + /* Suffix 
heuristic not available */ + else + { + SEEK_TO(st); + ret = tre_match(tnfa, string, len - st, type, nmatch, + pmatch, eflags, NULL, NULL); + FIX_OFFSETS(st += n); + } + } + return REG_NOMATCH; } } Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c Thu Oct 20 21:49:26 2011 (r226589) +++ user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c Thu Oct 20 22:38:24 2011 (r226590) @@ -133,6 +133,8 @@ tre_compile_heur(heur_t *h, const tre_ch if (!heur) return REG_ESPACE; + h->type = HEUR_ARRAY; + while (true) { @@ -316,7 +318,7 @@ end_segment: errcode = REG_BADPAT; goto err; } - h->prefix = true; + h->type = HEUR_PREFIX_ARRAY; goto ok; } @@ -340,7 +342,6 @@ end_segment: } ok: - { size_t m = 1; int ret; @@ -348,12 +349,6 @@ ok: for (int i = 1; i < j; i++) m = (length[i] > length[m]) ? i : m; - if (!h->heurs) - { - errcode = REG_ESPACE; - goto err; - } - for (int i = 0; i < MIN(3, j + 1); i++) { h->heurs[i] = xmalloc(sizeof(fastmatch_t)); @@ -371,30 +366,41 @@ ok: goto err2; \ } - ret = tre_compile_fast(h->heurs[0], arr[0], length[0], 0); - CHECK_ERR - if (j == 1) + if (cflags & REG_NEWLINE) { - xfree(h->heurs[1]); - h->heurs[1] = NULL; - goto finish; + ret = tre_compile_fast(h->heurs[0], arr[m], length[m], 0); + CHECK_ERR + h->type = HEUR_LONGEST; } else - ret = tre_compile_fast(h->heurs[1], arr[m], length[m], 0); - CHECK_ERR - if (h->prefix || (m == j - 1)) { - xfree(h->heurs[2]); - h->heurs[2] = NULL; - goto finish; + ret = tre_compile_fast(h->heurs[0], arr[0], length[0], 0); + CHECK_ERR + if (j == 1) + { + free(h->heurs[1]); + h->heurs[1] = NULL; + errcode = REG_OK; + goto finish; + } + else + ret = tre_compile_fast(h->heurs[1], arr[m], length[m], 0); + CHECK_ERR + if ((h->type == HEUR_PREFIX_ARRAY) || (m == j - 1)) + { + xfree(h->heurs[2]); + h->heurs[2] = NULL; + errcode = REG_OK; + goto finish; + } + else 
+ ret = tre_compile_fast(h->heurs[2], arr[j - 1], length[j - 1], 0); + CHECK_ERR + h->heurs[3] = NULL; } - else - ret = tre_compile_fast(h->heurs[2], arr[j - 1], length[j - 1], 0); - CHECK_ERR - h->heurs[3] = NULL; - errcode = REG_OK; - goto finish; + errcode = REG_OK; + goto finish; } err2: Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h Thu Oct 20 21:49:26 2011 (r226589) +++ user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h Thu Oct 20 22:38:24 2011 (r226590) @@ -7,10 +7,13 @@ #include "tre-fastmatch.h" #include "tre-internal.h" +#define HEUR_ARRAY 0 +#define HEUR_PREFIX_ARRAY 1 +#define HEUR_LONGEST 2 + typedef struct { fastmatch_t *heurs[4]; - bool prefix; - bool newline; + int type; } heur_t; extern int tre_compile_heur(heur_t *h, const tre_char_t *regex,
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201110202238.p9KMcOu6094809>