From owner-svn-src-user@FreeBSD.ORG Wed Feb 15 11:11:59 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 6919B106564A; Wed, 15 Feb 2012 11:11:59 +0000 (UTC) (envelope-from gabor@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 5242F8FC08; Wed, 15 Feb 2012 11:11:59 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id q1FBBxNC058064; Wed, 15 Feb 2012 11:11:59 GMT (envelope-from gabor@svn.freebsd.org) Received: (from gabor@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id q1FBBxni058056; Wed, 15 Feb 2012 11:11:59 GMT (envelope-from gabor@svn.freebsd.org) Message-Id: <201202151111.q1FBBxni058056@svn.freebsd.org> From: Gabor Kovesdan Date: Wed, 15 Feb 2012 11:11:59 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r231755 - user/gabor/tre-integration/contrib/tre/lib X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 15 Feb 2012 11:11:59 -0000 Author: gabor Date: Wed Feb 15 11:11:58 2012 New Revision: 231755 URL: http://svn.freebsd.org/changeset/base/231755 Log: - Separate different compile logics for better later reuse and readability - Rename some functions that belong to a lower layer for clarity Modified: user/gabor/tre-integration/contrib/tre/lib/fastmatch.c user/gabor/tre-integration/contrib/tre/lib/tre-compile.c user/gabor/tre-integration/contrib/tre/lib/tre-compile.h user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h Modified: user/gabor/tre-integration/contrib/tre/lib/fastmatch.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/fastmatch.c Wed Feb 15 10:33:29 2012 (r231754) +++ user/gabor/tre-integration/contrib/tre/lib/fastmatch.c Wed Feb 15 11:11:58 2012 (r231755) @@ -51,12 +51,12 @@ tre_fixncomp(fastmatch_t *preg, const ch if (ret != REG_OK) return ret; else - ret = tre_compile_literal(preg, wregex, wlen, cflags); + ret = tre_proc_literal(preg, wregex, wlen, cflags); tre_free_pattern(wregex); return ret; } else - return tre_compile_literal(preg, NULL, 0, cflags); + return tre_proc_literal(preg, NULL, 0, cflags); } int @@ -73,13 +73,13 @@ tre_fastncomp(fastmatch_t *preg, const c return ret; else ret = (cflags & REG_LITERAL) - ? tre_compile_literal(preg, wregex, wlen, cflags) - : tre_compile_fast(preg, wregex, wlen, cflags); + ? tre_proc_literal(preg, wregex, wlen, cflags) + : tre_proc_fast(preg, wregex, wlen, cflags); tre_free_pattern(wregex); return ret; } else - return tre_compile_literal(preg, NULL, 0, cflags); + return tre_proc_literal(preg, NULL, 0, cflags); } @@ -98,15 +98,15 @@ tre_fastcomp(fastmatch_t *preg, const ch int tre_fixwncomp(fastmatch_t *preg, const wchar_t *regex, size_t n, int cflags) { - return tre_compile_literal(preg, regex, n, cflags); + return tre_proc_literal(preg, regex, n, cflags); } int tre_fastwncomp(fastmatch_t *preg, const wchar_t *regex, size_t n, int cflags) { return (cflags & REG_LITERAL) ? - tre_compile_literal(preg, regex, n, cflags) : - tre_compile_fast(preg, regex, n, cflags); + tre_proc_literal(preg, regex, n, cflags) : + tre_proc_fast(preg, regex, n, cflags); } int Modified: user/gabor/tre-integration/contrib/tre/lib/tre-compile.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/tre-compile.c Wed Feb 15 10:33:29 2012 (r231754) +++ user/gabor/tre-integration/contrib/tre/lib/tre-compile.c Wed Feb 15 11:11:58 2012 (r231755) @@ -1947,49 +1947,107 @@ tre_free_pattern(tre_char_t *wregex) int tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags) { + int ret; + + /* + * First, we always compile the NFA and it also serves as + * pattern validation. In this way, validation is not + * scattered through the code. + */ + ret = tre_compile_nfa(preg, regex, n, cflags); + if (ret != REG_OK) + return ret; + + /* + * Check if we can cheat with a fixed string algorithm + * if the pattern is long enough. + */ + ret = tre_compile_bm(preg, regex, n, cflags); + + /* Only try to compile heuristic if the fast matcher failed. */ + if (ret != REG_OK) + ret = tre_compile_heur(preg, regex, n, cflags); + else + preg->heur = NULL; + + /* When here, at least NFA surely succeeded. */ + return REG_OK; +} + +int +tre_compile_bm(regex_t *preg, const tre_char_t *regex, size_t n, int cflags) +{ + fastmatch_t *shortcut; + int ret; + + if (n < 2) + goto too_short; + shortcut = xmalloc(sizeof(fastmatch_t)); + if (!shortcut) + return REG_ESPACE; + ret = (cflags & REG_LITERAL) + ? tre_proc_literal(shortcut, regex, n, cflags) + : tre_proc_fast(shortcut, regex, n, cflags); + if (ret == REG_OK) + { + preg->shortcut = shortcut; + DPRINT("tre_compile_bm: pattern compiled for fast matcher\n"); + } + else + { +too_short: + xfree(shortcut); + preg->shortcut = NULL; + DPRINT("tre_compile_bm: pattern compilation failed for fast matcher\n"); + } + return ret; +} + +int +tre_compile_heur(regex_t *preg, const tre_char_t *regex, size_t n, int cflags) +{ + heur_t *heur; + int ret; + + heur = xmalloc(sizeof(heur_t)); + if (!heur) + return REG_ESPACE; + + ret = tre_proc_heur(heur, regex, n, cflags); + if (ret != REG_OK) + { + xfree(heur); + preg->heur = NULL; + DPRINT("tre_compile_heur: heuristic compilation failed, NFA will be used " + "entirely\n"); + } + else + { + preg->heur = heur; + DPRINT("tre_compile_heur: heuristic compiled to speed up the search\n"); + } + + return ret; +} + +int +tre_compile_nfa(regex_t *preg, const tre_char_t *regex, size_t n, int cflags) +{ tre_stack_t *stack; tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r; tre_pos_and_tags_t *p; int *counts = NULL, *offs = NULL; - int i, add = 0, ret; + int i, add = 0; tre_tnfa_transition_t *transitions, *initial; tre_tnfa_t *tnfa = NULL; tre_submatch_data_t *submatch_data; tre_tag_direction_t *tag_directions = NULL; reg_errcode_t errcode; tre_mem_t mem; - fastmatch_t *shortcut; - heur_t *heur; /* Parse context. */ tre_parse_ctx_t parse_ctx; - /* - * Check if we can cheat with a fixed string algorithm - * if the pattern is long enough. - */ - if (n >= 2) - { - shortcut = xmalloc(sizeof(fastmatch_t)); - if (!shortcut) - return REG_ESPACE; - ret = (cflags & REG_LITERAL) - ? tre_compile_literal(shortcut, regex, n, cflags) - : tre_compile_fast(shortcut, regex, n, cflags); - if (ret == REG_OK) - { - preg->shortcut = shortcut; - preg->re_nsub = 0; - DPRINT("tre_compile: pattern compiled for fast matcher\n"); - } - else - { - xfree(shortcut); - preg->shortcut = NULL; - DPRINT("tre_compile: pattern compilation failed for fast matcher\n"); - } - } - /* Allocate a stack used throughout the compilation process for various purposes. */ stack = tre_stack_new(512, 10240, 128); @@ -2008,7 +2066,7 @@ tre_compile(regex_t *preg, const tre_cha parse_ctx.len = n; parse_ctx.cflags = cflags; parse_ctx.max_backref = -1; - DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex)); + DPRINT(("tre_compile_nfa: parsing '%.*" STRF "'\n", (int)n, regex)); errcode = tre_parse(&parse_ctx); if (errcode != REG_OK) ERROR_EXIT(errcode); @@ -2040,7 +2098,7 @@ tre_compile(regex_t *preg, const tre_cha regexp does not have back references, this can be skipped. */ if (tnfa->have_backrefs || !(cflags & REG_NOSUB)) { - DPRINT(("tre_compile: setting up tags\n")); + DPRINT(("tre_compile_nfa: setting up tags\n")); /* Figure out how many tags we will need. */ errcode = tre_add_tags(NULL, stack, tree, tnfa); @@ -2277,42 +2335,10 @@ tre_compile(regex_t *preg, const tre_cha preg->TRE_REGEX_T_FIELD = (void *)tnfa; - /* - * If we reach here, the regex is parsed and legal. Now we try to construct - * a heuristic to speed up matching if we do not already have a shortcut - * pattern. - */ - if (!preg->shortcut) - { - heur = xmalloc(sizeof(heur_t)); - if (!heur) - ERROR_EXIT(REG_ESPACE); - - ret = tre_compile_heur(heur, regex, n, cflags); - if (ret != REG_OK) - { - xfree(heur); - preg->heur = NULL; - DPRINT("tre_compile: heuristic compilation failed, NFA will be used " - "entirely\n"); - } - else - { - preg->heur = heur; - DPRINT("tre_compile: heuristic compiled to speed up the search\n"); - } - } - else - preg->heur = NULL; - return REG_OK; error_exit: /* Free everything that was allocated and return the error code. */ - if (shortcut != NULL) - xfree(shortcut); - if (heur != NULL) - xfree(heur); if (mem != NULL) tre_mem_destroy(mem); if (stack != NULL) Modified: user/gabor/tre-integration/contrib/tre/lib/tre-compile.h ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/tre-compile.h Wed Feb 15 10:33:29 2012 (r231754) +++ user/gabor/tre-integration/contrib/tre/lib/tre-compile.h Wed Feb 15 11:11:58 2012 (r231755) @@ -10,6 +10,8 @@ #ifndef TRE_COMPILE_H #define TRE_COMPILE_H 1 +#include + typedef struct { int position; int code_min; @@ -22,6 +24,13 @@ typedef struct { int *params; } tre_pos_and_tags_t; +int tre_compile_bm(regex_t *preg, const tre_char_t *regex, size_t n, + int cflags); +int tre_compile_heur(regex_t *preg, const tre_char_t *regex, size_t n, + int cflags); +int tre_compile_nfa(regex_t *preg, const tre_char_t *regex, size_t n, + int cflags); + #endif /* TRE_COMPILE_H */ /* EOF */ Modified: user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c Wed Feb 15 10:33:29 2012 (r231754) +++ user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c Wed Feb 15 11:11:58 2012 (r231755) @@ -402,8 +402,8 @@ static int fastcmp(const fastmatch_t *fg * Returns: REG_OK on success, error code otherwise */ int -tre_compile_literal(fastmatch_t *fg, const tre_char_t *pat, size_t n, - int cflags) +tre_proc_literal(fastmatch_t *fg, const tre_char_t *pat, size_t n, + int cflags) { INIT_COMP; @@ -421,7 +421,7 @@ tre_compile_literal(fastmatch_t *fg, con SAVE_PATTERN(pat, n, fg->pattern, fg->len); #endif - DPRINT(("tre_compile_literal: pattern: %s, len %zu, icase: %c, word: %c, " + DPRINT(("tre_proc_literal: pattern: %s, len %zu, icase: %c, word: %c, " "newline %c\n", fg->pattern, fg->len, fg->icase ? 'y' : 'n', fg->word ? 'y' : 'n', fg->newline ? 'y' : 'n')); @@ -439,8 +439,8 @@ tre_compile_literal(fastmatch_t *fg, con * Returns: REG_OK on success, error code otherwise */ int -tre_compile_fast(fastmatch_t *fg, const tre_char_t *pat, size_t n, - int cflags) +tre_proc_fast(fastmatch_t *fg, const tre_char_t *pat, size_t n, + int cflags) { tre_char_t *tmp; size_t pos = 0; @@ -563,7 +563,7 @@ tre_compile_fast(fastmatch_t *fg, const continue; badpat: xfree(tmp); - DPRINT(("tre_compile_fast: compilation of pattern failed, falling" + DPRINT(("tre_proc_fast: compilation of pattern failed, falling" "back to NFA\n")); return REG_BADPAT; } @@ -582,7 +582,7 @@ badpat: xfree(tmp); - DPRINT(("tre_compile_fast: pattern: %s, len %zu, bol %c, eol %c, " + DPRINT(("tre_proc_fast: pattern: %s, len %zu, bol %c, eol %c, " "icase: %c, word: %c, newline %c\n", fg->pattern, fg->len, fg->bol ? 'y' : 'n', fg->eol ? 'y' : 'n', fg->icase ? 'y' : 'n', fg->word ? 'y' : 'n', Modified: user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h Wed Feb 15 10:33:29 2012 (r231754) +++ user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h Wed Feb 15 11:11:58 2012 (r231755) @@ -9,9 +9,9 @@ #include "hashtable.h" #include "tre-internal.h" -int tre_compile_literal(fastmatch_t *preg, const tre_char_t *regex, +int tre_proc_literal(fastmatch_t *preg, const tre_char_t *regex, size_t, int); -int tre_compile_fast(fastmatch_t *preg, const tre_char_t *regex, size_t, int); +int tre_proc_fast(fastmatch_t *preg, const tre_char_t *regex, size_t, int); int tre_match_fast(const fastmatch_t *fg, const void *data, size_t len, tre_str_type_t type, int nmatch, regmatch_t pmatch[], int eflags); void tre_free_fast(fastmatch_t *preg); Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c Wed Feb 15 10:33:29 2012 (r231754) +++ user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c Wed Feb 15 11:11:58 2012 (r231755) @@ -124,7 +124,7 @@ * heuristic cannot be constructed. */ int -tre_compile_heur(heur_t *h, const tre_char_t *regex, size_t len, int cflags) +tre_proc_heur(heur_t *h, const tre_char_t *regex, size_t len, int cflags) { tre_char_t **arr, *heur; tre_char_t **farr; @@ -513,7 +513,7 @@ ok: errcode = REG_ESPACE; goto err; } - ret = tre_compile_literal(h->heurs[i], farr[i], fsiz[i], 0); + ret = tre_proc_literal(h->heurs[i], farr[i], fsiz[i], 0); if (ret != REG_OK) { errcode = REG_BADPAT; Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h Wed Feb 15 10:33:29 2012 (r231754) +++ user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h Wed Feb 15 11:11:58 2012 (r231755) @@ -23,8 +23,8 @@ typedef struct { int type; } heur_t; -extern int tre_compile_heur(heur_t *h, const tre_char_t *regex, - size_t len, int cflags); +extern int tre_proc_heur(heur_t *h, const tre_char_t *regex, + size_t len, int cflags); extern void tre_free_heur(heur_t *h); #endif /* TRE_HEURISTIC_H */