From owner-svn-src-user@FreeBSD.ORG Sat Jul 2 20:14:40 2011 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id D8E0B106564A; Sat, 2 Jul 2011 20:14:40 +0000 (UTC) (envelope-from gabor@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id C7CDB8FC18; Sat, 2 Jul 2011 20:14:40 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id p62KEewK054193; Sat, 2 Jul 2011 20:14:40 GMT (envelope-from gabor@svn.freebsd.org) Received: (from gabor@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id p62KEefU054187; Sat, 2 Jul 2011 20:14:40 GMT (envelope-from gabor@svn.freebsd.org) Message-Id: <201107022014.p62KEefU054187@svn.freebsd.org> From: Gabor Kovesdan Date: Sat, 2 Jul 2011 20:14:40 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r223726 - user/gabor/tre-integration/contrib/tre/lib X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 02 Jul 2011 20:14:40 -0000 Author: gabor Date: Sat Jul 2 20:14:40 2011 New Revision: 223726 URL: http://svn.freebsd.org/changeset/base/223726 Log: - Fix some bugs - Refactor to support single-byte, multi-byte and wide character strings; still incomplete at the moment - Be more consistent with TRE coding style Modified: user/gabor/tre-integration/contrib/tre/lib/fastmatch.c user/gabor/tre-integration/contrib/tre/lib/fastmatch.h user/gabor/tre-integration/contrib/tre/lib/regcomp.c user/gabor/tre-integration/contrib/tre/lib/regexec.c 
user/gabor/tre-integration/contrib/tre/lib/tre-compile.c Modified: user/gabor/tre-integration/contrib/tre/lib/fastmatch.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/fastmatch.c Sat Jul 2 18:43:35 2011 (r223725) +++ user/gabor/tre-integration/contrib/tre/lib/fastmatch.c Sat Jul 2 20:14:40 2011 (r223726) @@ -38,9 +38,10 @@ #include "fastmatch.h" #include "hashtable.h" #include "tre.h" +#include "tre-internal.h" #include "xmalloc.h" -static int fastcmp(const tre_char_t *, const tre_char_t *, size_t); +static int fastcmp(const tre_char_t *, const void *, size_t, tre_str_type_t); static void revstr(tre_char_t *, int); #ifdef TRE_WCHAR @@ -49,6 +50,30 @@ static void revstr(tre_char_t *, int); #define TRE_CHAR(n) n #endif +#define SKIP_CHARS(n) \ + do { \ + switch (type) \ + { \ + case STR_BYTE: \ + startptr = str_byte + n; \ + break; \ + case STR_MBS: \ + for (skip = j = 0; j < n; j++) \ + { \ + siz = mbrlen(str_byte, MB_CUR_MAX, NULL); \ + skip += siz; \ + } \ + startptr = str_byte + skip; \ + break; \ + case STR_WIDE: \ + startptr = str_wide + n; \ + break; \ + default: \ + /* XXX */ \ + break; \ + } \ + } while (0); \ + /* * Returns: -1 on failure, 0 on success */ @@ -57,14 +82,16 @@ tre_fastcomp_literal(fastmatch_t *fg, co { /* Initialize. */ - fg->len = n; + fg->len = (n == 0) ? tre_strlen(pat) : n; fg->bol = false; fg->eol = false; fg->reversed = false; fg->cflags = cflags; - fg->pattern = xmalloc((n + 1) * sizeof(tre_char_t)); - memcpy(&fg->pattern, pat, n * sizeof(tre_char_t)); - fg->pattern[n] = TRE_CHAR('\0'); + fg->pattern = xmalloc((fg->len + 1) * sizeof(tre_char_t)); + if (fg->pattern == NULL) + return -1; + memcpy(fg->pattern, pat, fg->len * sizeof(tre_char_t)); + fg->pattern[fg->len] = TRE_CHAR('\0'); /* Preprocess pattern. 
*/ #ifdef TRE_WCHAR @@ -84,7 +111,7 @@ tre_fastcomp_literal(fastmatch_t *fg, co fg->qsBc[fg->pattern[i]] = fg->len - i; #endif - return 0; + return REG_OK; } /* @@ -99,7 +126,7 @@ tre_fastcomp(fastmatch_t *fg, const tre_ int lastHalfDot = 0; /* Initialize. */ - fg->len = n; + fg->len = (n == 0) ? tre_strlen(pat) : n; fg->bol = false; fg->eol = false; fg->reversed = false; @@ -107,7 +134,7 @@ tre_fastcomp(fastmatch_t *fg, const tre_ fg->cflags = cflags; /* Remove end-of-line character ('$'). */ - if (fg->len > 0 && pat[fg->len - 1] == TRE_CHAR('$')) + if ((fg->len > 0) && (pat[fg->len - 1] == TRE_CHAR('$'))) { fg->eol = true; fg->len--; @@ -121,9 +148,9 @@ tre_fastcomp(fastmatch_t *fg, const tre_ pat++; } - if (fg->len >= 14 && - memcmp(pat, TRE_CHAR("[[:<:]]"), 7 * sizeof(tre_char_t)) == 0 && - memcmp(pat + fg->len - 7, TRE_CHAR("[[:>:]]"), 7 * sizeof(tre_char_t)) == 0) + if ((fg->len >= 14) && + (memcmp(pat, TRE_CHAR("[[:<:]]"), 7 * sizeof(tre_char_t)) == 0) && + (memcmp(pat + fg->len - 7, TRE_CHAR("[[:>:]]"), 7 * sizeof(tre_char_t)) == 0)) { fg->len -= 14; pat += 7; @@ -167,7 +194,7 @@ tre_fastcomp(fastmatch_t *fg, const tre_ /* Free memory and let others know this is empty. 
*/ free(fg->pattern); fg->pattern = NULL; - return (-1); + return -1; } } @@ -230,20 +257,43 @@ tre_fastcomp(fastmatch_t *fg, const tre_ if (fg->reversed) revstr(fg->pattern, fg->len); - return (0); + return REG_OK; } int -tre_fastexec(const fastmatch_t *fg, const tre_char_t *data, size_t len, - int nmatch, regmatch_t pmatch[]) +tre_fastexec(const fastmatch_t *fg, const void *data, size_t len, + tre_str_type_t type, int nmatch, regmatch_t pmatch[]) { unsigned int j; + size_t siz, skip; int cnt = 0; int ret = REG_NOMATCH; + const char *str_byte = data; + const void *startptr; +#ifdef TRE_WCHAR + const wchar_t *str_wide = data; +#endif + + if (len == (unsigned)-1) + { + switch (type) + { + case STR_BYTE: + case STR_MBS: + len = strlen(str_byte); + break; + case STR_WIDE: + len = wcslen(str_wide); + break; + default: + /* XXX */ + break; + } + } /* No point in going farther if we do not have enough data. */ if (len < fg->len) - return (ret); + return ret; /* Only try once at the beginning or ending of the line. */ if (fg->bol || fg->eol) { @@ -251,28 +301,29 @@ tre_fastexec(const fastmatch_t *fg, cons if (!((fg->bol && fg->eol) && (len != fg->len))) { /* Determine where in data to start search at. */ j = fg->eol ? len - fg->len : 0; - if (fastcmp(fg->pattern, data + j, - fg->len) == -1) { + SKIP_CHARS(j); + if (fastcmp(fg->pattern, startptr, fg->len, type) == -1) { if (!(fg->cflags & REG_NOSUB) || (nmatch < 1)) - return 0; + return REG_OK; pmatch[cnt].rm_so = j; pmatch[cnt].rm_eo = j + fg->len; - ret = 0; + return REG_OK; } } } else if (fg->reversed) { /* Quick Search algorithm. 
*/ j = len; do { - if (fastcmp(fg->pattern, data + j - fg->len, - fg->len) == -1) { + SKIP_CHARS(j - fg->len); + if (fastcmp(fg->pattern, startptr, fg->len, type) == -1) { if (!(fg->cflags & REG_NOSUB) || (nmatch < 1)) - return (0); + return REG_OK; pmatch[cnt++].rm_so = j - fg->len; pmatch[cnt++].rm_eo = j; nmatch--; + ret = REG_OK; if (nmatch < 1) - return (0); + return ret; else { j -= 2 * fg->len; continue; @@ -297,14 +348,16 @@ tre_fastexec(const fastmatch_t *fg, cons /* Quick Search algorithm. */ j = 0; do { - if (fastcmp(fg->pattern, data + j, fg->len) == -1) { + SKIP_CHARS(j); + if (fastcmp(fg->pattern, startptr, fg->len, type) == -1) { if (!(fg->cflags & REG_NOSUB) || (nmatch < 1)) - return (0); + return REG_OK; pmatch[cnt++].rm_so = j; pmatch[cnt++].rm_eo = j + fg->len; nmatch--; + ret = REG_OK; if (nmatch < 1) - return (0); + return ret; else { j += fg->len; continue; @@ -327,7 +380,7 @@ tre_fastexec(const fastmatch_t *fg, cons #endif } while (j <= (len - fg->len)); } - return (ret); + return ret; } void @@ -345,15 +398,45 @@ tre_fastfree(fastmatch_t *fg) * -1 on success */ static inline int -fastcmp(const tre_char_t *pat, const tre_char_t *data, size_t len) +fastcmp(const tre_char_t *pat, const void *data, size_t len, + tre_str_type_t type) { + const char *str_byte = data; +#ifdef TRE_WCHAR + const wchar_t *str_wide = data; + wint_t wc; + size_t s; +#endif for (unsigned int i = 0; i < len; i++) { - if ((pat[i] == data[i]) || (pat[i] == TRE_CHAR('.'))) + if (pat[i] == TRE_CHAR('.')) continue; - return (i); + switch (type) + { + case STR_BYTE: + if (pat[i] == btowc(str_byte[i])) + continue; + break; + case STR_MBS: + s = mbrtowc(&wc, str_byte, MB_CUR_MAX, NULL); + if (s == (size_t)-1) + return i; + else + str_byte += s; + if (pat[i] == wc) + continue; + break; + case STR_WIDE: + if (pat[i] == str_wide[i]) + continue; + break; + default: + /* XXX */ + break; + } + return i; } - return (-1); + return -1; } static inline void Modified: 
user/gabor/tre-integration/contrib/tre/lib/fastmatch.h ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/fastmatch.h Sat Jul 2 18:43:35 2011 (r223725) +++ user/gabor/tre-integration/contrib/tre/lib/fastmatch.h Sat Jul 2 20:14:40 2011 (r223726) @@ -32,6 +32,7 @@ #include "hashtable.h" #include "tre.h" +#include "tre-internal.h" typedef struct { size_t len; @@ -54,8 +55,8 @@ int tre_fastcomp_literal(fastmatch_t *pr size_t, int cflags); int tre_fastcomp(fastmatch_t *preg, const tre_char_t *regex, size_t, int cflags); -int tre_fastexec(const fastmatch_t *fg, const tre_char_t *data, - size_t len, int nmatch, regmatch_t pmatch[]); +int tre_fastexec(const fastmatch_t *fg, const void *data, size_t len, + tre_str_type_t type, int nmatch, regmatch_t pmatch[]); void tre_fastfree(fastmatch_t *preg); #endif /* FASTMATCH_H */ Modified: user/gabor/tre-integration/contrib/tre/lib/regcomp.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/regcomp.c Sat Jul 2 18:43:35 2011 (r223725) +++ user/gabor/tre-integration/contrib/tre/lib/regcomp.c Sat Jul 2 20:14:40 2011 (r223726) @@ -110,14 +110,13 @@ tre_regncomp(regex_t *preg, const char * int tre_regcomp(regex_t *preg, const char *regex, int cflags) { - size_t len; + size_t len; - if (cflags & REG_PEND) - { - if (preg->re_endp >= regex) - len = preg->re_endp - regex; - else - len = 0; + if (cflags & REG_PEND) + { + len = (preg->re_endp >= regex) + ? 
preg->re_endp - regex + : 0; return tre_regncomp(preg, regex, len, cflags); } else Modified: user/gabor/tre-integration/contrib/tre/lib/regexec.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/regexec.c Sat Jul 2 18:43:35 2011 (r223725) +++ user/gabor/tre-integration/contrib/tre/lib/regexec.c Sat Jul 2 20:14:40 2011 (r223726) @@ -151,15 +151,14 @@ tre_have_approx(const regex_t *preg) static int tre_match(const tre_tnfa_t *tnfa, const void *string, size_t len, tre_str_type_t type, size_t nmatch, regmatch_t pmatch[], - int eflags, void *shortcut) + int eflags, fastmatch_t *shortcut) { reg_errcode_t status; int *tags = NULL, eo; /* Check if we can cheat with a fixed string */ if (shortcut != NULL) - return tre_fastexec((fastmatch_t *)shortcut, (const tre_char_t *)string, - len, nmatch, pmatch); + return tre_fastexec(shortcut, string, len, nmatch, pmatch); if (tnfa->num_tags > 0 && nmatch > 0) { Modified: user/gabor/tre-integration/contrib/tre/lib/tre-compile.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/tre-compile.c Sat Jul 2 18:43:35 2011 (r223725) +++ user/gabor/tre-integration/contrib/tre/lib/tre-compile.c Sat Jul 2 20:14:40 2011 (r223726) @@ -1866,23 +1866,29 @@ tre_compile(regex_t *preg, const tre_cha tre_tag_direction_t *tag_directions = NULL; reg_errcode_t errcode; tre_mem_t mem; - fastmatch_t shortcut; + fastmatch_t *shortcut; /* Parse context. */ tre_parse_ctx_t parse_ctx; /* Check if we can cheat with a fixed string algorithm. */ + shortcut = xmalloc(sizeof(fastmatch_t)); + if (!shortcut) + return REG_ESPACE; ret = (cflags & REG_LITERAL) - ? tre_fastcomp_literal(&shortcut, regex, n, cflags) - : tre_fastcomp(&shortcut, regex, n, cflags); + ? 
tre_fastcomp_literal(shortcut, regex, n, cflags) + : tre_fastcomp(shortcut, regex, n, cflags); if (!ret) { - preg->shortcut = &shortcut; + preg->shortcut = shortcut; preg->re_nsub = 0; return REG_OK; } else - preg->shortcut = NULL; + { + free(shortcut); + preg->shortcut = NULL; + } /* Allocate a stack used throughout the compilation process for various purposes. */