From owner-svn-src-user@FreeBSD.ORG Tue Sep 13 23:28:04 2011 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id A35C6106564A; Tue, 13 Sep 2011 23:28:04 +0000 (UTC) (envelope-from gabor@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 93F4E8FC0A; Tue, 13 Sep 2011 23:28:04 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id p8DNS40C001062; Tue, 13 Sep 2011 23:28:04 GMT (envelope-from gabor@svn.freebsd.org) Received: (from gabor@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id p8DNS4Uu001058; Tue, 13 Sep 2011 23:28:04 GMT (envelope-from gabor@svn.freebsd.org) Message-Id: <201109132328.p8DNS4Uu001058@svn.freebsd.org> From: Gabor Kovesdan Date: Tue, 13 Sep 2011 23:28:04 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r225541 - user/gabor/grep/trunk/regex X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 13 Sep 2011 23:28:04 -0000 Author: gabor Date: Tue Sep 13 23:28:04 2011 New Revision: 225541 URL: http://svn.freebsd.org/changeset/base/225541 Log: - WCS dot position != MBS dot position Modified: user/gabor/grep/trunk/regex/fastmatch.h user/gabor/grep/trunk/regex/tre-fastmatch.c Modified: user/gabor/grep/trunk/regex/fastmatch.h ============================================================================== --- user/gabor/grep/trunk/regex/fastmatch.h Tue Sep 13 23:23:08 2011 (r225540) +++ user/gabor/grep/trunk/regex/fastmatch.h Tue Sep 13 23:28:04 2011 (r225541) @@ -13,7 +13,6 @@ typedef struct { size_t len; wchar_t *wpattern; bool *wescmap; - unsigned int hasdot; unsigned int qsBc[UCHAR_MAX + 1]; unsigned int *bmGs; char *pattern; @@ -24,6 +23,7 @@ typedef struct { const char *re_endp; /* flags */ + bool hasdot; bool bol; bool eol; bool word; Modified: user/gabor/grep/trunk/regex/tre-fastmatch.c ============================================================================== --- user/gabor/grep/trunk/regex/tre-fastmatch.c Tue Sep 13 23:23:08 2011 (r225540) +++ user/gabor/grep/trunk/regex/tre-fastmatch.c Tue Sep 13 23:28:04 2011 (r225541) @@ -224,8 +224,8 @@ static int fastcmp(const void *, const b #define _FILL_QSBC \ for (unsigned int i = 0; i <= UCHAR_MAX; i++) \ - fg->qsBc[i] = fg->len - fg->hasdot; \ - for (unsigned int i = fg->hasdot + 1; i < fg->len; i++) \ + fg->qsBc[i] = fg->len - hasdot; \ + for (unsigned int i = hasdot + 1; i < fg->len; i++) \ { \ fg->qsBc[(unsigned char)fg->pattern[i]] = fg->len - i; \ DPRINT(("BC shift for char %c is %zu\n", fg->pattern[i], \ @@ -279,14 +279,14 @@ static int fastcmp(const void *, const b #define _FILL_QSBC_WIDE \ /* Adjust the shift based on location of the last dot ('.'). */ \ - fg->defBc = fg->wlen - fg->hasdot; \ + fg->defBc = fg->wlen - whasdot; \ \ /* Preprocess pattern. */ \ fg->qsBc_table = hashtable_init(fg->wlen * (fg->icase ? 8 : 4), \ sizeof(tre_char_t), sizeof(int)); \ if (!fg->qsBc_table) \ FAIL_COMP(REG_ESPACE); \ - for (unsigned int i = fg->hasdot + 1; i < fg->wlen; i++) \ + for (unsigned int i = whasdot + 1; i < fg->wlen; i++) \ { \ int k = fg->wlen - i; \ int r; \ @@ -309,14 +309,14 @@ static int fastcmp(const void *, const b #define _FILL_QSBC_WIDE_REVERSED \ /* Adjust the shift based on location of the last dot ('.'). */ \ - fg->defBc = (size_t)firstdot; \ + fg->defBc = (size_t)wfirstdot; \ \ /* Preprocess pattern. */ \ fg->qsBc_table = hashtable_init(fg->wlen * (fg->icase ? 8 : 4), \ sizeof(tre_char_t), sizeof(int)); \ if (!fg->qsBc_table) \ FAIL_COMP(REG_ESPACE); \ - for (int i = firstdot - 1; i >= 0; i--) \ + for (int i = wfirstdot - 1; i >= 0; i--) \ { \ int k = i + 1; \ int r; \ @@ -518,7 +518,8 @@ int tre_compile_literal(fastmatch_t *fg, const tre_char_t *pat, size_t n, int cflags) { - ssize_t firstdot = -1; + size_t hasdot = 0, whasdot = 0; + ssize_t firstdot = -1, wfirstdot = -1; INIT_COMP; @@ -557,8 +558,8 @@ tre_compile_fast(fastmatch_t *fg, const int cflags) { tre_char_t *tmp; - size_t pos = 0; - ssize_t firstdot = -1; + size_t pos = 0, hasdot = 0, whasdot = 0;; + ssize_t firstdot = -1, wfirstdot = -1; bool escaped = false; bool *_escmap = NULL; @@ -647,9 +648,9 @@ tre_compile_fast(fastmatch_t *fg, const } else { - fg->hasdot = i; - if (firstdot == -1) - firstdot = i; + whasdot = i; + if (wfirstdot == -1) + wfirstdot = i; STORE_CHAR; } continue; @@ -699,6 +700,8 @@ badpat: return REG_BADPAT; } + fg->hasdot = whasdot; + /* * The pattern has been processed and copied to tmp as a literal string * with escapes, anchors (^$) and the word boundary match character @@ -708,25 +711,33 @@ badpat: SAVE_PATTERN(tmp, pos, fg->wpattern, fg->wlen); fg->wescmap = _escmap; STORE_MBS_PAT; - if (fg->wescmap != NULL) + if (fg->hasdot || (fg->wescmap != NULL)) { - escaped = false; - - fg->escmap = xmalloc(fg->len * sizeof(bool)); - if (!fg->escmap) + if (fg->wescmap != NULL) { - tre_free_fast(fg); - return REG_ESPACE; + fg->escmap = xmalloc(fg->len * sizeof(bool)); + if (!fg->escmap) + { + tre_free_fast(fg); + return REG_ESPACE; + } } + escaped = false; for (unsigned int i = 0; i < fg->len; i++) if (fg->pattern[i] == '\\') - escaped = ! escaped; + escaped = !escaped; else if (fg->pattern[i] == '.' && escaped) { fg->escmap[i] = true; escaped = false; } + else if (fg->pattern[i] == '.' && !escaped) + { + hasdot = i; + if (firstdot == -1) + firstdot = i; + } else escaped = false; } @@ -743,7 +754,7 @@ badpat: fg->icase ? 'y' : 'n', fg->word ? 'y' : 'n', fg->newline ? 'y' : 'n')); - if ((firstdot > -1) && (fg->len - fg->hasdot + 1 < (size_t)firstdot) && + if ((wfirstdot > -1) && (fg->wlen - whasdot + 1 < (size_t)wfirstdot) && fg->nosub) { fg->reversed = true;