From owner-svn-src-user@FreeBSD.ORG Thu Aug 25 18:03:29 2011 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id CD1EC106564A; Thu, 25 Aug 2011 18:03:29 +0000 (UTC) (envelope-from gabor@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id BC94B8FC0C; Thu, 25 Aug 2011 18:03:29 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id p7PI3TYN013839; Thu, 25 Aug 2011 18:03:29 GMT (envelope-from gabor@svn.freebsd.org) Received: (from gabor@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id p7PI3TO3013837; Thu, 25 Aug 2011 18:03:29 GMT (envelope-from gabor@svn.freebsd.org) Message-Id: <201108251803.p7PI3TO3013837@svn.freebsd.org> From: Gabor Kovesdan Date: Thu, 25 Aug 2011 18:03:29 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r225183 - user/gabor/tre-integration/contrib/tre/lib X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 25 Aug 2011 18:03:29 -0000 Author: gabor Date: Thu Aug 25 18:03:29 2011 New Revision: 225183 URL: http://svn.freebsd.org/changeset/base/225183 Log: - Add support for REG_EXTENDED - Some accuracy fixes for REG_BASIC Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c Thu Aug 25 17:43:06 2011 (r225182) +++ user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c Thu Aug 25 18:03:29 2011 (r225183) @@ -125,7 +125,15 @@ st = i + 1; \ escaped = false; \ goto end_segment; \ - } while (0); + } while (0) + +#define STORE_CHAR \ + do \ + { \ + escaped = false; \ + heur[pos++] = regex[i]; \ + } while (0) + /* * Parses a regular expression and constructs a heuristic in heur_t and @@ -140,10 +148,6 @@ tre_compile_heur(heur_t *h, const tre_ch bool escaped = false; int errcode, ret; - /* XXX: only basic regexes are supported. */ - if (cflags & REG_EXTENDED) - return REG_BADPAT; - /* Temporary space, len will be enough. */ heur = xmalloc(len); if (!heur) @@ -168,10 +172,17 @@ tre_compile_heur(heur_t *h, const tre_ch switch (regex[i]) { - /* Bracketed expression is substituted with a dot. */ + /* + * Bracketed expression is substituted with a dot or the + * brackets are treated as normal if at least the opening + * bracket is escaped. + */ case TRE_CHAR('['): PARSE_BRACKETS; - heur[pos++] = TRE_CHAR('.'); + if (escaped) + STORE_CHAR; + else + heur[pos++] = TRE_CHAR('.'); continue; /* @@ -180,14 +191,22 @@ tre_compile_heur(heur_t *h, const tre_ch * character. */ case TRE_CHAR('{'): + if (escaped && (i == 1)) + STORE_CHAR; + else if ((i == 0) && !(cflags & REG_EXTENDED)) + STORE_CHAR; + else if ((i == 0) && (cflags & REG_EXTENDED)) + continue; + PARSE_UNIT('{', '}'); - if (escaped) + if (escaped ^ (cflags & REG_EXTENDED)) { pos--; END_SEGMENT; } - heur[pos++] = regex[i]; - break; + else + STORE_CHAR; + continue; /* * Terminates the current segment when escaped, @@ -195,10 +214,11 @@ tre_compile_heur(heur_t *h, const tre_ch */ case TRE_CHAR('('): PARSE_UNIT('(', ')'); - if (escaped) + if (escaped ^ (cflags & REG_EXTENDED)) END_SEGMENT; - heur[pos++] = regex[i]; - break; + else + STORE_CHAR; + continue; /* * Sets escaped flag. @@ -207,24 +227,88 @@ tre_compile_heur(heur_t *h, const tre_ch */ case TRE_CHAR('\\'): if (escaped) - heur[pos++] = regex[i]; - escaped = !escaped; + STORE_CHAR; + else + escaped = !escaped; continue; /* - * If not the first character and not escaped, erases the + * BRE: If not the first character and not escaped, erases the * last character and terminates the segment. * Otherwise treated as a normal character. + * ERE: Skipped if first character (GNU), rest is like in BRE. */ case TRE_CHAR('*'): - if ((i != 0) && !escaped) + if (escaped || (!(cflags & REG_EXTENDED) && (i == 0))) + STORE_CHAR; + else if ((i != 0)) + { + pos--; + END_SEGMENT; + } + continue; + + /* + * In BRE, it is a normal character, behavior is undefined + * when escaped. + * In ERE, it is special unless escaped. Terminate segment + * when not escaped. Last character is not removed because it + * must occur at least once. It is skipped when first + * character (GNU). + */ + case TRE_CHAR('+'): + if ((cflags & REG_EXTENDED) && (i == 0)) + continue; + else if ((cflags & REG_EXTENDED) ^ escaped) + END_SEGMENT; + else + STORE_CHAR; + continue; + + /* + * In BRE, it is a normal character, behavior is undefined + * when escaped. + * In ERE, it is special unless escaped. Terminate segment + * when not escaped. Last character is removed. Skipped when + * first character (GNU). + */ + case TRE_CHAR('?'): + if ((cflags & REG_EXTENDED) && (i == 0)) + continue; + if ((cflags & REG_EXTENDED) ^ escaped) { pos--; END_SEGMENT; } else - heur[pos++] = regex[i]; - break; + STORE_CHAR; + continue; + + /* + * Fail if it is an ERE alternation marker. + */ + case TRE_CHAR('|'): + if ((cflags & REG_EXTENDED) && !escaped) + { + errcode = REG_BADPAT; + goto badpat2; + } + else if (!(cflags & REG_EXTENDED) && escaped) + END_SEGMENT; + else + STORE_CHAR; + continue; + + /* + * Cut the segment at an escaped dot because the fast matcher + * cannot handle it. + */ + case TRE_CHAR('.'): + if (escaped) + END_SEGMENT; + else + STORE_CHAR; + continue; /* * If escaped, terminates segment. @@ -234,7 +318,8 @@ tre_compile_heur(heur_t *h, const tre_ch default: if (escaped) END_SEGMENT; - heur[pos++] = regex[i]; + else + STORE_CHAR; continue; } }