Date: Thu, 25 Aug 2011 18:03:29 +0000 (UTC)
From: Gabor Kovesdan <gabor@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-user@freebsd.org
Subject: svn commit: r225183 - user/gabor/tre-integration/contrib/tre/lib
Message-ID: <201108251803.p7PI3TO3013837@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: gabor Date: Thu Aug 25 18:03:29 2011 New Revision: 225183 URL: http://svn.freebsd.org/changeset/base/225183 Log: - Add support for REG_EXTENDED - Some accuracy fixes for REG_BASIC Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c Thu Aug 25 17:43:06 2011 (r225182) +++ user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c Thu Aug 25 18:03:29 2011 (r225183) @@ -125,7 +125,15 @@ st = i + 1; \ escaped = false; \ goto end_segment; \ - } while (0); + } while (0) + +#define STORE_CHAR \ + do \ + { \ + escaped = false; \ + heur[pos++] = regex[i]; \ + } while (0) + /* * Parses a regular expression and constructs a heuristic in heur_t and @@ -140,10 +148,6 @@ tre_compile_heur(heur_t *h, const tre_ch bool escaped = false; int errcode, ret; - /* XXX: only basic regexes are supported. */ - if (cflags & REG_EXTENDED) - return REG_BADPAT; - /* Temporary space, len will be enough. */ heur = xmalloc(len); if (!heur) @@ -168,10 +172,17 @@ tre_compile_heur(heur_t *h, const tre_ch switch (regex[i]) { - /* Bracketed expression is substituted with a dot. */ + /* + * Bracketed expression is substituted with a dot or the + * brackets are treated as normal if at least the opening + * bracket is escaped. + */ case TRE_CHAR('['): PARSE_BRACKETS; - heur[pos++] = TRE_CHAR('.'); + if (escaped) + STORE_CHAR; + else + heur[pos++] = TRE_CHAR('.'); continue; /* @@ -180,14 +191,22 @@ tre_compile_heur(heur_t *h, const tre_ch * character. 
*/ case TRE_CHAR('{'): + if (escaped && (i == 1)) + STORE_CHAR; + else if ((i == 0) && !(cflags & REG_EXTENDED)) + STORE_CHAR; + else if ((i == 0) && (cflags & REG_EXTENDED)) + continue; + PARSE_UNIT('{', '}'); - if (escaped) + if (escaped ^ (cflags & REG_EXTENDED)) { pos--; END_SEGMENT; } - heur[pos++] = regex[i]; - break; + else + STORE_CHAR; + continue; /* * Terminates the current segment when escaped, @@ -195,10 +214,11 @@ tre_compile_heur(heur_t *h, const tre_ch */ case TRE_CHAR('('): PARSE_UNIT('(', ')'); - if (escaped) + if (escaped ^ (cflags & REG_EXTENDED)) END_SEGMENT; - heur[pos++] = regex[i]; - break; + else + STORE_CHAR; + continue; /* * Sets escaped flag. @@ -207,24 +227,88 @@ tre_compile_heur(heur_t *h, const tre_ch */ case TRE_CHAR('\\'): if (escaped) - heur[pos++] = regex[i]; - escaped = !escaped; + STORE_CHAR; + else + escaped = !escaped; continue; /* - * If not the first character and not escaped, erases the + * BRE: If not the first character and not escaped, erases the * last character and terminates the segment. * Otherwise treated as a normal character. + * ERE: Skipped if first character (GNU), rest is like in BRE. */ case TRE_CHAR('*'): - if ((i != 0) && !escaped) + if (escaped || (!(cflags & REG_EXTENDED) && (i == 0))) + STORE_CHAR; + else if ((i != 0)) + { + pos--; + END_SEGMENT; + } + continue; + + /* + * In BRE, it is a normal character, behavior is undefined + * when escaped. + * In ERE, it is special unless escaped. Terminate segment + * when not escaped. Last character is not removed because it + * must occur at least once. It is skipped when first + * character (GNU). + */ + case TRE_CHAR('+'): + if ((cflags & REG_EXTENDED) && (i == 0)) + continue; + else if ((cflags & REG_EXTENDED) ^ escaped) + END_SEGMENT; + else + STORE_CHAR; + continue; + + /* + * In BRE, it is a normal character, behavior is undefined + * when escaped. + * In ERE, it is special unless escaped. Terminate segment + * when not escaped. Last character is removed. 
Skipped when + * first character (GNU). + */ + case TRE_CHAR('?'): + if ((cflags & REG_EXTENDED) && (i == 0)) + continue; + if ((cflags & REG_EXTENDED) ^ escaped) { pos--; END_SEGMENT; } else - heur[pos++] = regex[i]; - break; + STORE_CHAR; + continue; + + /* + * Fail if it is an ERE alternation marker. + */ + case TRE_CHAR('|'): + if ((cflags & REG_EXTENDED) && !escaped) + { + errcode = REG_BADPAT; + goto badpat2; + } + else if (!(cflags & REG_EXTENDED) && escaped) + END_SEGMENT; + else + STORE_CHAR; + continue; + + /* + * Cut the segment at an escaped dot because the fast matcher + * cannot handle it. + */ + case TRE_CHAR('.'): + if (escaped) + END_SEGMENT; + else + STORE_CHAR; + continue; /* * If escaped, terminates segment. @@ -234,7 +318,8 @@ tre_compile_heur(heur_t *h, const tre_ch default: if (escaped) END_SEGMENT; - heur[pos++] = regex[i]; + else + STORE_CHAR; continue; } }
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201108251803.p7PI3TO3013837>