Date: Mon, 22 Aug 2011 23:20:13 +0000 (UTC) From: Gabor Kovesdan <gabor@FreeBSD.org> To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r225094 - user/gabor/tre-integration/contrib/tre/lib Message-ID: <201108222320.p7MNKDNg072564@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: gabor Date: Mon Aug 22 23:20:13 2011 New Revision: 225094 URL: http://svn.freebsd.org/changeset/base/225094 Log: - With the previous change we can allow more accurate heuristics Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c Mon Aug 22 23:19:20 2011 (r225093) +++ user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c Mon Aug 22 23:20:13 2011 (r225094) @@ -135,57 +135,63 @@ tre_compile_heur(heur_t *h, const tre_ch /* * If a repetition marker, erases the repeating character - * and terminates the segment. - * Otherwise just terminates the segment (XXX). + * and terminates the segment, otherwise treated as a normal + * character. */ case TRE_CHAR('{'): PARSE_UNIT('{', '}'); if (escaped) - pos--; - END_SEGMENT; + { + pos--; + END_SEGMENT; + } + heur[pos++] = regex[i]; break; /* - * Terminates the current segment whether a subexpression - * marker or not. (XXX) + * Terminates the current segment when escaped, + * otherwise treated as a normal character. */ case TRE_CHAR('('): PARSE_UNIT('(', ')'); - END_SEGMENT; + if (escaped) + END_SEGMENT; + heur[pos++] = regex[i]; break; /* * Sets escaped flag. - * Escaped escape terminates current segment. (XXX) + * Escaped escape is treated as a normal character. + * (This is also the GNU behaviour.) */ case TRE_CHAR('\\'): if (escaped) - END_SEGMENT; + heur[pos++] = regex[i]; escaped = !escaped; continue; /* - * If not the first character, erases the last character - * and terminates the segment. - * Otherwise heuristic construction fails. (XXX) + * If not the first character and not escaped, erases the + * last character and terminates the segment. + * Otherwise treated as a normal character. 
*/ case TRE_CHAR('*'): - if (i != 0) + if ((i != 0) && !escaped) { pos--; END_SEGMENT; } else - goto badpat1; + heur[pos++] = regex[i]; break; /* - * If a backreference (escaped digit), terminates segment. + * If escaped, terminates segment. * Otherwise adds current character to the current segment * by copying it to the temporary space. */ default: - if (escaped && tre_isdigit(regex[i])) + if (escaped) END_SEGMENT; heur[pos++] = regex[i]; continue; @@ -218,7 +224,7 @@ end_segment: goto space1; } - ret = tre_compile_fast(h->start, heur, pos, 0); + ret = tre_compile_fast(h->start, heur, pos, _REG_HEUR); if (ret != REG_OK) { errcode = REG_BADPAT; @@ -248,7 +254,7 @@ end_segment: goto space2; } - ret = tre_compile_fast(h->end, heur, pos, 0); + ret = tre_compile_fast(h->end, heur, pos, _REG_HEUR); if (ret != REG_OK) { xfree(h->end);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201108222320.p7MNKDNg072564>