From owner-svn-src-all@freebsd.org Sat Dec 5 03:13:48 2020 Return-Path: Delivered-To: svn-src-all@mailman.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.nyi.freebsd.org (Postfix) with ESMTP id D1E1D4B32DA; Sat, 5 Dec 2020 03:13:48 +0000 (UTC) (envelope-from kevans@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4CnvmX5f32z4cqZ; Sat, 5 Dec 2020 03:13:48 +0000 (UTC) (envelope-from kevans@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id B4C4716F8C; Sat, 5 Dec 2020 03:13:48 +0000 (UTC) (envelope-from kevans@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id 0B53Dm1s094536; Sat, 5 Dec 2020 03:13:48 GMT (envelope-from kevans@FreeBSD.org) Received: (from kevans@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id 0B53DlOM094532; Sat, 5 Dec 2020 03:13:47 GMT (envelope-from kevans@FreeBSD.org) Message-Id: <202012050313.0B53DlOM094532@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: kevans set sender to kevans@FreeBSD.org using -f From: Kyle Evans Date: Sat, 5 Dec 2020 03:13:47 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r368357 - in head/lib: libc/regex libregex/tests X-SVN-Group: head X-SVN-Commit-Author: kevans X-SVN-Commit-Paths: in head/lib: libc/regex libregex/tests X-SVN-Commit-Revision: 368357 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 05 Dec 2020 03:13:48 -0000 Author: kevans Date: Sat Dec 5 03:13:47 2020 New Revision: 368357 URL: https://svnweb.freebsd.org/changeset/base/368357 Log: libregex: implement \` and \' (begin-of-subj, end-of-subj) These are GNU extensions, generally equivalent to ^ and $ except that the new syntax will not match beginning of line after the first in a multi-line expression or the end of line before absolute last in a multi-line expression. Modified: head/lib/libc/regex/engine.c head/lib/libc/regex/regcomp.c head/lib/libc/regex/regex2.h head/lib/libregex/tests/gnuext.in Modified: head/lib/libc/regex/engine.c ============================================================================== --- head/lib/libc/regex/engine.c Sat Dec 5 02:23:11 2020 (r368356) +++ head/lib/libc/regex/engine.c Sat Dec 5 03:13:47 2020 (r368357) @@ -109,7 +109,7 @@ static int matcher(struct re_guts *g, const char *stri static const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst); static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev, int); static const char *walk(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, bool fast); -static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft); +static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft, int sflags); #define MAX_RECURSION 100 #define BOL (OUT-1) #define EOL (BOL-1) @@ -119,6 +119,10 @@ static states step(struct re_guts *g, sopno start, sop #define EOW (BOL-5) #define BADCHAR (BOL-6) #define NONCHAR(c) ((c) <= OUT) +/* sflags */ +#define SBOS 0x0001 +#define SEOS 0x0002 + #ifdef REDEBUG static void print(struct match *m, const char *caption, states st, int ch, FILE *d); #endif @@ -457,6 +461,8 @@ dissect(struct match *m, case OEOL: case OBOW: case OEOW: + case OBOS: + case OEOS: break; case OANY: case OANYOF: @@ -657,6 +663,18 @@ backref(struct match *m, if (wc == BADCHAR || !CHIN(cs, wc)) return(NULL); break; + case OBOS: + if (sp == m->beginp && (m->eflags & REG_NOTBOL) == 0) + { /* yes */ } + else + return(NULL); + break; + case OEOS: + if (sp == m->endp && (m->eflags & REG_NOTEOL) == 0) + { /* yes */ } + else + return(NULL); + break; case OBOL: if ((sp == m->beginp && !(m->eflags®_NOTBOL)) || (sp > m->offp && sp < m->endp && @@ -819,15 +837,16 @@ walk(struct match *m, const char *start, const char *s wint_t c; wint_t lastc; /* previous c */ wint_t flagch; - int i; + int i, sflags; const char *matchp; /* last p at which a match ended */ size_t clen; + sflags = 0; AT("slow", start, stop, startst, stopst); CLEAR(st); SET1(st, startst); SP("sstart", st, *p); - st = step(m->g, startst, stopst, st, NOTHING, st); + st = step(m->g, startst, stopst, st, NOTHING, st, sflags); if (fast) ASSIGN(fresh, st); matchp = NULL; @@ -844,6 +863,7 @@ walk(struct match *m, const char *start, const char *s for (;;) { /* next character */ lastc = c; + sflags = 0; if (p == m->endp) { c = OUT; clen = 0; @@ -866,9 +886,20 @@ walk(struct match *m, const char *start, const char *s flagch = (flagch == BOL) ? BOLEOL : EOL; i += m->g->neol; } + if (lastc == OUT && (m->eflags & REG_NOTBOL) == 0) { + sflags |= SBOS; + /* Step one more for BOS. */ + i++; + } + if (c == OUT && (m->eflags & REG_NOTEOL) == 0) { + sflags |= SEOS; + /* Step one more for EOS. */ + i++; + } if (i != 0) { for (; i > 0; i--) - st = step(m->g, startst, stopst, st, flagch, st); + st = step(m->g, startst, stopst, st, flagch, st, + sflags); SP("sboleol", st, c); } @@ -882,7 +913,7 @@ walk(struct match *m, const char *start, const char *s flagch = EOW; } if (flagch == BOW || flagch == EOW) { - st = step(m->g, startst, stopst, st, flagch, st); + st = step(m->g, startst, stopst, st, flagch, st, sflags); SP("sboweow", st, c); } @@ -903,9 +934,10 @@ walk(struct match *m, const char *start, const char *s else ASSIGN(st, empty); assert(c != OUT); - st = step(m->g, startst, stopst, tmp, c, st); + st = step(m->g, startst, stopst, tmp, c, st, sflags); SP("saft", st, c); - assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); + assert(EQ(step(m->g, startst, stopst, st, NOTHING, st, sflags), + st)); p += clen; } @@ -939,7 +971,8 @@ step(struct re_guts *g, sopno stop, /* state after stop state within strip */ states bef, /* states reachable before */ wint_t ch, /* character or NONCHAR code */ - states aft) /* states already known reachable after */ + states aft, /* states already known reachable after */ + int sflags) /* state flags */ { cset *cs; sop s; @@ -958,6 +991,14 @@ step(struct re_guts *g, /* only characters can match */ assert(!NONCHAR(ch) || ch != OPND(s)); if (ch == OPND(s)) + FWD(aft, bef, 1); + break; + case OBOS: + if ((ch == BOL || ch == BOLEOL) && (sflags & SBOS) != 0) + FWD(aft, bef, 1); + break; + case OEOS: + if ((ch == EOL || ch == BOLEOL) && (sflags & SEOS) != 0) FWD(aft, bef, 1); break; case OBOL: Modified: head/lib/libc/regex/regcomp.c ============================================================================== --- head/lib/libc/regex/regcomp.c Sat Dec 5 02:23:11 2020 (r368356) +++ head/lib/libc/regex/regcomp.c Sat Dec 5 03:13:47 2020 (r368357) @@ -480,6 +480,12 @@ p_ere_exp(struct parse *p, struct branchc *bc) if (p->gnuext) { handled = 1; switch (wc) { + case '`': + EMIT(OBOS, 0); + break; + case '\'': + EMIT(OEOS, 0); + break; case 'W': case 'w': case 'S': @@ -833,6 +839,12 @@ p_simp_re(struct parse *p, struct branchc *bc) if (p->gnuext) { handled = true; switch (c) { + case BACKSL|'`': + EMIT(OBOS, 0); + break; + case BACKSL|'\'': + EMIT(OEOS, 0); + break; case BACKSL|'W': case BACKSL|'w': case BACKSL|'S': @@ -1878,6 +1890,8 @@ findmust(struct parse *p, struct re_guts *g) case OEOW: case OBOL: case OEOL: + case OBOS: + case OEOS: case O_QUEST: case O_CH: case OEND: Modified: head/lib/libc/regex/regex2.h ============================================================================== --- head/lib/libc/regex/regex2.h Sat Dec 5 02:23:11 2020 (r368356) +++ head/lib/libc/regex/regex2.h Sat Dec 5 03:13:47 2020 (r368357) @@ -104,6 +104,8 @@ typedef unsigned long sopno; #define O_CH (18L< b #\B[abc]+ - bc #\B[abc]\+ b bc -#\`abc\' & abc abc -#\`.+\' - abNc abNc -#\`.\+\' b abNc abNc -#(\`a) - Na -#(a\') - aN +\`abc & abc abc +abc\' & abc abc +\`abc\' & abc abc +\`.+\' - abNc abNc +\`.\+\' b abNc abNc +(\`a) - Na +(a\`) - aN +(a\') - aN +(\'a) - Na