From owner-svn-src-all@freebsd.org Sat Dec 5 03:16:07 2020 Return-Path: Delivered-To: svn-src-all@mailman.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.nyi.freebsd.org (Postfix) with ESMTP id 037024B380C; Sat, 5 Dec 2020 03:16:07 +0000 (UTC) (envelope-from kevans@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4CnvqB6WRwz4dVF; Sat, 5 Dec 2020 03:16:06 +0000 (UTC) (envelope-from kevans@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id C802F16F90; Sat, 5 Dec 2020 03:16:06 +0000 (UTC) (envelope-from kevans@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id 0B53G6bS094792; Sat, 5 Dec 2020 03:16:06 GMT (envelope-from kevans@FreeBSD.org) Received: (from kevans@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id 0B53G5KF094788; Sat, 5 Dec 2020 03:16:05 GMT (envelope-from kevans@FreeBSD.org) Message-Id: <202012050316.0B53G5KF094788@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: kevans set sender to kevans@FreeBSD.org using -f From: Kyle Evans Date: Sat, 5 Dec 2020 03:16:05 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r368358 - in head: contrib/netbsd-tests/lib/libc/regex/data lib/libc/regex lib/libregex/tests X-SVN-Group: head X-SVN-Commit-Author: kevans X-SVN-Commit-Paths: in 
head: contrib/netbsd-tests/lib/libc/regex/data lib/libc/regex lib/libregex/tests X-SVN-Commit-Revision: 368358 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 05 Dec 2020 03:16:07 -0000 Author: kevans Date: Sat Dec 5 03:16:05 2020 New Revision: 368358 URL: https://svnweb.freebsd.org/changeset/base/368358 Log: libregex: implement \b and \B (word boundary, not word boundary) This is the last of the needed GNU expressions before we can unleash bsdgrep by default. \b is effectively an agnostic equivalent of \< and \>, while \B will match every space that isn't making a transition from nonchar -> char or char -> nonchar. Modified: head/contrib/netbsd-tests/lib/libc/regex/data/meta.in head/lib/libc/regex/engine.c head/lib/libc/regex/regcomp.c head/lib/libc/regex/regex2.h head/lib/libregex/tests/gnuext.in Modified: head/contrib/netbsd-tests/lib/libc/regex/data/meta.in ============================================================================== --- head/contrib/netbsd-tests/lib/libc/regex/data/meta.in Sat Dec 5 03:13:47 2020 (r368357) +++ head/contrib/netbsd-tests/lib/libc/regex/data/meta.in Sat Dec 5 03:16:05 2020 (r368358) @@ -5,7 +5,7 @@ a\*c & a*c a*c a\\b & a\b a\b a\\\*b & a\*b a\*b # Begin FreeBSD -a\bc &C EESCAPE +a\bc & abc # End FreeBSD a\ &C EESCAPE a\\bc & a\bc a\bc Modified: head/lib/libc/regex/engine.c ============================================================================== --- head/lib/libc/regex/engine.c Sat Dec 5 03:13:47 2020 (r368357) +++ head/lib/libc/regex/engine.c Sat Dec 5 03:16:05 2020 (r368358) @@ -118,6 +118,7 @@ static states step(struct re_guts *g, sopno start, sop #define BOW (BOL-4) #define EOW 
(BOL-5) #define BADCHAR (BOL-6) +#define NWBND (BOL-7) #define NONCHAR(c) ((c) <= OUT) /* sflags */ #define SBOS 0x0001 @@ -463,6 +464,8 @@ dissect(struct match *m, case OEOW: case OBOS: case OEOS: + case OWBND: + case ONWBND: break; case OANY: case OANYOF: @@ -691,6 +694,21 @@ backref(struct match *m, else return(NULL); break; + case OWBND: + if (ISBOW(m, sp) || ISEOW(m, sp)) + { /* yes */ } + else + return(NULL); + break; + case ONWBND: + if (((sp == m->beginp) && !ISWORD(*sp)) || + (sp == m->endp && !ISWORD(*(sp - 1)))) + { /* yes, beginning/end of subject */ } + else if (ISWORD(*(sp - 1)) == ISWORD(*sp)) + { /* yes, beginning/end of subject */ } + else + return(NULL); + break; case OBOW: if (ISBOW(m, sp)) { /* yes */ } @@ -916,6 +934,17 @@ walk(struct match *m, const char *start, const char *s st = step(m->g, startst, stopst, st, flagch, st, sflags); SP("sboweow", st, c); } + if (lastc != OUT && c != OUT && + ISWORD(lastc) == ISWORD(c)) { + flagch = NWBND; + } else if ((lastc == OUT && !ISWORD(c)) || + (c == OUT && !ISWORD(lastc))) { + flagch = NWBND; + } + if (flagch == NWBND) { + st = step(m->g, startst, stopst, st, flagch, st, sflags); + SP("snwbnd", st, c); + } /* are we done? 
*/ if (ISSET(st, stopst)) { @@ -1016,6 +1045,14 @@ step(struct re_guts *g, case OEOW: if (ch == EOW) FWD(aft, bef, 1); + break; + case OWBND: + if (ch == BOW || ch == EOW) + FWD(aft, bef, 1); + break; + case ONWBND: + if (ch == NWBND) + FWD(aft, aft, 1); break; case OANY: if (!NONCHAR(ch)) Modified: head/lib/libc/regex/regcomp.c ============================================================================== --- head/lib/libc/regex/regcomp.c Sat Dec 5 03:13:47 2020 (r368357) +++ head/lib/libc/regex/regcomp.c Sat Dec 5 03:16:05 2020 (r368358) @@ -486,6 +486,12 @@ p_ere_exp(struct parse *p, struct branchc *bc) case '\'': EMIT(OEOS, 0); break; + case 'B': + EMIT(ONWBND, 0); + break; + case 'b': + EMIT(OWBND, 0); + break; case 'W': case 'w': case 'S': @@ -845,6 +851,12 @@ p_simp_re(struct parse *p, struct branchc *bc) case BACKSL|'\'': EMIT(OEOS, 0); break; + case BACKSL|'B': + EMIT(ONWBND, 0); + break; + case BACKSL|'b': + EMIT(OWBND, 0); + break; case BACKSL|'W': case BACKSL|'w': case BACKSL|'S': @@ -1892,6 +1904,8 @@ findmust(struct parse *p, struct re_guts *g) case OEOL: case OBOS: case OEOS: + case OWBND: + case ONWBND: case O_QUEST: case O_CH: case OEND: @@ -2043,6 +2057,8 @@ altoffset(sop *scan, int offset) try++; case OBOW: case OEOW: + case OWBND: + case ONWBND: case OLPAREN: case ORPAREN: case OOR2: Modified: head/lib/libc/regex/regex2.h ============================================================================== --- head/lib/libc/regex/regex2.h Sat Dec 5 03:13:47 2020 (r368357) +++ head/lib/libc/regex/regex2.h Sat Dec 5 03:16:05 2020 (r368358) @@ -106,6 +106,8 @@ typedef unsigned long sopno; #define OEOW (20L<, \`, \') # (is/not boundary, start/end word, start/end subject string) -# Most of these are disabled for the moment, and will be re-enabled as -# we become feature complete. 
-#\babc\b & abc +\babc\b & abc \<abc\> & abc -#\Babc\B & abc -#\B[abc]\B & b -#\B[abc]+ - bc -#\B[abc]\+ b bc +\Babc\B & abc +\B[abc]\B & b +\B[abc]+ - bc +\B[abc]\+ b bc \`abc & abc abc abc\' & abc abc \`abc\' & abc abc