Date: Mon, 11 Sep 2017 15:52:24 +0000 (UTC)
From: Kyle Evans <kevans@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject: svn commit: r323443 - in stable/11: contrib/netbsd-tests/usr.bin/grep usr.bin/grep
Message-ID: <201709111552.v8BFqOAi005604@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: kevans
Date: Mon Sep 11 15:52:24 2017
New Revision: 323443
URL: https://svnweb.freebsd.org/changeset/base/323443

Log:
  bsdgrep: add a primitive literal matcher to unbreak fgrep in some scenarios

  MFC r322825: bsdgrep: add some additional tests for fgrep

  Previously added tests only check that fgrep is somewhat sane and works.
  Add some more tests that check that the implementation is basically
  functional and not producing incorrect results with various flags.

  MFC r322826: bsdgrep: add a primitive literal matcher

  fgrep/grep -F will error out at runtime if compiled with a regex(3)
  that does not define REG_NOSPEC or REG_LITERAL. glibc is one such regex(3)
  implementation, and as it turns out they don't support literal matching at
  all. Provide a primitive literal matcher for use with glibc and other
  implementations that don't support literal matching so that we don't
  completely lose fgrep/grep -F if compiled against libgnuregex on
  stable/10, stable/11, or other systems that we don't necessarily support.

  This is a wholly unoptimized implementation with no plans to optimize it as
  of now. This is due to both its use-case being primarily on unsupported
  systems in the near-distant future and that it's reinventing the wheel that
  we already have available as a feature of regex(3).
PR: 222201 Approved by: emaste (mentor, blanket MFC) Modified: stable/11/contrib/netbsd-tests/usr.bin/grep/t_grep.sh stable/11/usr.bin/grep/grep.c stable/11/usr.bin/grep/grep.h stable/11/usr.bin/grep/util.c Directory Properties: stable/11/ (props changed) Modified: stable/11/contrib/netbsd-tests/usr.bin/grep/t_grep.sh ============================================================================== --- stable/11/contrib/netbsd-tests/usr.bin/grep/t_grep.sh Mon Sep 11 15:38:51 2017 (r323442) +++ stable/11/contrib/netbsd-tests/usr.bin/grep/t_grep.sh Mon Sep 11 15:52:24 2017 (r323443) @@ -685,6 +685,59 @@ matchall_body() atf_check -s exit:1 grep "" test1 } + +atf_test_case fgrep_multipattern +fgrep_multipattern_head() +{ + atf_set "descr" "Check proper behavior with multiple patterns supplied to fgrep" +} +fgrep_multipattern_body() +{ + printf "Foo\nBar\nBaz" > test1 + + atf_check -o inline:"Foo\nBaz\n" grep -F -e "Foo" -e "Baz" test1 + atf_check -o inline:"Foo\nBaz\n" grep -F -e "Baz" -e "Foo" test1 + atf_check -o inline:"Bar\nBaz\n" grep -F -e "Bar" -e "Baz" test1 +} + +atf_test_case fgrep_icase +fgrep_icase_head() +{ + atf_set "descr" "Check proper handling of -i supplied to fgrep" +} +fgrep_icase_body() +{ + printf "Foo\nBar\nBaz" > test1 + + atf_check -o inline:"Foo\nBaz\n" grep -Fi -e "foo" -e "baz" test1 + atf_check -o inline:"Foo\nBaz\n" grep -Fi -e "baz" -e "foo" test1 + atf_check -o inline:"Bar\nBaz\n" grep -Fi -e "bar" -e "baz" test1 + atf_check -o inline:"Bar\nBaz\n" grep -Fi -e "BAR" -e "bAz" test1 +} + +atf_test_case fgrep_oflag +fgrep_oflag_head() +{ + atf_set "descr" "Check proper handling of -o supplied to fgrep" +} +fgrep_oflag_body() +{ + printf "abcdefghi\n" > test1 + + atf_check -o inline:"a\n" grep -Fo "a" test1 + atf_check -o inline:"i\n" grep -Fo "i" test1 + atf_check -o inline:"abc\n" grep -Fo "abc" test1 + atf_check -o inline:"fgh\n" grep -Fo "fgh" test1 + atf_check -o inline:"cde\n" grep -Fo "cde" test1 + atf_check -o inline:"bcd\n" grep -Fo -e 
"bcd" -e "cde" test1 + atf_check -o inline:"bcd\nefg\n" grep -Fo -e "bcd" -e "efg" test1 + + atf_check -s exit:1 grep -Fo "xabc" test1 + atf_check -s exit:1 grep -Fo "abcx" test1 + atf_check -s exit:1 grep -Fo "xghi" test1 + atf_check -s exit:1 grep -Fo "ghix" test1 + atf_check -s exit:1 grep -Fo "abcdefghiklmnopqrstuvwxyz" test1 +} # End FreeBSD atf_init_test_cases() @@ -726,5 +779,8 @@ atf_init_test_cases() atf_add_test_case mmap atf_add_test_case mmap_eof_not_eol atf_add_test_case matchall + atf_add_test_case fgrep_multipattern + atf_add_test_case fgrep_icase + atf_add_test_case fgrep_oflag # End FreeBSD } Modified: stable/11/usr.bin/grep/grep.c ============================================================================== --- stable/11/usr.bin/grep/grep.c Mon Sep 11 15:38:51 2017 (r323442) +++ stable/11/usr.bin/grep/grep.c Mon Sep 11 15:52:24 2017 (r323443) @@ -721,12 +721,19 @@ main(int argc, char *argv[]) case GREP_BASIC: break; case GREP_FIXED: + /* + * regex(3) implementations that support fixed-string searches generally + * define either REG_NOSPEC or REG_LITERAL. Set the appropriate flag + * here. If neither are defined, GREP_FIXED later implies that the + * internal literal matcher should be used. Other cflags that have + * the same interpretation as REG_NOSPEC and REG_LITERAL should be + * similarly added here, and grep.h should be amended to take this into + * consideration when defining WITH_INTERNAL_NOSPEC. + */ #if defined(REG_NOSPEC) cflags |= REG_NOSPEC; #elif defined(REG_LITERAL) cflags |= REG_LITERAL; -#else - errx(2, "literal expressions not supported at compile time"); #endif break; case GREP_EXTENDED: @@ -743,7 +750,11 @@ main(int argc, char *argv[]) r_pattern = grep_calloc(patterns, sizeof(*r_pattern)); /* Don't process any patterns if we have a blank one */ +#ifdef WITH_INTERNAL_NOSPEC + if (!matchall && grepbehave != GREP_FIXED) { +#else if (!matchall) { +#endif /* Check if cheating is allowed (always is for fgrep). 
*/ for (i = 0; i < patterns; ++i) { #ifndef WITHOUT_FASTMATCH Modified: stable/11/usr.bin/grep/grep.h ============================================================================== --- stable/11/usr.bin/grep/grep.h Mon Sep 11 15:38:51 2017 (r323442) +++ stable/11/usr.bin/grep/grep.h Mon Sep 11 15:52:24 2017 (r323443) @@ -57,6 +57,10 @@ extern const char *errstr[]; #define GREP_BASIC 1 #define GREP_EXTENDED 2 +#if !defined(REG_NOSPEC) && !defined(REG_LITERAL) +#define WITH_INTERNAL_NOSPEC +#endif + #define BINFILE_BIN 0 #define BINFILE_SKIP 1 #define BINFILE_TEXT 2 Modified: stable/11/usr.bin/grep/util.c ============================================================================== --- stable/11/usr.bin/grep/util.c Mon Sep 11 15:38:51 2017 (r323442) +++ stable/11/usr.bin/grep/util.c Mon Sep 11 15:52:24 2017 (r323443) @@ -70,7 +70,10 @@ struct parsec { bool binary; /* Binary file? */ }; - +#ifdef WITH_INTERNAL_NOSPEC +static int litexec(const struct pat *pat, const char *string, + size_t nmatch, regmatch_t pmatch[]); +#endif static int procline(struct parsec *pc); static void printline(struct parsec *pc, int sep); static void printline_metadata(struct str *line, int sep); @@ -350,6 +353,67 @@ procfile(const char *fn) return (c); } +#ifdef WITH_INTERNAL_NOSPEC +/* + * Internal implementation of literal string search within a string, modeled + * after regexec(3), for use when the regex(3) implementation doesn't offer + * either REG_NOSPEC or REG_LITERAL. This does not apply in the default FreeBSD + * config, but in other scenarios such as building against libgnuregex or on + * some non-FreeBSD OSes. 
+ */ +static int +litexec(const struct pat *pat, const char *string, size_t nmatch, + regmatch_t pmatch[]) +{ + char *(*strstr_fn)(const char *, const char *); + char *sub, *subject; + const char *search; + size_t idx, n, ofs, stringlen; + + if (cflags & REG_ICASE) + strstr_fn = strcasestr; + else + strstr_fn = strstr; + idx = 0; + ofs = pmatch[0].rm_so; + stringlen = pmatch[0].rm_eo; + if (ofs >= stringlen) + return (REG_NOMATCH); + subject = strndup(string, stringlen); + if (subject == NULL) + return (REG_ESPACE); + for (n = 0; ofs < stringlen;) { + search = (subject + ofs); + if ((unsigned long)pat->len > strlen(search)) + break; + sub = strstr_fn(search, pat->pat); + /* + * Ignoring the empty string possibility due to context: grep optimizes + * for empty patterns and will never reach this point. + */ + if (sub == NULL) + break; + ++n; + /* Fill in pmatch if necessary */ + if (nmatch > 0) { + pmatch[idx].rm_so = ofs + (sub - search); + pmatch[idx].rm_eo = pmatch[idx].rm_so + pat->len; + if (++idx == nmatch) + break; + ofs = pmatch[idx].rm_so + 1; + } else + /* We only needed to know if we match or not */ + break; + } + free(subject); + if (n > 0 && nmatch > 0) + for (n = idx; n < nmatch; ++n) + pmatch[n].rm_so = pmatch[n].rm_eo = -1; + + return (n > 0 ? 0 : REG_NOMATCH); +} +#endif /* WITH_INTERNAL_NOSPEC */ + #define iswword(x) (iswalnum((x)) || (x) == L'_') /* @@ -400,6 +464,11 @@ procline(struct parsec *pc) for (i = 0; i < patterns; i++) { pmatch.rm_so = st; pmatch.rm_eo = pc->ln.len; +#ifdef WITH_INTERNAL_NOSPEC + if (grepbehave == GREP_FIXED) + r = litexec(&pattern[i], pc->ln.dat, 1, &pmatch); + else +#endif #ifndef WITHOUT_FASTMATCH if (fg_pattern[i].pattern) r = fastexec(&fg_pattern[i],
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201709111552.v8BFqOAi005604>