From owner-p4-projects@FreeBSD.ORG Sat Aug 16 12:44:23 2008 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id 476EA1065684; Sat, 16 Aug 2008 12:44:23 +0000 (UTC) Delivered-To: perforce@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 0AAA1106566C for ; Sat, 16 Aug 2008 12:44:23 +0000 (UTC) (envelope-from gabor@freebsd.org) Received: from repoman.freebsd.org (repoman.freebsd.org [IPv6:2001:4f8:fff6::29]) by mx1.freebsd.org (Postfix) with ESMTP id E8C1D8FC19 for ; Sat, 16 Aug 2008 12:44:22 +0000 (UTC) (envelope-from gabor@freebsd.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.14.2/8.14.2) with ESMTP id m7GCiMLg002625 for ; Sat, 16 Aug 2008 12:44:22 GMT (envelope-from gabor@freebsd.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.14.2/8.14.1/Submit) id m7GCiMOu002623 for perforce@freebsd.org; Sat, 16 Aug 2008 12:44:22 GMT (envelope-from gabor@freebsd.org) Date: Sat, 16 Aug 2008 12:44:22 GMT Message-Id: <200808161244.m7GCiMOu002623@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to gabor@freebsd.org using -f From: Gabor Kovesdan To: Perforce Change Reviews Cc: Subject: PERFORCE change 147530 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 16 Aug 2008 12:44:23 -0000 http://perforce.freebsd.org/chv.cgi?CH=147530 Change 147530 by gabor@gabor_server on 2008/08/16 12:43:58 IFC Affected files ... .. //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/Makefile#3 integrate .. //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/fastgrep.c#1 branch .. //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/file.c#5 integrate .. //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/grep.c#3 integrate .. //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/grep.h#3 integrate .. //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/util.c#3 integrate Differences ... ==== //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/Makefile#3 (text+ko) ==== @@ -2,7 +2,7 @@ # $OpenBSD: Makefile,v 1.6 2003/06/25 15:00:04 millert Exp $ PROG= grep -SRCS= file.c grep.c queue.c util.c +SRCS= fastgrep.c file.c grep.c queue.c util.c LINKS= ${BINDIR}/grep ${BINDIR}/egrep \ ${BINDIR}/grep ${BINDIR}/fgrep \ ${BINDIR}/grep ${BINDIR}/zgrep \ ==== //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/file.c#5 (text+ko) ==== @@ -119,36 +119,22 @@ { int i = 0; char ch; - size_t size; - wchar_t wbinbuf[BUFSIZ]; - const char *src = binbuf; - mbstate_t mbs; /* Fill in the buffer if it is empty. */ if (binbufptr == NULL) { /* Only pre-read to the buffer if we need the binary check. */ if (binbehave != BINFILE_TEXT) { - for (; i < sizeof(wbinbuf) && !grep_feof(f); i++) { + for (; i < sizeof(binbuf) && !grep_feof(f); i++) { ch = grep_fgetc(f); - binbuf[i] = ch; + if (ch != EOF) + binbuf[i] = ch; + else + break; } - binbufsiz = i; - binbufptr = binbuf; - - /* Convert at most (BUFSIZ * sizeof(wint_t)) characters or - (BUFSIZ - 1) bytes to wide character string. */ - size = mbsnrtowcs(wbinbuf, &src, sizeof(wbinbuf), BUFSIZ - 1, &mbs); - f->binary = 0; - for (; size > 0; size--) - if (iswbinary(wbinbuf[size])) { - f->binary = 1; - break; - } - - } else { - binbufsiz = i; - binbufptr = binbuf; + f->binary = memchr(binbuf, (filebehave != FILE_GZIP) ? '\0' : '\200', i - 1) != 0; } + binbufsiz = i; + binbufptr = binbuf; } /* Read a line whether from the buffer or from the file itself. */ ==== //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/grep.c#3 (text+ko) ==== @@ -95,6 +95,7 @@ int patterns, pattern_sz; char **pattern; regex_t *r_pattern; +fastgrep_t *fg_pattern; #ifdef WITH_PCRE pcre **perl_pattern; #endif @@ -590,14 +591,26 @@ usage(); } if (grepbehave != GREP_PERL) { - /* Compile regexes with regcomp() */ + fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern)); r_pattern = grep_calloc(patterns, sizeof(*r_pattern)); +/* + * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance. + * Optimizations should be done there. + */ for (i = 0; i < patterns; ++i) { - c = regcomp(&r_pattern[i], pattern[i], cflags); - if (c != 0) { - regerror(c, &r_pattern[i], re_error, - RE_ERROR_BUF); - errx(2, "%s", re_error); + /* Check if cheating is allowed (always is for fgrep). */ + if (grepbehave == GREP_FIXED) + fgrepcomp(&fg_pattern[i], pattern[i]); + else { + if (fastcomp(&fg_pattern[i], pattern[i])) { + /* Fall back to full regex library */ + c = regcomp(&r_pattern[i], pattern[i], cflags); + if (c != 0) { + regerror(c, &r_pattern[i], re_error, + RE_ERROR_BUF); + errx(2, "%s", re_error); + } + } } } } else { ==== //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/grep.h#3 (text+ko) ==== @@ -27,6 +27,7 @@ */ #include +#include #include #include #include @@ -90,6 +91,16 @@ char *dat; }; +typedef struct { + unsigned char *pattern; + int len; + int qsBc[UCHAR_MAX + 1]; + /* flags */ + int bol; + int eol; + int reversed; +} fastgrep_t; + /* Flags passed to regcomp() and regexec() */ extern int cflags, eflags; @@ -105,6 +116,7 @@ extern int first, prev, matchall, patterns, epatterns, tail, notfound; extern char **pattern, **epattern; extern regex_t *r_pattern, *er_pattern; +extern fastgrep_t *fg_pattern; #ifdef WITH_PCRE extern pcre **perl_pattern; @@ -138,3 +150,8 @@ int grep_feof(struct file *f); int grep_fgetc(struct file *f); char *grep_fgetln(struct file *f, size_t *len); + +/* fastgrep.c */ +int fastcomp(fastgrep_t *, const char *); +void fgrepcomp(fastgrep_t *, const char *); +int grep_search(fastgrep_t *, unsigned char *, size_t, regmatch_t *); ==== //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/util.c#3 (text+ko) ==== @@ -243,14 +243,12 @@ regmatch_t pmatch; regmatch_t matches[MAX_LINE_MATCHES]; regoff_t st = 0; - int c = 0, i, r = 0, m = 0, t; + int c = 0, i, r = 0, m = 0; #ifdef WITH_PCRE int ovector[3]; #endif if (!matchall) { - t = vflag ? REG_NOMATCH : 0; - /* Loop to process the whole line */ while (st <= l->len) { pmatch.rm_so = st; @@ -258,8 +256,18 @@ /* Loop to compare with all the patterns */ for (i = 0; i < patterns; i++) { - if (grepbehave != GREP_PERL) { +/* + * XXX: grep_search() is a workaround for speed up and should be + * removed in the future. See fastgrep.c. + */ + if (fg_pattern[i].pattern) { + r = grep_search(&fg_pattern[i], (unsigned char *)l->dat, + l->len, &pmatch); + r = (r == 0) ? (vflag ? REG_NOMATCH : 0) : (vflag ? 0 : REG_NOMATCH); + st = pmatch.rm_eo; + } else if (grepbehave != GREP_PERL) { r = regexec(&r_pattern[i], l->dat, 1, &pmatch, eflags); + r = (r == 0) ? (vflag ? REG_NOMATCH : 0) : (vflag ? 0 : REG_NOMATCH); st = pmatch.rm_eo; } else { #ifdef WITH_PCRE @@ -271,7 +279,7 @@ ; #endif } - if (r == REG_NOMATCH && t == 0) + if (r == REG_NOMATCH) continue; /* Check for full match */ if (r == 0 && xflag) @@ -290,7 +298,7 @@ r = REG_NOMATCH; free(wbegin); } - if (r == t) { + if (r == 0) { if (m == 0) c++; if (m < MAX_LINE_MATCHES) @@ -313,7 +321,7 @@ return (c); /* Binary file */ /* Dealing with the context */ - if ((tail || (c && !vflag)) && !cflag && !qflag) { + if ((tail || c) && !cflag && !qflag) { if (c) { if (!first && !prev && !tail && Aflag) printf("--\n");