From owner-p4-projects@FreeBSD.ORG Sat Jul 12 16:03:47 2008 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id 455381065679; Sat, 12 Jul 2008 16:03:47 +0000 (UTC) Delivered-To: perforce@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id E467C1065670 for ; Sat, 12 Jul 2008 16:03:46 +0000 (UTC) (envelope-from gabor@freebsd.org) Received: from repoman.freebsd.org (repoman.freebsd.org [IPv6:2001:4f8:fff6::29]) by mx1.freebsd.org (Postfix) with ESMTP id D5FE48FC08 for ; Sat, 12 Jul 2008 16:03:46 +0000 (UTC) (envelope-from gabor@freebsd.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.14.1/8.14.1) with ESMTP id m6CG3kNn095993 for ; Sat, 12 Jul 2008 16:03:46 GMT (envelope-from gabor@freebsd.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.14.2/8.14.1/Submit) id m6CG3kYM095991 for perforce@freebsd.org; Sat, 12 Jul 2008 16:03:46 GMT (envelope-from gabor@freebsd.org) Date: Sat, 12 Jul 2008 16:03:46 GMT Message-Id: <200807121603.m6CG3kYM095991@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to gabor@freebsd.org using -f From: Gabor Kovesdan To: Perforce Change Reviews Cc: Subject: PERFORCE change 145103 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 12 Jul 2008 16:03:47 -0000 http://perforce.freebsd.org/chv.cgi?CH=145103 Change 145103 by gabor@gabor_server on 2008/07/12 16:03:15 - First read the first chunk of the file into a buffer and use a wrapper function that returns a line from this buffer while it still has data, and reads from the file itself once the buffer is exhausted. The content of the buffer will be used for the binary check. 
In this way we won't need to read some chunk of the file and then seek to the beginning, which does not work for bzip2 files, nor will we need to pre-extract the compressed files to the tmp directory. Affected files ... .. //depot/projects/soc2008/gabor_textproc/grep/file.c#15 edit .. //depot/projects/soc2008/gabor_textproc/grep/grep.h#36 edit .. //depot/projects/soc2008/gabor_textproc/grep/util.c#58 edit Differences ... ==== //depot/projects/soc2008/gabor_textproc/grep/file.c#15 (text+ko) ==== @@ -51,37 +51,61 @@ #include "grep.h" static char fname[MAXPATHLEN]; +static char *lnbuf; +static size_t lnbuflen; +static char binbuf[BUFSIZ * 4]; +static int binbufsiz; +char *binbufptr; #define iswbinary(ch) (!iswspace((ch)) && iswcntrl((ch))) -int -bin_file(struct file *f) +char * +grep_fgetln(struct file *f, size_t *len) { - wint_t ch = L'\0'; - size_t i; - int ret = 0; + int i; + char ch; + size_t size; + wchar_t wbinbuf[BUFSIZ]; - if (f->noseek) - return (0); + if ((binbufptr == NULL) && (binbehave != BINFILE_TEXT)) { + for (i = 0; i < (BUFSIZ * 4); i++) { + if (feof(f->f)) + break; + ch = fgetc(f->f); + binbuf[i] = ch; + } + binbufsiz = i; + binbufptr = binbuf; +// size = mbsnrtowcs(wbinbuf, &binbuf, BUFSIZ * 4, BUFSIZ - 1, NULL); + f->binary = 0; +// for (; size > 0; size--) +// if (iswbinary(wbinbuf[size])) { +// f->binary = 1; +// break; +// } + } - if (fseek(f->f, 0L, SEEK_SET) == -1) - return (0); - - errno = 0; - for (i = 0; i <= BUFSIZ; i++) { - if ((ch = fgetwc(f->f)) == WEOF) { - if (errno == EILSEQ) - ret = 1; - break; + for (i = 0; ; i++) { + if (binbufptr == &binbuf[binbufsiz]) { + if (feof(f->f)) + break; + ch = fgetc(f->f); + } else { + ch = binbufptr[0]; + binbufptr++; + } + if (i >= lnbuflen) { + lnbuflen *= 2; + lnbuf = grep_realloc(lnbuf, ++lnbuflen); } - if (iswbinary(ch)) { - ret = 1; + if (ch == '\n') break; - } + lnbuf[i] = ch; } - - rewind(f->f); - return (ret); + if (feof(f->f) && (i == 0)) + return NULL; + *len = i; + return (lnbuf); } struct 
file * @@ -93,7 +117,6 @@ f = grep_malloc(sizeof *f); - f->noseek = isatty(STDIN_FILENO); if ((f->f = fdopen(STDIN_FILENO, "r")) != NULL) return (f); @@ -111,7 +134,6 @@ snprintf(fname, sizeof fname, "%s", path); f = grep_malloc(sizeof *f); - f->noseek = 0; if (Zflag || Jflag) { templ = grep_malloc(sizeof(char) * 15); @@ -140,7 +162,6 @@ char buf[BUFSIZ]; int bzerror; FILE *file; - if ((file = fopen(fname, "r")) == NULL) err(2, NULL); if ((bzf = BZ2_bzReadOpen(&bzerror, file, 0, 0, NULL, 0)) == NULL) ==== //depot/projects/soc2008/gabor_textproc/grep/grep.h#36 (text+ko) ==== @@ -69,7 +69,7 @@ #define MAX_LINE_MATCHES 32 struct file { - int noseek; + int binary; FILE *f; struct mmfile *mmf; }; @@ -117,6 +117,6 @@ void clearqueue(void); /* file.c */ -int bin_file(struct file * f); struct file *grep_stdin_open(void); struct file *grep_open(char *path); +char *grep_fgetln(struct file *f, size_t *len); ==== //depot/projects/soc2008/gabor_textproc/grep/util.c#58 (text+ko) ==== @@ -129,7 +129,7 @@ struct file *f; struct stat sb; mode_t s; - int c, t, nottext; + int c, t; if (mflag && (mcount <= 0)) return (0); @@ -159,8 +159,7 @@ return (0); } - nottext = bin_file(f); - if (nottext && binbehave == BINFILE_SKIP) { + if (f->binary && binbehave == BINFILE_SKIP) { fclose(f->f); free(f); return (0); @@ -177,13 +176,13 @@ initqueue(); for (c = 0; c == 0 || !(lflag || qflag); ) { ln.off += ln.len + 1; - if ((ln.dat = fgetln(f->f, &ln.len)) == NULL) + if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL) break; if (ln.len > 0 && ln.dat[ln.len - 1] == '\n') --ln.len; ln.line_no++; - if ((t = procline(&ln, nottext)) == 0 && Bflag > 0) { + if ((t = procline(&ln, f->binary)) == 0 && Bflag > 0) { enqueue(&ln); linesqueued++; } @@ -210,7 +209,7 @@ if (Lflag && c == 0) printf("%s\n", fn); if (c && !cflag && !lflag && !Lflag && - binbehave == BINFILE_BIN && nottext && !qflag) + binbehave == BINFILE_BIN && f->binary && !qflag) printf(getstr(12), fn); return (c);