Date: Tue, 24 Jun 2008 11:52:47 GMT From: Gabor Kovesdan <gabor@FreeBSD.org> To: Perforce Change Reviews <perforce@freebsd.org> Subject: PERFORCE change 144019 for review Message-ID: <200806241152.m5OBqloS052689@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=144019 Change 144019 by gabor@gabor_server on 2008/06/24 11:51:51 - Remove the gzip and bzip2 support. I'll pre-extract these files to /temp, using mktemp, which will get rid of some limitations, simplify the implementation of the wide character set and make the code much cleaner - Rename grep_fdopen to grep_stdin_open and simplify the cases where it is involved. That function is only used to open the standard input, so it doesn't need to be so complicated. Affected files ... .. //depot/projects/soc2008/gabor_textproc/grep/binary.c#9 edit .. //depot/projects/soc2008/gabor_textproc/grep/file.c#7 edit .. //depot/projects/soc2008/gabor_textproc/grep/grep.c#41 edit .. //depot/projects/soc2008/gabor_textproc/grep/grep.h#23 edit .. //depot/projects/soc2008/gabor_textproc/grep/util.c#36 edit Differences ... ==== //depot/projects/soc2008/gabor_textproc/grep/binary.c#9 (text+ko) ==== @@ -34,70 +34,57 @@ #endif #endif /* not lint */ -#include <bzlib.h> #include <ctype.h> #include <err.h> #include <stdio.h> -#include <zlib.h> +#include <stdlib.h> +#include <wchar.h> +#include <wctype.h> #include "grep.h" -#define isbinary(ch) (!isspace((ch)) && iscntrl((ch))) +#define iswbinary(ch) (!iswspace((ch)) && iswcntrl((ch))) int bin_file(FILE *f) { - char buf[BUFSIZ]; - size_t i, m; + wint_t ch = L'\0'; + size_t i; int ret = 0; if (fseek(f, 0L, SEEK_SET) == -1) return (0); - if ((m = fread(buf, 1, BUFSIZ, f)) == 0) - return (0); - - for (i = 0; i < m; i++) - if (isbinary(buf[i])) { + for (i = 0; (i <= BUFSIZ) && (ch != WEOF); i++) { + ch = fgetwc(f); + if (iswbinary(ch)) { ret = 1; break; } + } rewind(f); return (ret); } int -gzbin_file(gzFile *f) +mmbin_file(struct mmfile *f) { - char buf[BUFSIZ]; - int i, m, ret = 0; - - if (gzseek(f, 0L, SEEK_SET) == -1) + int i; + wchar_t *wbuf; + size_t s; + + if ((s = mbstowcs(NULL, f->base, 0)) == -1) return (0); - if ((m = gzread(f, buf, BUFSIZ)) <= 0) - return (0); + if ((wbuf = malloc((s + 1) 
* sizeof(wchar_t))) == NULL) + err(2, NULL); - for (i = 0; i < m; i++) - if (isbinary(buf[i])) { - ret = 1; - break; - } + mbstowcs(wbuf, f->base, s); - if (gzrewind(f) != 0) - err(1, "gzbin_file"); - return (ret); -} - -int -mmbin_file(struct mmfile *f) -{ - int i; - /* XXX knows too much about mmf internals */ for (i = 0; i < BUFSIZ && i < f->len; i++) - if (isbinary(f->base[i])) + if (iswbinary(wbuf[i])) return (1); return (0); } ==== //depot/projects/soc2008/gabor_textproc/grep/file.c#7 (text+ko) ==== @@ -46,8 +46,6 @@ #include "grep.h" static char fname[MAXPATHLEN]; -static char *lnbuf; -static size_t lnbuflen; #define FILE_STDIO 0 #define FILE_MMAP 1 @@ -59,106 +57,21 @@ int noseek; FILE *f; struct mmfile *mmf; - gzFile *gzf; - BZFILE *bzf; }; -static char * -bzfgetln(BZFILE *f, size_t *len) -{ - size_t n; - char c; - int bzerr = 0; - - for (n = 0; ; ++n) { - BZ2_bzRead(&bzerr, f, &c, 1); - - if ((bzerr == BZ_STREAM_END) || (bzerr == BZ_SEQUENCE_ERROR)) { - if (n == 0) - return (NULL); - else - break; - } else if (bzerr != BZ_OK) - err(2, NULL); - - if (n >= lnbuflen) { - lnbuflen *= 2; - lnbuf = grep_realloc(lnbuf, ++lnbuflen); - } - if (c == '\n') - break; - lnbuf[n] = c; - } - - *len = n; - return (lnbuf); -} - -static char * -gzfgetln(gzFile *f, size_t *len) -{ - size_t n; - int c; - - for (n = 0; ; ++n) { - c = gzgetc(f); - if (c == -1) { - const char *gzerrstr; - int gzerr; - - if (gzeof(f)) - break; - - gzerrstr = gzerror(f, &gzerr); - if (gzerr == Z_ERRNO) - err(2, "%s", fname); - else - errx(2, "%s: %s", fname, gzerrstr); - } - if (n >= lnbuflen) { - lnbuflen *= 2; - lnbuf = grep_realloc(lnbuf, ++lnbuflen); - } - if (c == '\n') - break; - lnbuf[n] = c; - } - - if (gzeof(f) && n == 0) - return (NULL); - *len = n; - return (lnbuf); -} - struct file * -grep_fdopen(int fd, char *mode) +grep_stdin_open(char *mode) { struct file *f; - if (fd == STDIN_FILENO) - snprintf(fname, sizeof fname, getstr(1)); - else - snprintf(fname, sizeof fname, getstr(2), fd); 
+ snprintf(fname, sizeof fname, getstr(1)); f = grep_malloc(sizeof *f); - if (Zflag) { - f->type = FILE_GZIP; - f->noseek = lseek(fd, 0L, SEEK_SET) == -1; - if ((f->gzf = gzdopen(fd, mode)) != NULL) - return (f); - } else if (Jflag) { - f->type = FILE_BZIP; - f->noseek = lseek(fd, 0L, SEEK_SET) == -1; - if ((f->bzf = BZ2_bzdopen(fd, mode)) != NULL) - return (f); - } else - { - f->type = FILE_STDIO; - f->noseek = isatty(fd); - if ((f->f = fdopen(fd, mode)) != NULL) - return (f); - } + f->type = FILE_STDIO; + f->noseek = isatty(FILE_STDIO); + if ((f->f = fdopen(FILE_STDIO, mode)) != NULL) + return (f); free(f); return (NULL); @@ -174,24 +87,16 @@ f = grep_malloc(sizeof *f); f->noseek = 0; - if (Zflag) { - f->type = FILE_GZIP; - if ((f->gzf = gzopen(fname, mode)) != NULL) - return (f); - } else if (Jflag) { - f->type = FILE_BZIP; - if ((f->bzf = BZ2_bzopen(fname, mode)) != NULL) - return (f); - } else { - /* try mmap first; if it fails, try stdio */ - if ((f->mmf = mmopen(fname, mode)) != NULL) { - f->type = FILE_MMAP; - return (f); - } - f->type = FILE_STDIO; - if ((f->f = fopen(path, mode)) != NULL) - return (f); +/* XXX: pre-extract gzip and bzip2 files */ + + /* try mmap first; if it fails, try stdio */ + if ((f->mmf = mmopen(fname, mode)) != NULL) { + f->type = FILE_MMAP; + return (f); } + f->type = FILE_STDIO; + if ((f->f = fopen(path, mode)) != NULL) + return (f); free(f); return (NULL); @@ -208,12 +113,6 @@ return (bin_file(f->f)); case FILE_MMAP: return (mmbin_file(f->mmf)); - case FILE_GZIP: - return (gzbin_file(f->gzf)); - case FILE_BZIP: -/* XXX: we cannot seek in bzip2 files, just suppose that it is not binary - return (bzbin_file(f->bzf)); */ - return (0); default: /* NOTREACHED */ errx(2, getstr(3)); @@ -228,10 +127,6 @@ return (fgetln(f->f, l)); case FILE_MMAP: return (mmfgetln(f->mmf, l)); - case FILE_GZIP: - return (gzfgetln(f->gzf, l)); - case FILE_BZIP: - return (bzfgetln(f->bzf, l)); default: /* NOTREACHED */ errx(2, getstr(3)); @@ -248,12 +143,6 
@@ case FILE_MMAP: mmclose(f->mmf); break; - case FILE_GZIP: - gzclose(f->gzf); - break; - case FILE_BZIP: - BZ2_bzclose(f->bzf); - break; default: /* NOTREACHED */ errx(2, getstr(3)); ==== //depot/projects/soc2008/gabor_textproc/grep/grep.c#41 (text+ko) ==== @@ -553,6 +553,7 @@ xflag = 1; break; case 'Z': + Jflag = 0; Zflag++; break; case BIN_OPT: ==== //depot/projects/soc2008/gabor_textproc/grep/grep.h#23 (text+ko) ==== @@ -129,7 +129,7 @@ /* file.c */ struct file; -struct file *grep_fdopen(int fd, char *mode); +struct file *grep_stdin_open(char *mode); struct file *grep_open(char *path, char *mode); int grep_bin_file(struct file *f); char *grep_fgetln(struct file *f, size_t *l); @@ -137,5 +137,4 @@ /* binary.c */ int bin_file(FILE * f); -int gzbin_file(gzFile * f); int mmbin_file(struct mmfile *f); ==== //depot/projects/soc2008/gabor_textproc/grep/util.c#36 (text+ko) ==== @@ -136,7 +136,7 @@ fn = label; else fn = getstr(1); - f = grep_fdopen(STDIN_FILENO, "r"); + f = grep_stdin_open("r"); } else { f = grep_open(fn, "r"); }
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200806241152.m5OBqloS052689>