From owner-svn-src-head@FreeBSD.ORG Mon Mar 8 22:27:46 2010 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id C251E1065675; Mon, 8 Mar 2010 22:27:46 +0000 (UTC) (envelope-from ache@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id B0CFF8FC1E; Mon, 8 Mar 2010 22:27:46 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id o28MRkf9026210; Mon, 8 Mar 2010 22:27:46 GMT (envelope-from ache@svn.freebsd.org) Received: (from ache@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id o28MRkKT026208; Mon, 8 Mar 2010 22:27:46 GMT (envelope-from ache@svn.freebsd.org) Message-Id: <201003082227.o28MRkKT026208@svn.freebsd.org> From: "Andrey A. Chernov" Date: Mon, 8 Mar 2010 22:27:46 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r204896 - head/usr.bin/comm X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 08 Mar 2010 22:27:46 -0000 Author: ache Date: Mon Mar 8 22:27:46 2010 New Revision: 204896 URL: http://svn.freebsd.org/changeset/base/204896 Log: Rewrite input processing to not exit with an error on the first EILSEQ found in the input data but fall back to "binary comparison" instead. POSIX says: "The input files shall be text files", nothing more, so a text file with an illegal sequence is valid input. BTW, GNU sort does not fail on EILSEQ either. 
Modified: head/usr.bin/comm/comm.c Modified: head/usr.bin/comm/comm.c ============================================================================== --- head/usr.bin/comm/comm.c Mon Mar 8 21:42:19 2010 (r204895) +++ head/usr.bin/comm/comm.c Mon Mar 8 22:27:46 2010 (r204896) @@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#define _WITH_GETLINE #include #include #include @@ -60,40 +61,31 @@ __FBSDID("$FreeBSD$"); #include #include -#define INITLINELEN (LINE_MAX + 1) -#define MAXLINELEN ((SIZE_MAX / sizeof(wchar_t)) / 2) - -const wchar_t *tabs[] = { L"", L"\t", L"\t\t" }; +int iflag; +const char *tabs[] = { "", "\t", "\t\t" }; FILE *file(const char *); -wchar_t *getline(wchar_t *, size_t *, FILE *); -void show(FILE *, const char *, const wchar_t *, wchar_t *, size_t *); -int wcsicoll(const wchar_t *, const wchar_t *); +wchar_t *convert(const char *); +void show(FILE *, const char *, const char *, char **, size_t *); static void usage(void); int main(int argc, char *argv[]) { int comp, read1, read2; - int ch, flag1, flag2, flag3, iflag; + int ch, flag1, flag2, flag3; FILE *fp1, *fp2; - const wchar_t *col1, *col2, *col3; + const char *col1, *col2, *col3; size_t line1len, line2len; - wchar_t *line1, *line2; - const wchar_t **p; - - flag1 = flag2 = flag3 = 1; - iflag = 0; - - line1len = INITLINELEN; - line2len = INITLINELEN; - line1 = malloc(line1len * sizeof(*line1)); - line2 = malloc(line2len * sizeof(*line2)); - if (line1 == NULL || line2 == NULL) - err(1, "malloc"); + char *line1, *line2; + ssize_t n1, n2; + wchar_t *tline1, *tline2; + const char **p; (void) setlocale(LC_ALL, ""); + flag1 = flag2 = flag3 = 1; + while ((ch = getopt(argc, argv, "123i")) != -1) switch(ch) { case '1': @@ -131,41 +123,57 @@ main(int argc, char *argv[]) if (flag3) col3 = *p; + line1len = line2len = 0; + line1 = line2 = NULL; + n1 = n2 = -1; + for (read1 = read2 = 1;;) { /* read next line, check for EOF */ if (read1) { - line1 = getline(line1, &line1len, fp1); - if 
(line1 == NULL && ferror(fp1)) + n1 = getline(&line1, &line1len, fp1); + if (n1 < 0 && ferror(fp1)) err(1, "%s", argv[0]); + if (n1 > 0 && line1[n1 - 1] == '\n') + line1[n1 - 1] = '\0'; + } if (read2) { - line2 = getline(line2, &line2len, fp2); - if (line2 == NULL && ferror(fp2)) + n2 = getline(&line2, &line2len, fp2); + if (n2 < 0 && ferror(fp2)) err(1, "%s", argv[1]); + if (n2 > 0 && line2[n2 - 1] == '\n') + line2[n2 - 1] = '\0'; } /* if one file done, display the rest of the other file */ - if (line1 == NULL) { - if (line2 != NULL && col2 != NULL) - show(fp2, argv[1], col2, line2, &line2len); + if (n1 < 0) { + if (n2 >= 0 && col2 != NULL) + show(fp2, argv[1], col2, &line2, &line2len); break; } - if (line2 == NULL) { - if (line1 != NULL && col1 != NULL) - show(fp1, argv[0], col1, line1, &line1len); + if (n2 < 0) { + if (n1 >= 0 && col1 != NULL) + show(fp1, argv[0], col1, &line1, &line1len); break; } - /* lines are the same */ - if(iflag) - comp = wcsicoll(line1, line2); + tline2 = NULL; + if ((tline1 = convert(line1)) != NULL) + tline2 = convert(line2); + if (tline1 == NULL || tline2 == NULL) + comp = strcmp(line1, line2); else - comp = wcscoll(line1, line2); + comp = wcscoll(tline1, tline2); + if (tline1 != NULL) + free(tline1); + if (tline2 != NULL) + free(tline2); + /* lines are the same */ if (!comp) { read1 = read2 = 1; if (col3 != NULL) - (void)printf("%ls%ls\n", col3, line1); + (void)printf("%s%s\n", col3, line1); continue; } @@ -174,49 +182,50 @@ main(int argc, char *argv[]) read1 = 1; read2 = 0; if (col1 != NULL) - (void)printf("%ls%ls\n", col1, line1); + (void)printf("%s%s\n", col1, line1); } else { read1 = 0; read2 = 1; if (col2 != NULL) - (void)printf("%ls%ls\n", col2, line2); + (void)printf("%s%s\n", col2, line2); } } exit(0); } wchar_t * -getline(wchar_t *buf, size_t *buflen, FILE *fp) +convert(const char *str) { - size_t bufpos; - wint_t ch; + size_t n; + wchar_t *buf, *p; - bufpos = 0; - while ((ch = getwc(fp)) != WEOF && ch != '\n') { - if 
(bufpos + 1 >= *buflen) { - *buflen = *buflen * 2; - if (*buflen > MAXLINELEN) - errx(1, - "Maximum line buffer length (%zu) exceeded", - MAXLINELEN); - buf = reallocf(buf, *buflen * sizeof(*buf)); - if (buf == NULL) - err(1, "reallocf"); - } - buf[bufpos++] = ch; + if ((n = mbstowcs(NULL, str, 0)) == (size_t)-1) + return (NULL); + if ((buf = malloc((n + 1) * sizeof(*buf))) == NULL) + err(1, "malloc"); + if (mbstowcs(buf, str, n + 1) != n) + errx(1, "internal mbstowcs() error"); + + if (iflag) { + for (p = buf; *p != L'\0'; p++) + *p = towlower(*p); } - buf[bufpos] = '\0'; - return (bufpos != 0 || ch == '\n' ? buf : NULL); + return (buf); } void -show(FILE *fp, const char *fn, const wchar_t *offset, wchar_t *buf, size_t *buflen) +show(FILE *fp, const char *fn, const char *offset, char **bufp, size_t *buflenp) { + ssize_t n; do { - (void)printf("%ls%ls\n", offset, buf); - } while ((buf = getline(buf, buflen, fp)) != NULL); + (void)printf("%s%s\n", offset, *bufp); + if ((n = getline(bufp, buflenp, fp)) < 0) + break; + if (n > 0 && (*bufp)[n - 1] == '\n') + (*bufp)[n - 1] = '\0'; + } while (1); if (ferror(fp)) err(1, "%s", fn); } @@ -240,52 +249,3 @@ usage(void) (void)fprintf(stderr, "usage: comm [-123i] file1 file2\n"); exit(1); } - -static size_t wcsicoll_l1_buflen = 0, wcsicoll_l2_buflen = 0; -static wchar_t *wcsicoll_l1_buf = NULL, *wcsicoll_l2_buf = NULL; - -int -wcsicoll(const wchar_t *s1, const wchar_t *s2) -{ - wchar_t *p; - size_t l1, l2; - size_t new_l1_buflen, new_l2_buflen; - - l1 = wcslen(s1) + 1; - l2 = wcslen(s2) + 1; - new_l1_buflen = wcsicoll_l1_buflen; - new_l2_buflen = wcsicoll_l2_buflen; - while (new_l1_buflen < l1) { - if (new_l1_buflen == 0) - new_l1_buflen = INITLINELEN; - else - new_l1_buflen *= 2; - } - while (new_l2_buflen < l2) { - if (new_l2_buflen == 0) - new_l2_buflen = INITLINELEN; - else - new_l2_buflen *= 2; - } - if (new_l1_buflen > wcsicoll_l1_buflen) { - wcsicoll_l1_buf = reallocf(wcsicoll_l1_buf, new_l1_buflen * 
sizeof(*wcsicoll_l1_buf)); - if (wcsicoll_l1_buf == NULL) - err(1, "reallocf"); - wcsicoll_l1_buflen = new_l1_buflen; - } - if (new_l2_buflen > wcsicoll_l2_buflen) { - wcsicoll_l2_buf = reallocf(wcsicoll_l2_buf, new_l2_buflen * sizeof(*wcsicoll_l2_buf)); - if (wcsicoll_l2_buf == NULL) - err(1, "reallocf"); - wcsicoll_l2_buflen = new_l2_buflen; - } - - for (p = wcsicoll_l1_buf; *s1; s1++) - *p++ = towlower(*s1); - *p = '\0'; - for (p = wcsicoll_l2_buf; *s2; s2++) - *p++ = towlower(*s2); - *p = '\0'; - - return (wcscoll(wcsicoll_l1_buf, wcsicoll_l2_buf)); -}