From owner-svn-src-all@FreeBSD.ORG Tue Aug 17 15:07:23 2010 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 702C61065674; Tue, 17 Aug 2010 15:07:23 +0000 (UTC) (envelope-from jh@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 546288FC1A; Tue, 17 Aug 2010 15:07:23 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id o7HF7NdO036281; Tue, 17 Aug 2010 15:07:23 GMT (envelope-from jh@svn.freebsd.org) Received: (from jh@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id o7HF7NiU036279; Tue, 17 Aug 2010 15:07:23 GMT (envelope-from jh@svn.freebsd.org) Message-Id: <201008171507.o7HF7NiU036279@svn.freebsd.org> From: Jaakko Heinonen Date: Tue, 17 Aug 2010 15:07:23 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org X-SVN-Group: stable-8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r211429 - stable/8/usr.bin/uniq X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 17 Aug 2010 15:07:23 -0000 Author: jh Date: Tue Aug 17 15:07:23 2010 New Revision: 211429 URL: http://svn.freebsd.org/changeset/base/211429 Log: MFC r200633: Sync getline() with comm(1): - Prevent overflowing of the buffer length variable in getline() by limiting its maximum value. - Exit if reallocf(3) fails in getline(). Failure was silently considered as end-of-file. 
MFC r204803 by ache: 1) Rewrite input processing to not exit with an error on the first EILSEQ found in the input data but fall back to a "binary equal" check instead. POSIX says: "The input file shall be a text file", nothing more, so a text file with an illegal sequence is valid input. BTW, GNU sort does not fail on EILSEQ either. 2) Speed up input processing a bit in complex cases like skipping fields or chars, or ignoring case. 3) Enforce the implied LINE_MAX limit (from the POSIX definition of "text file" and the POSIX uniq(1) description). MFC r204811 by ache: Remove vestiges of the old %-format which prevent the build on amd64 MFC r204876 by ache: 1) Reimplement (differently) the unlimited line length that was restricted in the previous commit. 2) Honor a missing final \n (if absent) on output. MFC r204927 by ache: Add SIZE_MAX overflow check Modified: stable/8/usr.bin/uniq/uniq.c Directory Properties: stable/8/usr.bin/uniq/ (props changed) Modified: stable/8/usr.bin/uniq/uniq.c ============================================================================== --- stable/8/usr.bin/uniq/uniq.c Tue Aug 17 15:02:33 2010 (r211428) +++ stable/8/usr.bin/uniq/uniq.c Tue Aug 17 15:07:23 2010 (r211429) @@ -52,6 +52,8 @@ static const char rcsid[] = #include #include #include +#include +#define _WITH_GETLINE #include #include #include @@ -59,30 +61,26 @@ static const char rcsid[] = #include #include -#define MAXLINELEN (LINE_MAX + 1) - -int cflag, dflag, uflag; +int cflag, dflag, uflag, iflag; int numchars, numfields, repeats; FILE *file(const char *, const char *); -wchar_t *getline(wchar_t *, size_t *, FILE *); -void show(FILE *, wchar_t *); +wchar_t *convert(const char *); +int inlcmp(const char *, const char *); +void show(FILE *, const char *); wchar_t *skip(wchar_t *); void obsolete(char *[]); static void usage(void); -int wcsicoll(wchar_t *, wchar_t *); int main (int argc, char *argv[]) { - wchar_t *t1, *t2; + wchar_t *tprev, *tthis; FILE *ifp, *ofp; - int ch, b1; - size_t prevbuflen, thisbuflen; - wchar_t
*prevline, *thisline; - char *p; + int ch, comp; + size_t prevbuflen, thisbuflen, b1; + char *prevline, *thisline, *p; const char *ifn; - int iflag = 0, comp; (void) setlocale(LC_ALL, ""); @@ -137,48 +135,48 @@ main (int argc, char *argv[]) if (argc > 1) ofp = file(argv[1], "w"); - prevbuflen = MAXLINELEN; - thisbuflen = MAXLINELEN; - prevline = malloc(prevbuflen * sizeof(*prevline)); - thisline = malloc(thisbuflen * sizeof(*thisline)); - if (prevline == NULL || thisline == NULL) - err(1, "malloc"); + prevbuflen = thisbuflen = 0; + prevline = thisline = NULL; - if ((prevline = getline(prevline, &prevbuflen, ifp)) == NULL) { + if (getline(&prevline, &prevbuflen, ifp) < 0) { if (ferror(ifp)) err(1, "%s", ifn); exit(0); } + tprev = convert(prevline); + if (!cflag && uflag && dflag) show(ofp, prevline); - while ((thisline = getline(thisline, &thisbuflen, ifp)) != NULL) { - /* If requested get the chosen fields + character offsets. */ - if (numfields || numchars) { - t1 = skip(thisline); - t2 = skip(prevline); - } else { - t1 = thisline; - t2 = prevline; - } - - /* If different, print; set previous to new value. */ - if (iflag) - comp = wcsicoll(t1, t2); + tthis = NULL; + while (getline(&thisline, &thisbuflen, ifp) >= 0) { + if (tthis != NULL) + free(tthis); + tthis = convert(thisline); + + if (tthis == NULL && tprev == NULL) + comp = inlcmp(thisline, prevline); + else if (tthis == NULL || tprev == NULL) + comp = 1; else - comp = wcscoll(t1, t2); + comp = wcscoll(tthis, tprev); if (comp) { + /* If different, print; set previous to new value. 
*/ if (cflag || !dflag || !uflag) show(ofp, prevline); - t1 = prevline; + p = prevline; b1 = prevbuflen; prevline = thisline; prevbuflen = thisbuflen; + if (tprev != NULL) + free(tprev); + tprev = tthis; if (!cflag && uflag && dflag) show(ofp, prevline); - thisline = t1; + thisline = p; thisbuflen = b1; + tthis = NULL; repeats = 0; } else ++repeats; @@ -191,25 +189,55 @@ main (int argc, char *argv[]) } wchar_t * -getline(wchar_t *buf, size_t *buflen, FILE *fp) +convert(const char *str) { - size_t bufpos; - wint_t ch; + size_t n; + wchar_t *buf, *ret, *p; - bufpos = 0; - while ((ch = getwc(fp)) != WEOF && ch != '\n') { - if (bufpos + 2 >= *buflen) { - *buflen = *buflen * 2; - buf = reallocf(buf, *buflen * sizeof(*buf)); - if (buf == NULL) - return (NULL); - } - buf[bufpos++] = ch; + if ((n = mbstowcs(NULL, str, 0)) == (size_t)-1) + return (NULL); + if (SIZE_MAX / sizeof(*buf) < n + 1) + errx(1, "conversion buffer length overflow"); + if ((buf = malloc((n + 1) * sizeof(*buf))) == NULL) + err(1, "malloc"); + if (mbstowcs(buf, str, n + 1) != n) + errx(1, "internal mbstowcs() error"); + /* The last line may not end with \n. */ + if (n > 0 && buf[n - 1] == L'\n') + buf[n - 1] = L'\0'; + + /* If requested get the chosen fields + character offsets. */ + if (numfields || numchars) { + if ((ret = wcsdup(skip(buf))) == NULL) + err(1, "wcsdup"); + free(buf); + } else + ret = buf; + + if (iflag) { + for (p = ret; *p != L'\0'; p++) + *p = towlower(*p); } - if (bufpos + 1 != *buflen) - buf[bufpos] = '\0'; - return (bufpos != 0 || ch == '\n' ? buf : NULL); + return (ret); +} + +int +inlcmp(const char *s1, const char *s2) +{ + int c1, c2; + + while (*s1 == *s2++) + if (*s1++ == '\0') + return (0); + c1 = (unsigned char)*s1; + c2 = (unsigned char)*(s2 - 1); + /* The last line may not end with \n. */ + if (c1 == '\n') + c1 = '\0'; + if (c2 == '\n') + c2 = '\0'; + return (c1 - c2); } /* @@ -218,13 +246,13 @@ getline(wchar_t *buf, size_t *buflen, FI * of the line. 
*/ void -show(FILE *ofp, wchar_t *str) +show(FILE *ofp, const char *str) { if (cflag) - (void)fprintf(ofp, "%4d %ls\n", repeats + 1, str); + (void)fprintf(ofp, "%4d %s", repeats + 1, str); if ((dflag && repeats) || (uflag && !repeats)) - (void)fprintf(ofp, "%ls\n", str); + (void)fprintf(ofp, "%s", str); } wchar_t * @@ -232,13 +260,14 @@ skip(wchar_t *str) { int nchars, nfields; - for (nfields = 0; *str != '\0' && nfields++ != numfields; ) { + for (nfields = 0; *str != L'\0' && nfields++ != numfields; ) { while (iswblank(*str)) str++; - while (*str != '\0' && !iswblank(*str)) + while (*str != L'\0' && !iswblank(*str)) str++; } - for (nchars = numchars; nchars-- && *str; ++str); + for (nchars = numchars; nchars-- && *str != L'\0'; ++str) + ; return(str); } @@ -288,52 +317,3 @@ usage(void) "usage: uniq [-c | -d | -u] [-i] [-f fields] [-s chars] [input [output]]\n"); exit(1); } - -static size_t wcsicoll_l1_buflen = 0, wcsicoll_l2_buflen = 0; -static wchar_t *wcsicoll_l1_buf = NULL, *wcsicoll_l2_buf = NULL; - -int -wcsicoll(wchar_t *s1, wchar_t *s2) -{ - wchar_t *p; - size_t l1, l2; - size_t new_l1_buflen, new_l2_buflen; - - l1 = wcslen(s1) + 1; - l2 = wcslen(s2) + 1; - new_l1_buflen = wcsicoll_l1_buflen; - new_l2_buflen = wcsicoll_l2_buflen; - while (new_l1_buflen < l1) { - if (new_l1_buflen == 0) - new_l1_buflen = MAXLINELEN; - else - new_l1_buflen *= 2; - } - while (new_l2_buflen < l2) { - if (new_l2_buflen == 0) - new_l2_buflen = MAXLINELEN; - else - new_l2_buflen *= 2; - } - if (new_l1_buflen > wcsicoll_l1_buflen) { - wcsicoll_l1_buf = reallocf(wcsicoll_l1_buf, new_l1_buflen * sizeof(*wcsicoll_l1_buf)); - if (wcsicoll_l1_buf == NULL) - err(1, "reallocf"); - wcsicoll_l1_buflen = new_l1_buflen; - } - if (new_l2_buflen > wcsicoll_l2_buflen) { - wcsicoll_l2_buf = reallocf(wcsicoll_l2_buf, new_l2_buflen * sizeof(*wcsicoll_l2_buf)); - if (wcsicoll_l2_buf == NULL) - err(1, "reallocf"); - wcsicoll_l2_buflen = new_l2_buflen; - } - - for (p = wcsicoll_l1_buf; *s1; s1++) - 
*p++ = towlower(*s1); - *p = '\0'; - for (p = wcsicoll_l2_buf; *s2; s2++) - *p++ = towlower(*s2); - *p = '\0'; - - return (wcscoll(wcsicoll_l1_buf, wcsicoll_l2_buf)); -}