Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 17 Aug 2010 15:02:33 +0000 (UTC)
From:      Jaakko Heinonen <jh@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org
Subject:   svn commit: r211428 - in stable/8: tools/regression/usr.bin tools/regression/usr.bin/comm tools/regression/usr.bin/pkill usr.bin/comm
Message-ID:  <201008171502.o7HF2XY5036159@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jh
Date: Tue Aug 17 15:02:33 2010
New Revision: 211428
URL: http://svn.freebsd.org/changeset/base/211428

Log:
  MFC r200442:
  
  Don't read the newline character into the line buffer because lines are passed
  to wcscoll(3). Newline characters could cause incorrect results when
  comparing lines.
  
  Also, if an input line didn't contain a newline character, it was
  omitted from the output. According to my interpretation, SUSv3 requires
  that the newline is always printed.
  
  Add regression tests for the cases.
  
  PR:		bin/140976
  
  MFC r200604:
  
  - Prevent overflowing of the buffer length variable in getline() by
    limiting its maximum value.
  - Exit if reallocf(3) fails in getline(). Failure was silently
    considered as end-of-file.
  
  MFC r204896 by ache:
  
  Rewrite input processing to not exit with error on the first EILSEQ found
  in the input data but fall back to "binary comparison" instead.
  
  POSIX says: "The input files shall be text files", nothing more,
  so a text file with an illegal sequence is valid input.
  BTW, GNU sort does not fail on EILSEQ either.
  
  MFC r204928 by ache:
  
  Add SIZE_MAX overflow check

Added:
  stable/8/tools/regression/usr.bin/comm/
     - copied from r200442, head/tools/regression/usr.bin/comm/
Modified:
  stable/8/tools/regression/usr.bin/Makefile
  stable/8/usr.bin/comm/comm.c
Directory Properties:
  stable/8/tools/regression/usr.bin/   (props changed)
  stable/8/tools/regression/usr.bin/pkill/pgrep-_g.t   (props changed)
  stable/8/tools/regression/usr.bin/pkill/pgrep-_s.t   (props changed)
  stable/8/tools/regression/usr.bin/pkill/pkill-_g.t   (props changed)
  stable/8/tools/regression/usr.bin/sed/   (props changed)
  stable/8/usr.bin/comm/   (props changed)

Modified: stable/8/tools/regression/usr.bin/Makefile
==============================================================================
--- stable/8/tools/regression/usr.bin/Makefile	Tue Aug 17 13:02:08 2010	(r211427)
+++ stable/8/tools/regression/usr.bin/Makefile	Tue Aug 17 15:02:33 2010	(r211428)
@@ -1,5 +1,5 @@
 # $FreeBSD$
 
-SUBDIR=	apply calendar file2c join jot m4 printf sed tr uudecode uuencode xargs lastcomm
+SUBDIR=	apply calendar comm file2c join jot m4 printf sed tr uudecode uuencode xargs lastcomm
 
 .include <bsd.subdir.mk>

Modified: stable/8/usr.bin/comm/comm.c
==============================================================================
--- stable/8/usr.bin/comm/comm.c	Tue Aug 17 13:02:08 2010	(r211427)
+++ stable/8/usr.bin/comm/comm.c	Tue Aug 17 15:02:33 2010	(r211428)
@@ -52,6 +52,8 @@ __FBSDID("$FreeBSD$");
 #include <err.h>
 #include <limits.h>
 #include <locale.h>
+#include <stdint.h>
+#define _WITH_GETLINE
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -59,39 +61,31 @@ __FBSDID("$FreeBSD$");
 #include <wchar.h>
 #include <wctype.h>
 
-#define	MAXLINELEN	(LINE_MAX + 1)
-
-const wchar_t *tabs[] = { L"", L"\t", L"\t\t" };
+int iflag;
+const char *tabs[] = { "", "\t", "\t\t" };
 
 FILE   *file(const char *);
-wchar_t	*getline(wchar_t *, size_t *, FILE *);
-void	show(FILE *, const char *, const wchar_t *, wchar_t *, size_t *);
-int     wcsicoll(const wchar_t *, const wchar_t *);
+wchar_t	*convert(const char *);
+void	show(FILE *, const char *, const char *, char **, size_t *);
 static void	usage(void);
 
 int
 main(int argc, char *argv[])
 {
 	int comp, read1, read2;
-	int ch, flag1, flag2, flag3, iflag;
+	int ch, flag1, flag2, flag3;
 	FILE *fp1, *fp2;
-	const wchar_t *col1, *col2, *col3;
+	const char *col1, *col2, *col3;
 	size_t line1len, line2len;
-	wchar_t *line1, *line2;
-	const wchar_t **p;
-
-	flag1 = flag2 = flag3 = 1;
-	iflag = 0;
-
- 	line1len = MAXLINELEN;
- 	line2len = MAXLINELEN;
- 	line1 = malloc(line1len * sizeof(*line1));
- 	line2 = malloc(line2len * sizeof(*line2));
-	if (line1 == NULL || line2 == NULL)
-		err(1, "malloc");
+	char *line1, *line2;
+	ssize_t n1, n2;
+	wchar_t *tline1, *tline2;
+	const char **p;
 
 	(void) setlocale(LC_ALL, "");
 
+	flag1 = flag2 = flag3 = 1;
+
 	while ((ch = getopt(argc, argv, "123i")) != -1)
 		switch(ch) {
 		case '1':
@@ -129,41 +123,57 @@ main(int argc, char *argv[])
 	if (flag3)
 		col3 = *p;
 
+	line1len = line2len = 0;
+	line1 = line2 = NULL;
+	n1 = n2 = -1;
+
 	for (read1 = read2 = 1;;) {
 		/* read next line, check for EOF */
 		if (read1) {
-			line1 = getline(line1, &line1len, fp1);
-			if (line1 == NULL && ferror(fp1))
+			n1 = getline(&line1, &line1len, fp1);
+			if (n1 < 0 && ferror(fp1))
 				err(1, "%s", argv[0]);
+			if (n1 > 0 && line1[n1 - 1] == '\n')
+				line1[n1 - 1] = '\0';
+
 		}
 		if (read2) {
-			line2 = getline(line2, &line2len, fp2);
-			if (line2 == NULL && ferror(fp2))
+			n2 = getline(&line2, &line2len, fp2);
+			if (n2 < 0 && ferror(fp2))
 				err(1, "%s", argv[1]);
+			if (n2 > 0 && line2[n2 - 1] == '\n')
+				line2[n2 - 1] = '\0';
 		}
 
 		/* if one file done, display the rest of the other file */
-		if (line1 == NULL) {
-			if (line2 != NULL && col2 != NULL)
-				show(fp2, argv[1], col2, line2, &line2len);
+		if (n1 < 0) {
+			if (n2 >= 0 && col2 != NULL)
+				show(fp2, argv[1], col2, &line2, &line2len);
 			break;
 		}
-		if (line2 == NULL) {
-			if (line1 != NULL && col1 != NULL)
-				show(fp1, argv[0], col1, line1, &line1len);
+		if (n2 < 0) {
+			if (n1 >= 0 && col1 != NULL)
+				show(fp1, argv[0], col1, &line1, &line1len);
 			break;
 		}
 
-		/* lines are the same */
-		if(iflag)
-			comp = wcsicoll(line1, line2);
+		tline2 = NULL;
+		if ((tline1 = convert(line1)) != NULL)
+			tline2 = convert(line2);
+		if (tline1 == NULL || tline2 == NULL)
+			comp = strcmp(line1, line2);
 		else
-			comp = wcscoll(line1, line2);
+			comp = wcscoll(tline1, tline2);
+		if (tline1 != NULL)
+			free(tline1);
+		if (tline2 != NULL)
+			free(tline2);
 
+		/* lines are the same */
 		if (!comp) {
 			read1 = read2 = 1;
 			if (col3 != NULL)
-				(void)printf("%ls%ls", col3, line1);
+				(void)printf("%s%s\n", col3, line1);
 			continue;
 		}
 
@@ -172,48 +182,52 @@ main(int argc, char *argv[])
 			read1 = 1;
 			read2 = 0;
 			if (col1 != NULL)
-				(void)printf("%ls%ls", col1, line1);
+				(void)printf("%s%s\n", col1, line1);
 		} else {
 			read1 = 0;
 			read2 = 1;
 			if (col2 != NULL)
-				(void)printf("%ls%ls", col2, line2);
+				(void)printf("%s%s\n", col2, line2);
 		}
 	}
 	exit(0);
 }
 
 wchar_t *
-getline(wchar_t *buf, size_t *buflen, FILE *fp)
+convert(const char *str)
 {
-	size_t bufpos;
-	wint_t ch;
+	size_t n;
+	wchar_t *buf, *p;
 
-	bufpos = 0;
-	do {
-		if ((ch = getwc(fp)) != WEOF) {
-			if (bufpos + 2 >= *buflen) {
-				*buflen = *buflen * 2;
-				buf = reallocf(buf, *buflen * sizeof(*buf));
-				if (buf == NULL)
-					return (NULL);
-			}
-			buf[bufpos++] = ch;
-		}
-	} while (ch != WEOF && ch != '\n');
-	if (bufpos + 1 != *buflen)
-		buf[bufpos] = '\0';
+	if ((n = mbstowcs(NULL, str, 0)) == (size_t)-1)
+		return (NULL);
+	if (SIZE_MAX / sizeof(*buf) < n + 1)
+		errx(1, "conversion buffer length overflow");
+	if ((buf = malloc((n + 1) * sizeof(*buf))) == NULL)
+		err(1, "malloc");
+	if (mbstowcs(buf, str, n + 1) != n)
+		errx(1, "internal mbstowcs() error");
+
+	if (iflag) {
+		for (p = buf; *p != L'\0'; p++)
+			*p = towlower(*p);
+	}
 
-	return (bufpos != 0 || ch == '\n' ? buf : NULL);
+	return (buf);
 }
 
 void
-show(FILE *fp, const char *fn, const wchar_t *offset, wchar_t *buf, size_t *buflen)
+show(FILE *fp, const char *fn, const char *offset, char **bufp, size_t *buflenp)
 {
+	ssize_t n;
 
 	do {
-		(void)printf("%ls%ls", offset, buf);
-	} while ((buf = getline(buf, buflen, fp)) != NULL);
+		(void)printf("%s%s\n", offset, *bufp);
+		if ((n = getline(bufp, buflenp, fp)) < 0)
+			break;
+		if (n > 0 && (*bufp)[n - 1] == '\n')
+			(*bufp)[n - 1] = '\0';
+	} while (1);
 	if (ferror(fp))
 		err(1, "%s", fn);
 }
@@ -237,52 +251,3 @@ usage(void)
 	(void)fprintf(stderr, "usage: comm [-123i] file1 file2\n");
 	exit(1);
 }
-
-static size_t wcsicoll_l1_buflen = 0, wcsicoll_l2_buflen = 0;
-static wchar_t *wcsicoll_l1_buf = NULL, *wcsicoll_l2_buf = NULL;
-
-int
-wcsicoll(const wchar_t *s1, const wchar_t *s2)
-{
-	wchar_t *p;
-	size_t l1, l2;
-	size_t new_l1_buflen, new_l2_buflen;
-
-	l1 = wcslen(s1) + 1;
-	l2 = wcslen(s2) + 1;
-	new_l1_buflen = wcsicoll_l1_buflen;
-	new_l2_buflen = wcsicoll_l2_buflen;
-	while (new_l1_buflen < l1) {
-		if (new_l1_buflen == 0)
-			new_l1_buflen = MAXLINELEN;
-		else
-			new_l1_buflen *= 2;
-	}
-	while (new_l2_buflen < l2) {
-		if (new_l2_buflen == 0)
-			new_l2_buflen = MAXLINELEN;
-		else
-			new_l2_buflen *= 2;
-	}
-	if (new_l1_buflen > wcsicoll_l1_buflen) {
-		wcsicoll_l1_buf = reallocf(wcsicoll_l1_buf, new_l1_buflen * sizeof(*wcsicoll_l1_buf));
-		if (wcsicoll_l1_buf == NULL)
-                	err(1, "reallocf");
-		wcsicoll_l1_buflen = new_l1_buflen;
-	}
-	if (new_l2_buflen > wcsicoll_l2_buflen) {
-		wcsicoll_l2_buf = reallocf(wcsicoll_l2_buf, new_l2_buflen * sizeof(*wcsicoll_l2_buf));
-		if (wcsicoll_l2_buf == NULL)
-                	err(1, "reallocf");
-		wcsicoll_l2_buflen = new_l2_buflen;
-	}
-
-	for (p = wcsicoll_l1_buf; *s1; s1++)
-		*p++ = towlower(*s1);
-	*p = '\0';
-	for (p = wcsicoll_l2_buf; *s2; s2++)
-		*p++ = towlower(*s2);
-	*p = '\0';
-
-	return (wcscoll(wcsicoll_l1_buf, wcsicoll_l2_buf));
-}



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201008171502.o7HF2XY5036159>