Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 25 Sep 2024 17:15:21 GMT
From:      Dag-Erling =?utf-8?Q?Sm=C3=B8rgrav?= <des@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: cf73401c4f7a - main - diff3: Fix merge mode.
Message-ID:  <202409251715.48PHFLuQ047265@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by des:

URL: https://cgit.FreeBSD.org/src/commit/?id=cf73401c4f7af063c91a0c8e4d5b46e08f06db87

commit cf73401c4f7af063c91a0c8e4d5b46e08f06db87
Author:     Dag-Erling Smørgrav <des@FreeBSD.org>
AuthorDate: 2024-09-25 17:14:32 +0000
Commit:     Dag-Erling Smørgrav <des@FreeBSD.org>
CommitDate: 2024-09-25 17:14:55 +0000

    diff3: Fix merge mode.
    
    This is mostly thj@'s work, with some tweaks and cleanup by me.  There
    are still some cases where our output differs from GNU diff3, but it's
    much better than before and I'd rather commit what I have now than let
    it continue to languish in a metaphorical drawer.
    
    MFC after       3 weeks
    Sponsored by:   Klara, Inc.
    Reviewed by:    thj
    Differential Revision:  https://reviews.freebsd.org/D46762
---
 usr.bin/diff3/diff3.c | 255 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 179 insertions(+), 76 deletions(-)

diff --git a/usr.bin/diff3/diff3.c b/usr.bin/diff3/diff3.c
index c72ea0747589..f20b1d74678d 100644
--- a/usr.bin/diff3/diff3.c
+++ b/usr.bin/diff3/diff3.c
@@ -79,7 +79,6 @@
 #include <string.h>
 #include <unistd.h>
 
-
 /*
  * "from" is first in range of changed lines; "to" is last+1
  * from=to=line after point of insertion for added lines.
@@ -90,6 +89,7 @@ struct range {
 };
 
 struct diff {
+#define DIFF_TYPE1 1
 #define DIFF_TYPE2 2
 #define DIFF_TYPE3 3
 	int type;
@@ -147,6 +147,7 @@ static void keep(int, struct range *);
 static void merge(int, int);
 static void prange(struct range *, bool);
 static void repos(int);
+static void separate(const char *);
 static void edscript(int) __dead2;
 static void Ascript(int) __dead2;
 static void mergescript(int) __dead2;
@@ -154,7 +155,7 @@ static void increase(void);
 static void usage(void);
 static void printrange(FILE *, struct range *);
 
-static const char diff3_version[] = "FreeBSD diff3 20220517";
+static const char diff3_version[] = "FreeBSD diff3 20240925";
 
 enum {
 	DIFFPROG_OPT,
@@ -189,49 +190,110 @@ usage(void)
 	    "[-L label3] file1 file2 file3\n");
 }
 
+static int
+strtoi(char *str, char **end)
+{
+	intmax_t num;
+
+	errno = 0;
+	num = strtoimax(str, end, 10);
+	if ((end != NULL && *end == str) ||
+	    num < 0 || num > INT_MAX ||
+	    errno == EINVAL || errno == ERANGE)
+		err(1, "error in diff output");
+	return (int)num;
+}
+
+/*
+ * Read diff hunks into the array pointed to by *dd.
+ *
+ * The output from `diff foo bar` consists of a series of hunks describing
+ * an addition (lines in bar not present in foo), change (lines in bar
+ * different from lines in foo), or deletion (lines in foo not present in
+ * bar).  Each record starts with a line of the form:
+ *
+ * a[,b]xc[,d]
+ *
+ * where a, b, c, and d are nonnegative integers (b and d are printed only
+ * if they differ from a and c, respectively), and x is either 'a' for an
+ * addition, 'c' for a change, or 'd' for a deletion.  This is then
+ * followed by a series of lines (which we ignore) giving the added,
+ * changed, or deleted text.
+ *
+ * For an addition, a == b is the last line in 'foo' before the addition,
+ * while c through d is the range of lines in 'bar' to be added to 'foo'.
+ *
+ * For a change, a through b is the range of lines in 'foo' to be replaced
+ * and c through d is the range of lines in 'bar' to replace them with.
+ *
+ * For a deletion, a through b is the range of lines in 'foo' to remove
+ * and c == d is the line in 'bar' which corresponds to the last line
+ * before the deletion.
+ *
+ * The observant reader will have noticed that x is not really needed and
+ * that we can fully describe any hunk using only a, b, c, and d:
+ *
+ * - an addition replaces a zero-length range in one file with a
+ *   non-zero-length range from the other
+ *
+ * - a change replaces a non-zero-length range in one file with a
+ *   non-zero-length range from the other
+ *
+ * - a deletion replaces a non-zero-length range in one file with a
+ *   zero-length range from the other
+ */
 static int
 readin(int fd, struct diff **dd)
 {
 	int a, b, c, d;
-	size_t i;
+	int i;
 	char kind, *p;
 	FILE *f;
 
 	f = fdopen(fd, "r");
 	if (f == NULL)
 		err(2, "fdopen");
-	for (i = 0; (p = getchange(f)); i++) {
+	for (i = 0; (p = getchange(f)) != NULL; i++) {
+		if ((size_t)i >= szchanges - 1)
+			increase();
 #if DEBUG
 		(*dd)[i].line = strdup(p);
 #endif	/* DEBUG */
 
-		if (i >= szchanges - 1)
-			increase();
-		a = b = (int)strtoimax(p, &p, 10);
-		if (*p == ',') {
-			p++;
-			b = (int)strtoimax(p, &p, 10);
-		}
+		a = b = strtoi(p, &p);
+		if (*p == ',')
+			b = strtoi(p + 1, &p);
 		kind = *p++;
-		c = d = (int)strtoimax(p, &p, 10);
-		if (*p == ',') {
-			p++;
-			d = (int)strtoimax(p, &p, 10);
-		}
+		c = d = strtoi(p, &p);
+		if (*p == ',')
+			d = strtoi(p + 1, &p);
+		if (*p != '\n')
+			errx(1, "error in diff output");
 		if (kind == 'a')
 			a++;
-		if (kind == 'd')
+		else if (kind == 'c')
+			/* nothing */ ;
+		else if (kind == 'd')
 			c++;
+		else
+			errx(1, "error in diff output");
 		b++;
 		d++;
+		if (b < a || d < c)
+			errx(1, "error in diff output");
 		(*dd)[i].old.from = a;
 		(*dd)[i].old.to = b;
 		(*dd)[i].new.from = c;
 		(*dd)[i].new.to = d;
+		if (i > 0) {
+			if ((*dd)[i].old.from < (*dd)[i - 1].old.to ||
+			    (*dd)[i].new.from < (*dd)[i - 1].new.to)
+				errx(1, "diff output out of order");
+		}
 	}
-	if (i) {
-		(*dd)[i].old.from = (*dd)[i - 1].old.to;
-		(*dd)[i].new.from = (*dd)[i - 1].new.to;
+	if (i > 0) {
+		(*dd)[i].old.from = (*dd)[i].old.to = (*dd)[i - 1].old.to;
+		(*dd)[i].new.from = (*dd)[i].new.to = (*dd)[i - 1].new.to;
 	}
 	fclose(f);
 	return (i);
@@ -264,7 +326,7 @@ getchange(FILE *b)
 {
 	char *line;
 
-	while ((line = get_line(b, NULL))) {
+	while ((line = get_line(b, NULL)) != NULL) {
 		if (isdigit((unsigned char)line[0]))
 			return (line);
 	}
@@ -305,15 +367,23 @@ merge(int m1, int m2)
 	d2 = d23;
 	j = 0;
 
-	while (t1 = d1 < d13 + m1, t2 = d2 < d23 + m2, t1 || t2) {
+	for (;;) {
+		t1 = (d1 < d13 + m1);
+		t2 = (d2 < d23 + m2);
+		if (!t1 && !t2)
+			break;
+
 		/* first file is different from the others */
 		if (!t2 || (t1 && d1->new.to < d2->new.from)) {
 			/* stuff peculiar to 1st file */
 			if (eflag == EFLAG_NONE) {
-				printf("====1\n");
+				separate("1");
 				change(1, &d1->old, false);
 				keep(2, &d1->new);
 				change(3, &d1->new, false);
+			} else if (eflag == EFLAG_OVERLAP) {
+				j = edit(d2, dup, j, DIFF_TYPE1);
+				printdiff(d2);
 			}
 			d1++;
 			continue;
@@ -321,7 +391,7 @@ merge(int m1, int m2)
 		/* second file is different from others */
 		if (!t1 || (t2 && d2->new.to < d1->new.from)) {
 			if (eflag == EFLAG_NONE) {
-				printf("====2\n");
+				separate("2");
 				keep(1, &d2->new);
 				change(3, &d2->new, false);
 				change(2, &d2->old, false);
@@ -359,7 +429,7 @@ merge(int m1, int m2)
 			 * dup = 1 means files 1 and 2 identical
 			 */
 			if (eflag == EFLAG_NONE) {
-				printf("====%s\n", dup ? "3" : "");
+				separate(dup ? "3" : "");
 				change(1, &d1->old, dup);
 				change(2, &d2->old, false);
 				d3 = d1->old.to > d1->old.from ? d1 : d2;
@@ -400,6 +470,12 @@ merge(int m1, int m2)
 		edscript(j);
 }
 
+static void
+separate(const char *s)
+{
+	printf("====%s\n", s);
+}
+
 /*
  * The range of lines rold.from thru rold.to in file i is to be changed.
  * It is to be printed only if it does not duplicate something to be
@@ -555,7 +631,6 @@ printrange(FILE *p, struct range *r)
 	char *line = NULL;
 	size_t len = 0;
 	int i = 1;
-	ssize_t rlen = 0;
 
 	/* We haven't been asked to print anything */
 	if (r->from == r->to)
@@ -570,7 +645,7 @@ printrange(FILE *p, struct range *r)
 	 * files with lots of ranges.
 	 */
 	fseek(p, 0L, SEEK_SET);
-	while ((rlen = getline(&line, &len, p)) > 0) {
+	while (getline(&line, &len, p) > 0) {
 		if (i >= r->from)
 			printf("%s", line);
 		if (++i > r->to - 1)
@@ -591,20 +666,31 @@ edscript(int n)
 		old = &de[n].old;
 
 		delete = (new->from == new->to);
-		if (!oflag || !overlap[n]) {
-			prange(old, delete);
-		} else {
-			printf("%da\n", old->to - 1);
-			printf("%s\n", divider);
-		}
-		printrange(fp[2], new);
-		if (!oflag || !overlap[n]) {
-			if (!delete)
+		if (de[n].type == DIFF_TYPE1) {
+			if (delete)
+				printf("%dd\n", new->from - 1);
+			else if (old->from == new->from && old->to == new->to) {
+				printf("%dc\n", old->from);
+				printrange(fp[2], old);
 				printf(".\n");
+			}
+			continue;
 		} else {
-			printf("%s %s\n.\n", newmark, f3mark);
-			printf("%da\n%s %s\n.\n", old->from - 1,
-				oldmark, f1mark);
+			if (!oflag || !overlap[n]) {
+				prange(old, delete);
+			} else {
+				printf("%da\n", old->to - 1);
+				printf("%s\n", divider);
+			}
+			printrange(fp[2], new);
+			if (!oflag || !overlap[n]) {
+				if (!delete)
+					printf(".\n");
+			} else {
+				printf("%s %s\n.\n", newmark, f3mark);
+				printf("%da\n%s %s\n.\n", old->from - 1,
+					oldmark, f1mark);
+			}
 		}
 	}
 	if (iflag)
@@ -639,10 +725,7 @@ Ascript(int n)
 				prange(old, deletenew);
 				printrange(fp[2], new);
 			} else {
-				startmark = new->to;
-
-				if (!deletenew)
-					startmark--;
+				startmark = new->to - 1;
 
 				printf("%da\n", startmark);
 				printf("%s %s\n", newmark, f3mark);
@@ -711,25 +794,43 @@ mergescript(int i)
 {
 	struct range r, *new, *old;
 	int n;
+	bool delete = false;
 
 	r.from = 1;
 	r.to = 1;
 
-	for (n = 1; n < i+1; n++) {
+	for (n = 1; n <= i; n++) {
 		new = &de[n].new;
 		old = &de[n].old;
 
-		/* print any lines leading up to here */
-		r.to = old->from;
-		printrange(fp[0], &r);
+		/*
+		 * Print any lines leading up to here. If we are merging don't
+		 * print deleted ranges.
+		 */
+		delete = (new->from == new->to);
+		if (de[n].type == DIFF_TYPE1 && delete)
+			r.to = new->from - 1;
+		else if (de[n].type == DIFF_TYPE3 && (old->from == old->to)) {
+			r.from = old->from - 1;
+			r.to = new->from;
+		} else
+			r.to = old->from;
 
-		if (de[n].type == DIFF_TYPE2) {
+		printrange(fp[0], &r);
+		switch (de[n].type) {
+		case DIFF_TYPE1:
+			/* If this isn't a delete print it */
+			if (!delete)
+				printrange(fp[2], new);
+			break;
+		case DIFF_TYPE2:
 			printf("%s %s\n", oldmark, f2mark);
 			printrange(fp[1], old);
 			printf("%s\n", divider);
 			printrange(fp[2], new);
 			printf("%s %s\n", newmark, f3mark);
-		} else if (de[n].type == DIFF_TYPE3) {
+			break;
+		case DIFF_TYPE3:
 			if (!oflag || !overlap[n]) {
 				printrange(fp[2], new);
 			} else {
@@ -737,20 +838,27 @@ mergescript(int i)
 				printf("%s %s\n", oldmark, f1mark);
 				printrange(fp[0], old);
 
-				printf("%s %s\n", orgmark, f2mark);
-				if (old->from == old->to) {
-					struct range or;
-					or.from = old->from - 1;
-					or.to = new->to;
-					printrange(fp[1], &or);
-				} else
-					printrange(fp[1], old);
+				if (eflag != EFLAG_OVERLAP) {
+					printf("%s %s\n", orgmark, f2mark);
+					if (old->from == old->to) {
+						struct range or;
+						or.from = old->from - 1;
+						or.to = new->to;
+						printrange(fp[1], &or);
+					} else {
+						printrange(fp[1], old);
+					}
+				}
 
 				printf("%s\n", divider);
 
 				printrange(fp[2], new);
 				printf("%s %s\n", newmark, f3mark);
 			}
+			break;
+		default:
+			printf("Error: Unhandled diff type - exiting\n");
+			exit(EXIT_FAILURE);
 		}
 
 		if (old->from == old->to)
@@ -758,6 +866,7 @@ mergescript(int i)
 		else
 			r.from = old->to;
 	}
+
 	/*
 	 * Print from the final range to the end of 'myfile'. Any deletions or
 	 * additions to this file should have been handled by now.
@@ -768,21 +877,14 @@ mergescript(int i)
 	 */
 	new = &de[n-1].new;
 	old = &de[n-1].old;
-	if ((old->from == new->from) &&
-		(old->to == new->to))
+
+	if (old->from == new->from && old->to == new->to)
 		r.from--;
 	else if (new->from == new->to)
 		r.from = old->from;
 
-	/*
-	 * If the range is a 3 way merge then we need to skip a line in the
-	 * trailing output.
-	 */
-	if (de[n-1].type == DIFF_TYPE3)
-		r.from++;
-
 	r.to = INT_MAX;
-	printrange(fp[0], &r);
+	printrange(fp[2], &r);
 	exit(overlapcnt > 0);
 }
 
@@ -797,25 +899,25 @@ increase(void)
 	newsz = szchanges == 0 ? 64 : 2 * szchanges;
 	incr = newsz - szchanges;
 
-	p = reallocarray(d13, newsz, sizeof(struct diff));
+	p = reallocarray(d13, newsz, sizeof(*p));
 	if (p == NULL)
 		err(1, NULL);
-	memset(p + szchanges, 0, incr * sizeof(struct diff));
+	memset(p + szchanges, 0, incr * sizeof(*p));
 	d13 = p;
-	p = reallocarray(d23, newsz, sizeof(struct diff));
+	p = reallocarray(d23, newsz, sizeof(*p));
 	if (p == NULL)
 		err(1, NULL);
-	memset(p + szchanges, 0, incr * sizeof(struct diff));
+	memset(p + szchanges, 0, incr * sizeof(*p));
 	d23 = p;
-	p = reallocarray(de, newsz, sizeof(struct diff));
+	p = reallocarray(de, newsz, sizeof(*p));
 	if (p == NULL)
 		err(1, NULL);
-	memset(p + szchanges, 0, incr * sizeof(struct diff));
+	memset(p + szchanges, 0, incr * sizeof(*p));
 	de = p;
-	q = reallocarray(overlap, newsz, sizeof(char));
+	q = reallocarray(overlap, newsz, 1);
 	if (q == NULL)
 		err(1, NULL);
-	memset(q + szchanges, 0, incr * sizeof(char));
+	memset(q + szchanges, 0, incr * 1);
 	overlap = q;
 	szchanges = newsz;
 }
@@ -919,7 +1021,7 @@ main(int argc, char **argv)
 	if (kq == -1)
 		err(2, "kqueue");
 
-	e = malloc(2 * sizeof(struct kevent));
+	e = malloc(2 * sizeof(*e));
 	if (e == NULL)
 		err(2, "malloc");
 
@@ -1007,6 +1109,7 @@ main(int argc, char **argv)
 		}
 		nleft -= nke;
 	}
+	free(e);
 	merge(m, n);
 
 	return (EXIT_SUCCESS);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202409251715.48PHFLuQ047265>