Date: Wed, 25 Sep 2024 17:15:21 GMT From: Dag-Erling =?utf-8?Q?Sm=C3=B8rgrav?= <des@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org Subject: git: cf73401c4f7a - main - diff3: Fix merge mode. Message-ID: <202409251715.48PHFLuQ047265@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch main has been updated by des: URL: https://cgit.FreeBSD.org/src/commit/?id=cf73401c4f7af063c91a0c8e4d5b46e08f06db87 commit cf73401c4f7af063c91a0c8e4d5b46e08f06db87 Author: Dag-Erling Smørgrav <des@FreeBSD.org> AuthorDate: 2024-09-25 17:14:32 +0000 Commit: Dag-Erling Smørgrav <des@FreeBSD.org> CommitDate: 2024-09-25 17:14:55 +0000 diff3: Fix merge mode. This is mostly thj@'s work, with some tweaks and cleanup by me. There are still some cases where our output differs from GNU diff3, but it's much better than before and I'd rather commit what I have now than let it continue to languish in a metaphorical drawer. MFC after 3 weeks Sponsored by: Klara, Inc. Reviewed by: thj Differential Revision: https://reviews.freebsd.org/D46762 --- usr.bin/diff3/diff3.c | 255 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 179 insertions(+), 76 deletions(-) diff --git a/usr.bin/diff3/diff3.c b/usr.bin/diff3/diff3.c index c72ea0747589..f20b1d74678d 100644 --- a/usr.bin/diff3/diff3.c +++ b/usr.bin/diff3/diff3.c @@ -79,7 +79,6 @@ #include <string.h> #include <unistd.h> - /* * "from" is first in range of changed lines; "to" is last+1 * from=to=line after point of insertion for added lines. @@ -90,6 +89,7 @@ struct range { }; struct diff { +#define DIFF_TYPE1 1 #define DIFF_TYPE2 2 #define DIFF_TYPE3 3 int type; @@ -147,6 +147,7 @@ static void keep(int, struct range *); static void merge(int, int); static void prange(struct range *, bool); static void repos(int); +static void separate(const char *); static void edscript(int) __dead2; static void Ascript(int) __dead2; static void mergescript(int) __dead2; @@ -154,7 +155,7 @@ static void increase(void); static void usage(void); static void printrange(FILE *, struct range *); -static const char diff3_version[] = "FreeBSD diff3 20220517"; +static const char diff3_version[] = "FreeBSD diff3 20240925"; enum { DIFFPROG_OPT, @@ -189,49 +190,110 @@ usage(void) "[-L label3] file1 file2 file3\n"); } +static int +strtoi(char *str, char **end) +{ + intmax_t num; + + errno = 0; + num = strtoimax(str, end, 10); + if ((end != NULL && *end == str) || + num < 0 || num > INT_MAX || + errno == EINVAL || errno == ERANGE) + err(1, "error in diff output"); + return (int)num; +} + +/* + * Read diff hunks into the array pointed to by *dd. + * + * The output from `diff foo bar` consists of a series of hunks describing + * an addition (lines in bar not present in foo), change (lines in bar + * different from lines in foo), or deletion (lines in foo not present in + * bar). Each record starts with a line of the form: + * + * a[,b]xc[,d] + * + * where a, b, c, and d are nonnegative integers (b and d are printed only + * if they differ from a and c, respectively), and x is either 'a' for an + * addition, 'c' for a change, or 'd' for a deletion. This is then + * followed by a series of lines (which we ignore) giving the added, + * changed, or deleted text. + * + * For an addition, a == b is the last line in 'foo' before the addition, + * while c through d is the range of lines in 'bar' to be added to 'foo'. + * + * For a change, a through b is the range of lines in 'foo' to be replaced + * and c through d is the range of lines in 'bar' to replace them with. + * + * For a deletion, a through b is the range of lines in 'foo' to remove + * and c == d is the line in 'bar' which corresponds to the last line + * before the deletion. + * + * The observant reader will have noticed that x is not really needed and + * that we can fully describe any hunk using only a, b, c, and d: + * + * - an addition replaces a zero-length range in one file with a + * non-zero-length range from the other + * + * - a change replaces a non-zero-length range in one file with a + * non-zero-length range from the other + * + * - a deletion replaces a non-zero-length range in one file with a + * zero-length range from the other + */ static int readin(int fd, struct diff **dd) { int a, b, c, d; - size_t i; + int i; char kind, *p; FILE *f; f = fdopen(fd, "r"); if (f == NULL) err(2, "fdopen"); - for (i = 0; (p = getchange(f)); i++) { + for (i = 0; (p = getchange(f)) != NULL; i++) { + if ((size_t)i >= szchanges - 1) + increase(); #if DEBUG (*dd)[i].line = strdup(p); #endif /* DEBUG */ - if (i >= szchanges - 1) - increase(); - a = b = (int)strtoimax(p, &p, 10); - if (*p == ',') { - p++; - b = (int)strtoimax(p, &p, 10); - } + a = b = strtoi(p, &p); + if (*p == ',') + b = strtoi(p + 1, &p); kind = *p++; - c = d = (int)strtoimax(p, &p, 10); - if (*p == ',') { - p++; - d = (int)strtoimax(p, &p, 10); - } + c = d = strtoi(p, &p); + if (*p == ',') + d = strtoi(p + 1, &p); + if (*p != '\n') + errx(1, "error in diff output"); if (kind == 'a') a++; - if (kind == 'd') + else if (kind == 'c') + /* nothing */ ; + else if (kind == 'd') c++; + else + errx(1, "error in diff output"); b++; d++; + if (b < a || d < c) + errx(1, "error in diff output"); (*dd)[i].old.from = a; (*dd)[i].old.to = b; (*dd)[i].new.from = c; (*dd)[i].new.to = d; + if (i > 0) { + if ((*dd)[i].old.from < (*dd)[i - 1].old.to || + (*dd)[i].new.from < (*dd)[i - 1].new.to) + errx(1, "diff output out of order"); + } } - if (i) { - (*dd)[i].old.from = (*dd)[i - 1].old.to; - (*dd)[i].new.from = (*dd)[i - 1].new.to; + if (i > 0) { + (*dd)[i].old.from = (*dd)[i].old.to = (*dd)[i - 1].old.to; + (*dd)[i].new.from = (*dd)[i].new.to = (*dd)[i - 1].new.to; } fclose(f); return (i); @@ -264,7 +326,7 @@ getchange(FILE *b) { char *line; - while ((line = get_line(b, NULL))) { + while ((line = get_line(b, NULL)) != NULL) { if (isdigit((unsigned char)line[0])) return (line); } @@ -305,15 +367,23 @@ merge(int m1, int m2) d2 = d23; j = 0; - while (t1 = d1 < d13 + m1, t2 = d2 < d23 + m2, t1 || t2) { + for (;;) { + t1 = (d1 < d13 + m1); + t2 = (d2 < d23 + m2); + if (!t1 && !t2) + break; + /* first file is different from the others */ if (!t2 || (t1 && d1->new.to < d2->new.from)) { /* stuff peculiar to 1st file */ if (eflag == EFLAG_NONE) { - printf("====1\n"); + separate("1"); change(1, &d1->old, false); keep(2, &d1->new); change(3, &d1->new, false); + } else if (eflag == EFLAG_OVERLAP) { + j = edit(d2, dup, j, DIFF_TYPE1); + printdiff(d2); } d1++; continue; @@ -321,7 +391,7 @@ merge(int m1, int m2) /* second file is different from others */ if (!t1 || (t2 && d2->new.to < d1->new.from)) { if (eflag == EFLAG_NONE) { - printf("====2\n"); + separate("2"); keep(1, &d2->new); change(3, &d2->new, false); change(2, &d2->old, false); @@ -359,7 +429,7 @@ merge(int m1, int m2) * dup = 1 means files 1 and 2 identical */ if (eflag == EFLAG_NONE) { - printf("====%s\n", dup ? "3" : ""); + separate(dup ? "3" : ""); change(1, &d1->old, dup); change(2, &d2->old, false); d3 = d1->old.to > d1->old.from ? d1 : d2; @@ -400,6 +470,12 @@ merge(int m1, int m2) edscript(j); } +static void +separate(const char *s) +{ + printf("====%s\n", s); +} + /* * The range of lines rold.from thru rold.to in file i is to be changed. * It is to be printed only if it does not duplicate something to be @@ -555,7 +631,6 @@ printrange(FILE *p, struct range *r) char *line = NULL; size_t len = 0; int i = 1; - ssize_t rlen = 0; /* We haven't been asked to print anything */ if (r->from == r->to) @@ -570,7 +645,7 @@ printrange(FILE *p, struct range *r) * files with lots of ranges. */ fseek(p, 0L, SEEK_SET); - while ((rlen = getline(&line, &len, p)) > 0) { + while (getline(&line, &len, p) > 0) { if (i >= r->from) printf("%s", line); if (++i > r->to - 1) @@ -591,20 +666,31 @@ edscript(int n) old = &de[n].old; delete = (new->from == new->to); - if (!oflag || !overlap[n]) { - prange(old, delete); - } else { - printf("%da\n", old->to - 1); - printf("%s\n", divider); - } - printrange(fp[2], new); - if (!oflag || !overlap[n]) { - if (!delete) + if (de[n].type == DIFF_TYPE1) { + if (delete) + printf("%dd\n", new->from - 1); + else if (old->from == new->from && old->to == new->to) { + printf("%dc\n", old->from); + printrange(fp[2], old); printf(".\n"); + } + continue; } else { - printf("%s %s\n.\n", newmark, f3mark); - printf("%da\n%s %s\n.\n", old->from - 1, - oldmark, f1mark); + if (!oflag || !overlap[n]) { + prange(old, delete); + } else { + printf("%da\n", old->to - 1); + printf("%s\n", divider); + } + printrange(fp[2], new); + if (!oflag || !overlap[n]) { + if (!delete) + printf(".\n"); + } else { + printf("%s %s\n.\n", newmark, f3mark); + printf("%da\n%s %s\n.\n", old->from - 1, + oldmark, f1mark); + } } } if (iflag) @@ -639,10 +725,7 @@ Ascript(int n) prange(old, deletenew); printrange(fp[2], new); } else { - startmark = new->to; - - if (!deletenew) - startmark--; + startmark = new->to - 1; printf("%da\n", startmark); printf("%s %s\n", newmark, f3mark); @@ -711,25 +794,43 @@ mergescript(int i) { struct range r, *new, *old; int n; + bool delete = false; r.from = 1; r.to = 1; - for (n = 1; n < i+1; n++) { + for (n = 1; n <= i; n++) { new = &de[n].new; old = &de[n].old; - /* print any lines leading up to here */ - r.to = old->from; - printrange(fp[0], &r); + /* + * Print any lines leading up to here. If we are merging don't + * print deleted ranges. + */ + delete = (new->from == new->to); + if (de[n].type == DIFF_TYPE1 && delete) + r.to = new->from - 1; + else if (de[n].type == DIFF_TYPE3 && (old->from == old->to)) { + r.from = old->from - 1; + r.to = new->from; + } else + r.to = old->from; - if (de[n].type == DIFF_TYPE2) { + printrange(fp[0], &r); + switch (de[n].type) { + case DIFF_TYPE1: + /* If this isn't a delete print it */ + if (!delete) + printrange(fp[2], new); + break; + case DIFF_TYPE2: printf("%s %s\n", oldmark, f2mark); printrange(fp[1], old); printf("%s\n", divider); printrange(fp[2], new); printf("%s %s\n", newmark, f3mark); - } else if (de[n].type == DIFF_TYPE3) { + break; + case DIFF_TYPE3: if (!oflag || !overlap[n]) { printrange(fp[2], new); } else { @@ -737,20 +838,27 @@ mergescript(int i) printf("%s %s\n", oldmark, f1mark); printrange(fp[0], old); - printf("%s %s\n", orgmark, f2mark); - if (old->from == old->to) { - struct range or; - or.from = old->from - 1; - or.to = new->to; - printrange(fp[1], &or); - } else - printrange(fp[1], old); + if (eflag != EFLAG_OVERLAP) { + printf("%s %s\n", orgmark, f2mark); + if (old->from == old->to) { + struct range or; + or.from = old->from - 1; + or.to = new->to; + printrange(fp[1], &or); + } else { + printrange(fp[1], old); + } + } printf("%s\n", divider); printrange(fp[2], new); printf("%s %s\n", newmark, f3mark); } + break; + default: + printf("Error: Unhandled diff type - exiting\n"); + exit(EXIT_FAILURE); } if (old->from == old->to) @@ -758,6 +866,7 @@ mergescript(int i) else r.from = old->to; } + /* * Print from the final range to the end of 'myfile'. Any deletions or * additions to this file should have been handled by now. @@ -768,21 +877,14 @@ mergescript(int i) */ new = &de[n-1].new; old = &de[n-1].old; - if ((old->from == new->from) && - (old->to == new->to)) + + if (old->from == new->from && old->to == new->to) r.from--; else if (new->from == new->to) r.from = old->from; - /* - * If the range is a 3 way merge then we need to skip a line in the - * trailing output. - */ - if (de[n-1].type == DIFF_TYPE3) - r.from++; - r.to = INT_MAX; - printrange(fp[0], &r); + printrange(fp[2], &r); exit(overlapcnt > 0); } @@ -797,25 +899,25 @@ increase(void) newsz = szchanges == 0 ? 64 : 2 * szchanges; incr = newsz - szchanges; - p = reallocarray(d13, newsz, sizeof(struct diff)); + p = reallocarray(d13, newsz, sizeof(*p)); if (p == NULL) err(1, NULL); - memset(p + szchanges, 0, incr * sizeof(struct diff)); + memset(p + szchanges, 0, incr * sizeof(*p)); d13 = p; - p = reallocarray(d23, newsz, sizeof(struct diff)); + p = reallocarray(d23, newsz, sizeof(*p)); if (p == NULL) err(1, NULL); - memset(p + szchanges, 0, incr * sizeof(struct diff)); + memset(p + szchanges, 0, incr * sizeof(*p)); d23 = p; - p = reallocarray(de, newsz, sizeof(struct diff)); + p = reallocarray(de, newsz, sizeof(*p)); if (p == NULL) err(1, NULL); - memset(p + szchanges, 0, incr * sizeof(struct diff)); + memset(p + szchanges, 0, incr * sizeof(*p)); de = p; - q = reallocarray(overlap, newsz, sizeof(char)); + q = reallocarray(overlap, newsz, 1); if (q == NULL) err(1, NULL); - memset(q + szchanges, 0, incr * sizeof(char)); + memset(q + szchanges, 0, incr * 1); overlap = q; szchanges = newsz; } @@ -919,7 +1021,7 @@ main(int argc, char **argv) if (kq == -1) err(2, "kqueue"); - e = malloc(2 * sizeof(struct kevent)); + e = malloc(2 * sizeof(*e)); if (e == NULL) err(2, "malloc"); @@ -1007,6 +1109,7 @@ main(int argc, char **argv) } nleft -= nke; } + free(e); merge(m, n); return (EXIT_SUCCESS);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202409251715.48PHFLuQ047265>