Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 27 Mar 2024 11:26:16 GMT
From:      Dag-Erling =?utf-8?Q?Sm=C3=B8rgrav?= <des@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: d9a9f23d0b3f - main - diff: Integrate libdiff from OpenBSD GoT.
Message-ID:  <202403271126.42RBQGoX003685@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by des:

URL: https://cgit.FreeBSD.org/src/commit/?id=d9a9f23d0b3f1676d5656b76301341c0037d15b7

commit d9a9f23d0b3f1676d5656b76301341c0037d15b7
Author:     Dag-Erling Smørgrav <des@FreeBSD.org>
AuthorDate: 2024-03-27 10:03:33 +0000
Commit:     Dag-Erling Smørgrav <des@FreeBSD.org>
CommitDate: 2024-03-27 10:03:33 +0000

    diff: Integrate libdiff from OpenBSD GoT.
    
    This adds support for two new diff algorithms, Myers diff and Patience
    diff.
    
    These algorithms perform a different form of search compared to the
    classic Stone algorithm and support escapes when worst case scenarios
    are encountered.
    
    Add the -A flag to allow selection of the algorithm, but default to
    using the new Myers diff implementation.
    
    The libdiff implementation currently only supports a subset of input and
    output options supported by diff.  When these options are used, but the
    algorithm is not selected, automatically fallback to the classic Stone
    algorithm until support for these modes can be added.
    
    Based on work originally done by thj@ with contributions from kevans@.
    
    Sponsored by:   Klara, Inc.
    Reviewed by:    thj
    Differential Revision:  https://reviews.freebsd.org/D44302
---
 lib/Makefile                    |   1 +
 lib/libdiff/Makefile            |  15 ++
 share/mk/src.libnames.mk        |   4 +
 usr.bin/diff/Makefile           |   7 +-
 usr.bin/diff/diff.1             |  56 ++++++-
 usr.bin/diff/diff.c             |  50 ++++++-
 usr.bin/diff/diff.h             |  18 ++-
 usr.bin/diff/diffdir.c          |   1 -
 usr.bin/diff/diffreg.c          |  41 ++++-
 usr.bin/diff/diffreg_new.c      | 325 ++++++++++++++++++++++++++++++++++++++++
 usr.bin/diff/pr.c               |   1 -
 usr.bin/diff/tests/diff_test.sh |   4 +
 usr.bin/diff/xmalloc.c          |   1 -
 13 files changed, 502 insertions(+), 22 deletions(-)

diff --git a/lib/Makefile b/lib/Makefile
index a632a77e6071..a3c4dd966040 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -16,6 +16,7 @@ SUBDIR_BOOTSTRAP= \
 	libc++ \
 	libc++experimental \
 	libcxxrt \
+	libdiff \
 	libelf \
 	libssp \
 	libssp_nonshared \
diff --git a/lib/libdiff/Makefile b/lib/libdiff/Makefile
new file mode 100644
index 000000000000..3fa8e6b05d2d
--- /dev/null
+++ b/lib/libdiff/Makefile
@@ -0,0 +1,15 @@
+LIB=	diff
+INTERNALLIB=	# API not published or supported.
+
+.PATH: ${SRCTOP}/contrib/libdiff/compat
+.PATH: ${SRCTOP}/contrib/libdiff/lib
+
+SRCS=	diff_atomize_text.c diff_main.c diff_myers.c \
+	diff_patience.c diff_output.c diff_output_plain.c \
+	diff_output_unidiff.c diff_output_edscript.c recallocarray.c
+
+WARNS=
+CFLAGS+= -I${SRCTOP}/contrib/libdiff/compat/include
+CFLAGS+= -I${SRCTOP}/contrib/libdiff/include
+
+.include <bsd.lib.mk>
diff --git a/share/mk/src.libnames.mk b/share/mk/src.libnames.mk
index d9fe88146dbe..a12c3755ea72 100644
--- a/share/mk/src.libnames.mk
+++ b/share/mk/src.libnames.mk
@@ -43,6 +43,7 @@ _INTERNALLIBS=	\
 		bsnmptools \
 		c_nossp_pic \
 		cron \
+		diff \
 		elftc \
 		fdt \
 		fifolog \
@@ -544,6 +545,9 @@ LDADD+=		${LDADD_${_l}}
 
 _LIB_OBJTOP?=	${OBJTOP}
 # INTERNALLIB definitions.
+LIBDIFFDIR=	${_LIB_OBJTOP}/lib/libdiff
+LIBDIFF?=	${LIBDIFFDIR}/libdiff${PIE_SUFFIX}.a
+
 LIBELFTCDIR=	${_LIB_OBJTOP}/lib/libelftc
 LIBELFTC?=	${LIBELFTCDIR}/libelftc${PIE_SUFFIX}.a
 
diff --git a/usr.bin/diff/Makefile b/usr.bin/diff/Makefile
index 198e107ec1c4..20eaaf8e1dff 100644
--- a/usr.bin/diff/Makefile
+++ b/usr.bin/diff/Makefile
@@ -1,9 +1,10 @@
-
 .include <src.opts.mk>
 
 PROG=	diff
-SRCS=	diff.c diffdir.c diffreg.c xmalloc.c pr.c
-LIBADD=	m
+SRCS=	diff.c diffdir.c diffreg.c xmalloc.c pr.c diffreg_new.c
+
+LIBADD=	m diff
+CFLAGS+= -I${.CURDIR} -I${SRCTOP}/contrib/libdiff/lib -I${SRCTOP}/contrib/libdiff/include
 
 HAS_TESTS=
 SUBDIR.${MK_TESTS}+= tests
diff --git a/usr.bin/diff/diff.1 b/usr.bin/diff/diff.1
index 1df2d5f8081f..9bc46d07b58a 100644
--- a/usr.bin/diff/diff.1
+++ b/usr.bin/diff/diff.1
@@ -27,7 +27,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd February 26, 2024
+.Dd March 7, 2024
 .Dt DIFF 1
 .Os
 .Sh NAME
@@ -40,6 +40,7 @@
 .Fl c | e | f |
 .Fl n | q | u | y
 .Oc
+.Op Fl A Ar algo | Fl -algorithm Ar algo
 .Op Fl -brief
 .Op Fl -color Ns = Ns Ar when
 .Op Fl -changed-group-format Ar GFMT
@@ -67,6 +68,7 @@
 .Ar file1 file2
 .Nm diff
 .Op Fl aBbdilpTtw
+.Op Fl A Ar algo | Fl -algorithm Ar algo
 .Op Fl I Ar pattern | Fl -ignore-matching-lines Ar pattern
 .Op Fl F Ar pattern | Fl -show-function-line Ar pattern
 .Op Fl L Ar label | Fl -label Ar label
@@ -95,6 +97,7 @@
 .Ar file1 file2
 .Nm diff
 .Op Fl aBbdiltw
+.Op Fl A Ar algo | Fl -algorithm Ar algo
 .Op Fl I Ar pattern | Fl -ignore-matching-lines Ar pattern
 .Op Fl -brief
 .Op Fl -color Ns = Ns Ar when
@@ -121,6 +124,7 @@
 .Ar file1 file2
 .Nm diff
 .Op Fl aBbdilpTtw
+.Op Fl A Ar algo | Fl -algorithm Ar algo
 .Op Fl I Ar pattern | Fl -ignore-matching-lines Ar pattern
 .Op Fl F Ar pattern | Fl -show-function-line Ar pattern
 .Op Fl L Ar label | Fl -label Ar label
@@ -153,6 +157,7 @@
 .Fl c | e | f |
 .Fl n | q | u
 .Oc
+.Op Fl A Ar algo | Fl -algorithm Ar algo
 .Op Fl -brief
 .Op Fl -color Ns = Ns Ar when
 .Op Fl -changed-group-format Ar GFMT
@@ -276,6 +281,18 @@ from their state in
 .Ar dir1
 to their state in
 .Ar dir2 .
+Note that when comparing directories with
+.Fl e ,
+the resulting file may no longer be interpreted as an
+.Xr ed 1
+script.
+Output is added to indicate which file each set of
+.Xr ed 1
+commands applies to.
+These hunks can be manually extracted to produce an
+.Xr ed 1
+script, which can also be applied with
+.Xr patch 1 .
 .It Fl f -forward-ed
 Identical output to that of the
 .Fl e
@@ -331,6 +348,38 @@ Files differ and only the second file contains the line.
 .Pp
 Comparison options:
 .Bl -tag -width Ds
+.It Fl A Ar algo, Fl -algorithm Ar algo
+Configure the algorithm used when comparing files.
+.Nm
+supports 3 algorithms:
+.Pp
+.Bl -tag -width Ds -compact
+.It Cm myers
+The Myers diff algorithm finds the shortest edit which transforms one
+input into the other.
+It generally runs in O(N+D\(S2) time, requiring O(N) space, where N is
+the sum of the lengths of the inputs and D is the length of the
+difference between them, with a theoretical O(N\(pcD) worst case.
+If it encounters worst-case input, the implementation used by
+.Nm
+falls back to a less optimal but faster algorithm.
+.It Cm patience
+The Patience variant of the Myers algorithm attempts to create more
+aesthetically pleasing diff output by logically grouping lines.
+.It Cm stone
+The Stone algorithm (commonly known as Hunt-McIlroy or Hunt-Szymanski)
+looks for the longest common subsequence between compared files.
+Stone encounters worst case performance when there are long common
+subsequences.
+In large files this can lead to a significant performance impact.
+The Stone algorithm is maintained for compatibility.
+.El
+.Pp
+The
+.Nm
+utility defaults to the Myers algorithm, but will fall back to the
+Stone algorithm if the input or output options are not supported by
+the Myers implementation.
 .It Fl a -text
 Treat all files as ASCII text.
 Normally
@@ -741,7 +790,7 @@ utility is compliant with the
 specification.
 .Pp
 The flags
-.Op Fl aDdIiLlNnPpqSsTtwXxy
+.Op Fl AaDdIiLlNnPpqSsTtwXxy
 are extensions to that specification.
 .Sh HISTORY
 A
@@ -759,3 +808,6 @@ This was replaced in
 by a BSD-licensed implementation written by
 .An Todd Miller .
 Some GNUisms were lost in the process.
+.Pp
+libdiff was imported from the Game of Trees version control system and default
+algorithm was changed to Myers for FreeBSD 15.
diff --git a/usr.bin/diff/diff.c b/usr.bin/diff/diff.c
index d947c1e01705..15ebbca28bd7 100644
--- a/usr.bin/diff/diff.c
+++ b/usr.bin/diff/diff.c
@@ -20,7 +20,6 @@
  * Materiel Command, USAF, under agreement number F39502-99-1-0512.
  */
 
-#include <sys/cdefs.h>
 #include <sys/stat.h>
 
 #include <ctype.h>
@@ -36,11 +35,12 @@
 #include "diff.h"
 #include "xmalloc.h"
 
-static const char diff_version[] = "FreeBSD diff 20220309";
+static const char diff_version[] = "FreeBSD diff 20240307";
 bool	 lflag, Nflag, Pflag, rflag, sflag, Tflag, cflag;
 bool	 ignore_file_case, suppress_common, color, noderef;
 static bool help = false;
-int	 diff_format, diff_context, status;
+int	 diff_format, diff_context, diff_algorithm, status;
+bool	 diff_algorithm_set;
 int	 tabsize = 8, width = 130;
 static int	colorflag = COLORFLAG_NEVER;
 char	*start, *ifdefname, *diffargs, *label[2];
@@ -51,7 +51,17 @@ struct stat stb1, stb2;
 struct excludes *excludes_list;
 regex_t	 ignore_re, most_recent_re;
 
-#define	OPTIONS	"0123456789aBbC:cdD:efF:HhI:iL:lnNPpqrS:sTtU:uwW:X:x:y"
+static struct algorithm {
+	const char *name;
+	int id;
+} algorithms[] = {
+	{"stone", D_DIFFSTONE},
+	{"myers", D_DIFFMYERS},
+	{"patience", D_DIFFPATIENCE},
+	{NULL, D_DIFFNONE}
+};
+
+#define	OPTIONS	"0123456789A:aBbC:cdD:efF:HhI:iL:lnNPpqrS:sTtU:uwW:X:x:y"
 enum {
 	OPT_TSIZE = CHAR_MAX + 1,
 	OPT_STRIPCR,
@@ -68,6 +78,7 @@ enum {
 };
 
 static struct option longopts[] = {
+	{ "algorithm",			required_argument,	0,	'A' },
 	{ "text",			no_argument,		0,	'a' },
 	{ "ignore-space-change",	no_argument,		0,	'b' },
 	{ "context",			optional_argument,	0,	'C' },
@@ -139,6 +150,8 @@ main(int argc, char **argv)
 	newarg = 1;
 	diff_context = 3;
 	diff_format = D_UNSET;
+	diff_algorithm = D_DIFFMYERS;
+	diff_algorithm_set = false;
 #define	FORMAT_MISMATCHED(type)	\
 	(diff_format != D_UNSET && diff_format != (type))
 	while ((ch = getopt_long(argc, argv, OPTIONS, longopts, NULL)) != -1) {
@@ -153,6 +166,21 @@ main(int argc, char **argv)
 				usage();
 			diff_context = (diff_context * 10) + (ch - '0');
 			break;
+		case 'A':
+			diff_algorithm = D_DIFFNONE;
+			for (struct algorithm *a = algorithms; a->name;a++) {
+				if(strcasecmp(optarg, a->name) == 0) {
+					diff_algorithm = a->id;
+					diff_algorithm_set = true;
+					break;
+				}
+			}
+
+			if (diff_algorithm == D_DIFFNONE) {
+				printf("unknown algorithm: %s\n", optarg);
+				usage();
+			}
+			break;
 		case 'a':
 			dflags |= D_FORCEASCII;
 			break;
@@ -276,8 +304,10 @@ main(int argc, char **argv)
 			break;
 		case 'W':
 			width = (int) strtonum(optarg, 1, INT_MAX, &errstr);
-			if (errstr)
-				errx(1, "width is %s: %s", errstr, optarg);
+			if (errstr) {
+				warnx("Invalid argument for width");
+				usage();
+			}
 			break;
 		case 'X':
 			read_excludes_file(optarg);
@@ -315,8 +345,10 @@ main(int argc, char **argv)
 			break;
 		case OPT_TSIZE:
 			tabsize = (int) strtonum(optarg, 1, INT_MAX, &errstr);
-			if (errstr)
-				errx(1, "tabsize is %s: %s", errstr, optarg);
+			if (errstr) {
+				warnx("Invalid argument for tabsize");
+				usage();
+			}
 			break;
 		case OPT_STRIPCR:
 			dflags |= D_STRIPCR;
@@ -437,6 +469,8 @@ main(int argc, char **argv)
 		print_status(diffreg(argv[0], argv[1], dflags, 1), argv[0],
 		    argv[1], "");
 	}
+	if (fflush(stdout) != 0)
+		err(2, "stdout");
 	exit(status);
 }
 
diff --git a/usr.bin/diff/diff.h b/usr.bin/diff/diff.h
index 214ff77a362f..679b95e7ca94 100644
--- a/usr.bin/diff/diff.h
+++ b/usr.bin/diff/diff.h
@@ -51,6 +51,14 @@
 
 #define	D_UNSET		-2
 
+/*
+ * Algorithms
+ */
+
+#define D_DIFFNONE     0
+#define D_DIFFSTONE    1       /* Stone or 'old diff' algorithm */
+#define D_DIFFMYERS    2       /* Myers diff algorithm */
+#define D_DIFFPATIENCE 3       /* Patience diff algorithm */
 
 /*
  * Output flags
@@ -73,6 +81,9 @@
 #define D_SKIPBLANKLINES	0x800	/* Skip blank lines */
 #define D_MATCHLAST		0x1000	/* Display last line matching provided regex */
 
+/* Features supported by new algorithms */
+#define D_NEWALGO_FLAGS                (D_FORCEASCII | D_PROTOTYPE | D_IGNOREBLANKS)
+
 /*
  * Status values for print_status() and diffreg() return values
  */
@@ -98,8 +109,9 @@ struct excludes {
 };
 
 extern bool	lflag, Nflag, Pflag, rflag, sflag, Tflag, cflag;
-extern bool	ignore_file_case, suppress_common, color, noderef;
-extern int	diff_format, diff_context, status;
+extern bool    ignore_file_case, suppress_common, color, noderef, algorithm_set;
+extern int     diff_format, diff_context, diff_algorithm, status;
+extern bool	diff_algorithm_set;
 extern int	tabsize, width;
 extern char	*start, *ifdefname, *diffargs, *label[2];
 extern char	*ignore_pats, *most_recent_pat;
@@ -110,5 +122,7 @@ extern struct	excludes *excludes_list;
 extern regex_t	ignore_re, most_recent_re;
 
 int	diffreg(char *, char *, int, int);
+int	diffreg_new(char *, char *, int, int);
+bool	can_libdiff(int);
 void	diffdir(char *, char *, int);
 void	print_status(int, char *, char *, const char *);
diff --git a/usr.bin/diff/diffdir.c b/usr.bin/diff/diffdir.c
index 798229e990df..539f537a08c3 100644
--- a/usr.bin/diff/diffdir.c
+++ b/usr.bin/diff/diffdir.c
@@ -20,7 +20,6 @@
  * Materiel Command, USAF, under agreement number F39502-99-1-0512.
  */
 
-#include <sys/cdefs.h>
 #include <sys/stat.h>
 
 #include <dirent.h>
diff --git a/usr.bin/diff/diffreg.c b/usr.bin/diff/diffreg.c
index c67f3105290f..06d914215b11 100644
--- a/usr.bin/diff/diffreg.c
+++ b/usr.bin/diff/diffreg.c
@@ -72,6 +72,7 @@
 #include <err.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <limits.h>
 #include <math.h>
 #include <paths.h>
 #include <regex.h>
@@ -166,6 +167,7 @@ struct context_vec {
 
 enum readhash { RH_BINARY, RH_OK, RH_EOF };
 
+static int	 diffreg_stone(char *, char *, int, int);
 static FILE	*opentemp(const char *);
 static void	 output(char *, FILE *, char *, FILE *, int);
 static void	 check(FILE *, FILE *, int);
@@ -205,7 +207,7 @@ static int	 len[2];
 static int	 pref, suff;	/* length of prefix and suffix */
 static int	 slen[2];
 static int	 anychange;
-static int	 hw, lpad, rpad;	/* half width and padding */
+static int	 hw, lpad,rpad;		/* half width and padding */
 static int	 edoffset;
 static long	*ixnew;		/* will be overlaid on file[1] */
 static long	*ixold;		/* will be overlaid on klist */
@@ -222,6 +224,32 @@ static char lastbuf[FUNCTION_CONTEXT_SIZE];
 static int lastline;
 static int lastmatchline;
 
+int
+diffreg(char *file1, char *file2, int flags, int capsicum)
+{
+	/*
+	 * If we have set the algorithm with -A or --algorithm use that if we
+	 * can and if not print an error.
+	 */
+	if (diff_algorithm_set) {
+		if (diff_algorithm == D_DIFFMYERS ||
+		    diff_algorithm == D_DIFFPATIENCE) {
+			if (can_libdiff(flags))
+				return diffreg_new(file1, file2, flags, capsicum);
+			else
+				errx(2, "cannot use Myers algorithm with selected options");
+		} else {
+			/* Fallback to using stone. */
+			return diffreg_stone(file1, file2, flags, capsicum);
+		}
+	} else {
+		if (can_libdiff(flags))
+			return diffreg_new(file1, file2, flags, capsicum);
+		else
+			return diffreg_stone(file1, file2, flags, capsicum);
+	}
+}
+
 static int
 clow2low(int c)
 {
@@ -237,7 +265,7 @@ cup2low(int c)
 }
 
 int
-diffreg(char *file1, char *file2, int flags, int capsicum)
+diffreg_stone(char *file1, char *file2, int flags, int capsicum)
 {
 	FILE *f1, *f2;
 	int i, rval;
@@ -726,10 +754,10 @@ check(FILE *f1, FILE *f2, int flags)
 				 * in one file for -b or -w.
 				 */
 				if (flags & (D_FOLDBLANKS | D_IGNOREBLANKS)) {
-					if (c == EOF && d == '\n') {
+					if (c == EOF && isspace(d)) {
 						ctnew++;
 						break;
-					} else if (c == '\n' && d == EOF) {
+					} else if (isspace(c) && d == EOF) {
 						ctold++;
 						break;
 					}
@@ -1219,6 +1247,7 @@ fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
 
 	edoffset = 0;
 	nc = 0;
+	col = 0;
 	/*
 	 * When doing #ifdef's, copy down to current line
 	 * if this is the first file, so that stuff makes it to output.
@@ -1284,6 +1313,10 @@ fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
 					printf("\n\\ No newline at end of file\n");
 				return (col);
 			}
+			/*
+			 * when using --side-by-side, col needs to be increased
+			 * in any case to keep the columns aligned
+			 */
 			if (c == '\t') {
 				/*
 				 * Calculate where the tab would bring us.
diff --git a/usr.bin/diff/diffreg_new.c b/usr.bin/diff/diffreg_new.c
new file mode 100644
index 000000000000..39b359422f39
--- /dev/null
+++ b/usr.bin/diff/diffreg_new.c
@@ -0,0 +1,325 @@
+/*
+ * Copyright (c) 2018 Martin Pieuchot
+ * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/capsicum.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <capsicum_helpers.h>
+#include <err.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <time.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "diff.h"
+#include <arraylist.h>
+#include <diff_main.h>
+#include <diff_output.h>
+
+const char *format_label(const char *, struct stat *);
+
+enum diffreg_algo {
+	DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE = 0,
+	DIFFREG_ALGO_MYERS_THEN_PATIENCE = 1,
+	DIFFREG_ALGO_PATIENCE = 2,
+	DIFFREG_ALGO_NONE = 3,
+};
+
+int		 diffreg_new(char *, char *, int, int);
+FILE *		 openfile(const char *, char **, struct stat *);
+
+static const struct diff_algo_config myers_then_patience;
+static const struct diff_algo_config myers_then_myers_divide;
+static const struct diff_algo_config patience;
+static const struct diff_algo_config myers_divide;
+
+static const struct diff_algo_config myers_then_patience = (struct diff_algo_config){
+	.impl = diff_algo_myers,
+	.permitted_state_size = 1024 * 1024 * sizeof(int),
+	.fallback_algo = &patience,
+};
+
+static const struct diff_algo_config myers_then_myers_divide =
+	(struct diff_algo_config){
+	.impl = diff_algo_myers,
+	.permitted_state_size = 1024 * 1024 * sizeof(int),
+	.fallback_algo = &myers_divide,
+};
+
+static const struct diff_algo_config patience = (struct diff_algo_config){
+	.impl = diff_algo_patience,
+	/* After subdivision, do Patience again: */
+	.inner_algo = &patience,
+	/* If subdivision failed, do Myers Divide et Impera: */
+	.fallback_algo = &myers_then_myers_divide,
+};
+
+static const struct diff_algo_config myers_divide = (struct diff_algo_config){
+	.impl = diff_algo_myers_divide,
+	/* When division succeeded, start from the top: */
+	.inner_algo = &myers_then_myers_divide,
+	/* (fallback_algo = NULL implies diff_algo_none). */
+};
+
+static const struct diff_algo_config no_algo = (struct diff_algo_config){
+	.impl = diff_algo_none,
+};
+
+/* If the state for a forward-Myers is small enough, use Myers, otherwise first
+ * do a Myers-divide. */
+static const struct diff_config diff_config_myers_then_myers_divide = {
+	.atomize_func = diff_atomize_text_by_line,
+	.algo = &myers_then_myers_divide,
+};
+
+/* If the state for a forward-Myers is small enough, use Myers, otherwise first
+ * do a Patience. */
+static const struct diff_config diff_config_myers_then_patience = {
+	.atomize_func = diff_atomize_text_by_line,
+	.algo = &myers_then_patience,
+};
+
+/* Directly force Patience as a first divider of the source file. */
+static const struct diff_config diff_config_patience = {
+	.atomize_func = diff_atomize_text_by_line,
+	.algo = &patience,
+};
+
+/* Directly force Patience as a first divider of the source file. */
+static const struct diff_config diff_config_no_algo = {
+	.atomize_func = diff_atomize_text_by_line,
+};
+
+const char *
+format_label(const char *oldlabel, struct stat *stb)
+{
+	const char *time_format = "%Y-%m-%d %H:%M:%S";
+	char *newlabel;
+	char buf[256];
+	char end[10];
+	struct tm tm, *tm_ptr;
+	int nsec = stb->st_mtim.tv_nsec;
+	size_t newlabellen, timelen, endlen;
+	tm_ptr = localtime_r(&stb->st_mtime, &tm);
+
+	timelen = strftime(buf, 256, time_format, tm_ptr);
+	endlen = strftime(end, 10, "%z", tm_ptr);
+
+	/*
+	 * The new label is the length of the time, old label, timezone,
+	 * 9 characters for nanoseconds, and 4 characters for a period
+	 * and for formatting.
+	 */
+	newlabellen = timelen + strlen(oldlabel) + endlen + 9 + 4;
+	newlabel = calloc(newlabellen, sizeof(char));
+
+	snprintf(newlabel, newlabellen ,"%s\t%s.%.9d %s\n",
+		oldlabel, buf, nsec, end);
+
+	return newlabel;
+}
+
+int
+diffreg_new(char *file1, char *file2, int flags, int capsicum)
+{
+	char *str1, *str2;
+	FILE *f1, *f2;
+	struct stat st1, st2;
+	struct diff_input_info info;
+	struct diff_data left = {}, right = {};
+	struct diff_result *result = NULL;
+	bool force_text, have_binary;
+	int rc, atomizer_flags, rflags, diff_flags = 0;
+	int context_lines = diff_context;
+	const struct diff_config *cfg;
+	enum diffreg_algo algo;
+	cap_rights_t rights_ro;
+
+	algo = DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE;
+
+	switch (algo) {
+	default:
+	case DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE:
+		cfg = &diff_config_myers_then_myers_divide;
+		break;
+	case DIFFREG_ALGO_MYERS_THEN_PATIENCE:
+		cfg = &diff_config_myers_then_patience;
+		break;
+	case DIFFREG_ALGO_PATIENCE:
+		cfg = &diff_config_patience;
+		break;
+	case DIFFREG_ALGO_NONE:
+		cfg = &diff_config_no_algo;
+		break;
+	}
+
+	f1 = openfile(file1, &str1, &st1);
+	f2 = openfile(file2, &str2, &st2);
+
+	if (capsicum) {
+		cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
+		if (caph_rights_limit(fileno(f1), &rights_ro) < 0)
+			err(2, "unable to limit rights on: %s", file1);
+		if (caph_rights_limit(fileno(f2), &rights_ro) < 0)
+			err(2, "unable to limit rights on: %s", file2);
+		if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
+			/* stdin has already been limited */
+			if (caph_limit_stderr() == -1)
+				err(2, "unable to limit stderr");
+			if (caph_limit_stdout() == -1)
+				err(2, "unable to limit stdout");
+		} else if (caph_limit_stdio() == -1)
+				err(2, "unable to limit stdio");
+		caph_cache_catpages();
+		caph_cache_tzdata();
+		if (caph_enter() < 0)
+			err(2, "unable to enter capability mode");
+	}
+	/*
+	 * If we have been given a label use that for the paths, if not format
+	 * the path with the files modification time.
+	 */
+	info.flags = 0;
+	info.left_path = (label[0] != NULL) ?
+		label[0] : format_label(file1, &stb1);
+	info.right_path = (label[1] != NULL) ?
+		label[1] : format_label(file2, &stb2);
+
+	if (flags & D_FORCEASCII)
+		diff_flags |= DIFF_FLAG_FORCE_TEXT_DATA;
+	if (flags & D_IGNOREBLANKS)
+		diff_flags |= DIFF_FLAG_IGNORE_WHITESPACE;
+	if (flags & D_PROTOTYPE)
+		diff_flags |= DIFF_FLAG_SHOW_PROTOTYPES;
+
+	if (diff_atomize_file(&left, cfg, f1, (uint8_t *)str1, st1.st_size, diff_flags)) {
+		rc = D_ERROR;
+		goto done;
+	}
+	if (diff_atomize_file(&right, cfg, f2, (uint8_t *)str2, st2.st_size, diff_flags)) {
+		rc = D_ERROR;
+		goto done;
+	}
+
+	result = diff_main(cfg, &left, &right);
+	if (result->rc != DIFF_RC_OK) {
+		rc = D_ERROR;
+		status |= 2;
+		goto done;
+	}
+	/*
+	 * If there wasn't an error, but we don't have any printable chunks
+	 * then the files must match.
+	 */
+	if (!diff_result_contains_printable_chunks(result)) {
+		rc = D_SAME;
+		goto done;
+	}
+
+	atomizer_flags = (result->left->atomizer_flags | result->right->atomizer_flags);
+	rflags = (result->left->root->diff_flags | result->right->root->diff_flags);
+	force_text = (rflags & DIFF_FLAG_FORCE_TEXT_DATA);
+	have_binary = (atomizer_flags & DIFF_ATOMIZER_FOUND_BINARY_DATA);
+
+	if (have_binary && !force_text) {
+		rc = D_BINARY;
+		status |= 1;
+		goto done;
+	}
+
+	if (diff_format == D_NORMAL) {
+		rc = diff_output_plain(NULL, stdout, &info, result, false);
+	} else if (diff_format == D_EDIT) {
+		rc = diff_output_edscript(NULL, stdout, &info, result);
+	} else {
+		rc = diff_output_unidiff(NULL, stdout, &info, result,
+		    context_lines);
+	}
+	if (rc != DIFF_RC_OK) {
+		rc = D_ERROR;
+		status |= 2;
+	} else {
+		rc = D_DIFFER;
+		status |= 1;
+	}
+done:
+	diff_result_free(result);
+	diff_data_free(&left);
+	diff_data_free(&right);
+	if (str1)
+		munmap(str1, st1.st_size);
+	if (str2)
+		munmap(str2, st2.st_size);
+	fclose(f1);
+	fclose(f2);
+
+	return rc;
+}
+
+FILE *
+openfile(const char *path, char **p, struct stat *st)
+{
+	FILE *f = NULL;
+
+	if (strcmp(path, "-") == 0)
+		f = stdin;
+	else
+		f = fopen(path, "r");
+
+	if (f == NULL)
+		err(2, "%s", path);
+
+	if (fstat(fileno(f), st) == -1)
+		err(2, "%s", path);
+
+#ifndef DIFF_NO_MMAP
+	*p = mmap(NULL, st->st_size, PROT_READ, MAP_PRIVATE, fileno(f), 0);
+	if (*p == MAP_FAILED)
+#endif
+		*p = NULL; /* fall back on file I/O */
+
+	return f;
+}
+
+bool
+can_libdiff(int flags)
+{
+	/* We can't use fifos with libdiff yet */
+	if (S_ISFIFO(stb1.st_mode) || S_ISFIFO(stb2.st_mode))
+		return false;
+
+	/* Is this one of the supported input/output modes for diffreg_new? */
+	if ((flags == 0 || !(flags & ~D_NEWALGO_FLAGS)) &&
+		ignore_pats == NULL && (
+		diff_format == D_NORMAL ||
+#if 0
+		diff_format == D_EDIT ||
+#endif
+		diff_format == D_UNIFIED) &&
+		(diff_algorithm == D_DIFFMYERS || diff_algorithm == D_DIFFPATIENCE)) {
+		return true;
+	}
+
+	/* Fallback to using stone. */
+	return false;
+}
diff --git a/usr.bin/diff/pr.c b/usr.bin/diff/pr.c
index 5dedf689351c..c3ea280073af 100644
--- a/usr.bin/diff/pr.c
+++ b/usr.bin/diff/pr.c
@@ -24,7 +24,6 @@
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <sys/cdefs.h>
 #include <sys/procdesc.h>
 #include <sys/wait.h>
 
diff --git a/usr.bin/diff/tests/diff_test.sh b/usr.bin/diff/tests/diff_test.sh
index 66596bae8a46..146c23fa303d 100755
--- a/usr.bin/diff/tests/diff_test.sh
+++ b/usr.bin/diff/tests/diff_test.sh
@@ -266,6 +266,10 @@ report_identical_body()
 {
 	printf "\tA\n" > A
 	printf "\tB\n" > B
+	atf_check -s exit:0 -o match:"are identical" \
+		  diff -s A A
+	atf_check -s exit:1 -o not-match:"are identical" \
+		  diff -s A B
 	chmod -r B
 	atf_check -s exit:2 -e inline:"diff: B: Permission denied\n" \
 		-o empty diff -s A B
diff --git a/usr.bin/diff/xmalloc.c b/usr.bin/diff/xmalloc.c
index 69ccae83cfdd..ce0f4545aee8 100644
--- a/usr.bin/diff/xmalloc.c
+++ b/usr.bin/diff/xmalloc.c
@@ -13,7 +13,6 @@
  * called by a name other than "ssh" or "Secure Shell".
  */
 
-#include <sys/cdefs.h>
 #include <err.h>
 #include <stdarg.h>
 #include <stdint.h>



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202403271126.42RBQGoX003685>